diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff7857f202bae11cb55fed6459f298ef8acc3b23 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/callbacks.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/callbacks.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0910b2ca692fb3c94d34be111a4f21ffb9bebfc0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/callbacks.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/configuration_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/configuration_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89ecf618cb1c9d13dca74691f56c8c5da83f36b2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/configuration_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/dependency_versions_check.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/dependency_versions_check.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1d14565a7cfc1acf8ee54301c1dbe7a72459bd0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/dependency_versions_check.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/dependency_versions_table.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/dependency_versions_table.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52e98e9b70c9bc98e4a4ad7e2286a49040cb189e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/dependency_versions_table.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/image_processor.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/image_processor.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a521eeada934a699978908cbcbcbdc198654cc4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/image_processor.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/optimization.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/optimization.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ed820c0ae70f5a292cb62562c60ad1ef048516f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/optimization.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/training_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/training_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..961d06db5b37124d92ce8d1ac84d9d38f9659abc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/training_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/video_processor.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/video_processor.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ae20671fc04da8c68c747d508ba52f4e4d5b4cc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/__pycache__/video_processor.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3a8c10147e8bdc6be186193fa7775460b474a9ba --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod +from argparse import ArgumentParser + + +class BaseDiffusersCLICommand(ABC): + @staticmethod + @abstractmethod + def register_subcommand(parser: ArgumentParser): + raise NotImplementedError() + + @abstractmethod + def run(self): + raise NotImplementedError() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/custom_blocks.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/custom_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..953240c5a2c3c9df259316b47c285ac98276cb49 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/custom_blocks.py @@ -0,0 +1,132 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage example: + TODO +""" + +import ast +import importlib.util +import os +from argparse import ArgumentParser, Namespace +from pathlib import Path + +from ..utils import logging +from . import BaseDiffusersCLICommand + + +EXPECTED_PARENT_CLASSES = ["ModularPipelineBlocks"] +CONFIG = "config.json" + + +def conversion_command_factory(args: Namespace): + return CustomBlocksCommand(args.block_module_name, args.block_class_name) + + +class CustomBlocksCommand(BaseDiffusersCLICommand): + @staticmethod + def register_subcommand(parser: ArgumentParser): + conversion_parser = parser.add_parser("custom_blocks") + conversion_parser.add_argument( + "--block_module_name", + type=str, + default="block.py", + help="Module filename in which the custom block will be implemented.", + ) + conversion_parser.add_argument( + "--block_class_name", + type=str, + default=None, + help="Name of the custom block. If provided None, we will try to infer it.", + ) + conversion_parser.set_defaults(func=conversion_command_factory) + + def __init__(self, block_module_name: str = "block.py", block_class_name: str = None): + self.logger = logging.get_logger("diffusers-cli/custom_blocks") + self.block_module_name = Path(block_module_name) + self.block_class_name = block_class_name + + def run(self): + # determine the block to be saved. + out = self._get_class_names(self.block_module_name) + classes_found = list({cls for cls, _ in out}) + + if self.block_class_name is not None: + child_class, parent_class = self._choose_block(out, self.block_class_name) + if child_class is None and parent_class is None: + raise ValueError( + "`block_class_name` could not be retrieved. Available classes from " + f"{self.block_module_name}:\n{classes_found}" + ) + else: + self.logger.info( + f"Found classes: {classes_found} will be using {classes_found[0]}. " + "If this needs to be changed, re-run the command specifying `block_class_name`." + ) + child_class, parent_class = out[0][0], out[0][1] + + # dynamically get the custom block and initialize it to call `save_pretrained` in the current directory. + # the user is responsible for running it, so I guess that is safe? + module_name = f"__dynamic__{self.block_module_name.stem}" + spec = importlib.util.spec_from_file_location(module_name, str(self.block_module_name)) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + getattr(module, child_class)().save_pretrained(os.getcwd()) + + # or, we could create it manually. + # automap = self._create_automap(parent_class=parent_class, child_class=child_class) + # with open(CONFIG, "w") as f: + # json.dump(automap, f) + + def _choose_block(self, candidates, chosen=None): + for cls, base in candidates: + if cls == chosen: + return cls, base + return None, None + + def _get_class_names(self, file_path): + source = file_path.read_text(encoding="utf-8") + try: + tree = ast.parse(source, filename=file_path) + except SyntaxError as e: + raise ValueError(f"Could not parse {file_path!r}: {e}") from e + + results: list[tuple[str, str]] = [] + for node in tree.body: + if not isinstance(node, ast.ClassDef): + continue + + # extract all base names for this class + base_names = [bname for b in node.bases if (bname := self._get_base_name(b)) is not None] + + # for each allowed base that appears in the class's bases, emit a tuple + for allowed in EXPECTED_PARENT_CLASSES: + if allowed in base_names: + results.append((node.name, allowed)) + + return results + + def _get_base_name(self, node: ast.expr): + if isinstance(node, ast.Name): + return node.id + elif isinstance(node, ast.Attribute): + val = self._get_base_name(node.value) + return f"{val}.{node.attr}" if val else node.attr + return None + + def _create_automap(self, parent_class, child_class): + module = str(self.block_module_name).replace(".py", "").rsplit(".", 1)[-1] + auto_map = {f"{parent_class}": f"{module}.{child_class}"} + return {"auto_map": auto_map} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/diffusers_cli.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/diffusers_cli.py new file mode 100644 index 0000000000000000000000000000000000000000..a27ac24f2a3eab862e2e78c2e3eea872b8036eb0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/diffusers_cli.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import ArgumentParser + +from .custom_blocks import CustomBlocksCommand +from .env import EnvironmentCommand +from .fp16_safetensors import FP16SafetensorsCommand + + +def main(): + parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli []") + commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") + + # Register commands + EnvironmentCommand.register_subcommand(commands_parser) + FP16SafetensorsCommand.register_subcommand(commands_parser) + CustomBlocksCommand.register_subcommand(commands_parser) + + # Let's go + args = parser.parse_args() + + if not hasattr(args, "func"): + parser.print_help() + exit(1) + + # Run + service = args.func(args) + service.run() + + +if __name__ == "__main__": + main() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/env.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/env.py new file mode 100644 index 0000000000000000000000000000000000000000..58f31d478bf35420b3bd324b0aaf57bef934b380 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/env.py @@ -0,0 +1,180 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import platform +import subprocess +from argparse import ArgumentParser + +import huggingface_hub + +from .. import __version__ as version +from ..utils import ( + is_accelerate_available, + is_bitsandbytes_available, + is_flax_available, + is_google_colab, + is_peft_available, + is_safetensors_available, + is_torch_available, + is_transformers_available, + is_xformers_available, +) +from . import BaseDiffusersCLICommand + + +def info_command_factory(_): + return EnvironmentCommand() + + +class EnvironmentCommand(BaseDiffusersCLICommand): + @staticmethod + def register_subcommand(parser: ArgumentParser) -> None: + download_parser = parser.add_parser("env") + download_parser.set_defaults(func=info_command_factory) + + def run(self) -> dict: + hub_version = huggingface_hub.__version__ + + safetensors_version = "not installed" + if is_safetensors_available(): + import safetensors + + safetensors_version = safetensors.__version__ + + pt_version = "not installed" + pt_cuda_available = "NA" + if is_torch_available(): + import torch + + pt_version = torch.__version__ + pt_cuda_available = torch.cuda.is_available() + + flax_version = "not installed" + jax_version = "not installed" + jaxlib_version = "not installed" + jax_backend = "NA" + if is_flax_available(): + import flax + import jax + import jaxlib + + flax_version = flax.__version__ + jax_version = jax.__version__ + jaxlib_version = jaxlib.__version__ + jax_backend = jax.lib.xla_bridge.get_backend().platform + + transformers_version = "not installed" + if is_transformers_available(): + import transformers + + transformers_version = transformers.__version__ + + accelerate_version = "not installed" + if is_accelerate_available(): + import accelerate + + accelerate_version = accelerate.__version__ + + peft_version = "not installed" + if is_peft_available(): + import peft + + peft_version = peft.__version__ + + bitsandbytes_version = "not installed" + if is_bitsandbytes_available(): + import bitsandbytes + + bitsandbytes_version = bitsandbytes.__version__ + + xformers_version = "not installed" + if is_xformers_available(): + import xformers + + xformers_version = xformers.__version__ + + platform_info = platform.platform() + + is_google_colab_str = "Yes" if is_google_colab() else "No" + + accelerator = "NA" + if platform.system() in {"Linux", "Windows"}: + try: + sp = subprocess.Popen( + ["nvidia-smi", "--query-gpu=gpu_name,memory.total", "--format=csv,noheader"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + out_str, _ = sp.communicate() + out_str = out_str.decode("utf-8") + + if len(out_str) > 0: + accelerator = out_str.strip() + except FileNotFoundError: + pass + elif platform.system() == "Darwin": # Mac OS + try: + sp = subprocess.Popen( + ["system_profiler", "SPDisplaysDataType"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + out_str, _ = sp.communicate() + out_str = out_str.decode("utf-8") + + start = out_str.find("Chipset Model:") + if start != -1: + start += len("Chipset Model:") + end = out_str.find("\n", start) + accelerator = out_str[start:end].strip() + + start = out_str.find("VRAM (Total):") + if start != -1: + start += len("VRAM (Total):") + end = out_str.find("\n", start) + accelerator += " VRAM: " + out_str[start:end].strip() + except FileNotFoundError: + pass + else: + print("It seems you are running an unusual OS. Could you fill in the accelerator manually?") + + info = { + "🤗 Diffusers version": version, + "Platform": platform_info, + "Running on Google Colab?": is_google_colab_str, + "Python version": platform.python_version(), + "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})", + "Flax version (CPU?/GPU?/TPU?)": f"{flax_version} ({jax_backend})", + "Jax version": jax_version, + "JaxLib version": jaxlib_version, + "Huggingface_hub version": hub_version, + "Transformers version": transformers_version, + "Accelerate version": accelerate_version, + "PEFT version": peft_version, + "Bitsandbytes version": bitsandbytes_version, + "Safetensors version": safetensors_version, + "xFormers version": xformers_version, + "Accelerator": accelerator, + "Using GPU in script?": "", + "Using distributed or parallel set-up in script?": "", + } + + print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") + print(self.format_dict(info)) + + return info + + @staticmethod + def format_dict(d: dict) -> str: + return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/fp16_safetensors.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/fp16_safetensors.py new file mode 100644 index 0000000000000000000000000000000000000000..382d6c39bd19e3cede9ae6d40abff3d17eb7820e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/commands/fp16_safetensors.py @@ -0,0 +1,132 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage example: + diffusers-cli fp16_safetensors --ckpt_id=openai/shap-e --fp16 --use_safetensors +""" + +import glob +import json +import warnings +from argparse import ArgumentParser, Namespace +from importlib import import_module + +import huggingface_hub +import torch +from huggingface_hub import hf_hub_download +from packaging import version + +from ..utils import logging +from . import BaseDiffusersCLICommand + + +def conversion_command_factory(args: Namespace): + if args.use_auth_token: + warnings.warn( + "The `--use_auth_token` flag is deprecated and will be removed in a future version." + "Authentication is now handled automatically if the user is logged in." + ) + return FP16SafetensorsCommand(args.ckpt_id, args.fp16, args.use_safetensors) + + +class FP16SafetensorsCommand(BaseDiffusersCLICommand): + @staticmethod + def register_subcommand(parser: ArgumentParser): + conversion_parser = parser.add_parser("fp16_safetensors") + conversion_parser.add_argument( + "--ckpt_id", + type=str, + help="Repo id of the checkpoints on which to run the conversion. Example: 'openai/shap-e'.", + ) + conversion_parser.add_argument( + "--fp16", action="store_true", help="If serializing the variables in FP16 precision." + ) + conversion_parser.add_argument( + "--use_safetensors", action="store_true", help="If serializing in the safetensors format." + ) + conversion_parser.add_argument( + "--use_auth_token", + action="store_true", + help="When working with checkpoints having private visibility. When used `hf auth login` needs to be run beforehand.", + ) + conversion_parser.set_defaults(func=conversion_command_factory) + + def __init__(self, ckpt_id: str, fp16: bool, use_safetensors: bool): + self.logger = logging.get_logger("diffusers-cli/fp16_safetensors") + self.ckpt_id = ckpt_id + self.local_ckpt_dir = f"/tmp/{ckpt_id}" + self.fp16 = fp16 + + self.use_safetensors = use_safetensors + + if not self.use_safetensors and not self.fp16: + raise NotImplementedError( + "When `use_safetensors` and `fp16` both are False, then this command is of no use." + ) + + def run(self): + if version.parse(huggingface_hub.__version__) < version.parse("0.9.0"): + raise ImportError( + "The huggingface_hub version must be >= 0.9.0 to use this command. Please update your huggingface_hub" + " installation." + ) + else: + from huggingface_hub import create_commit + from huggingface_hub._commit_api import CommitOperationAdd + + model_index = hf_hub_download(repo_id=self.ckpt_id, filename="model_index.json") + with open(model_index, "r") as f: + pipeline_class_name = json.load(f)["_class_name"] + pipeline_class = getattr(import_module("diffusers"), pipeline_class_name) + self.logger.info(f"Pipeline class imported: {pipeline_class_name}.") + + # Load the appropriate pipeline. We could have used `DiffusionPipeline` + # here, but just to avoid potential edge cases. + pipeline = pipeline_class.from_pretrained( + self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32 + ) + pipeline.save_pretrained( + self.local_ckpt_dir, + safe_serialization=True if self.use_safetensors else False, + variant="fp16" if self.fp16 else None, + ) + self.logger.info(f"Pipeline locally saved to {self.local_ckpt_dir}.") + + # Fetch all the paths. + if self.fp16: + modified_paths = glob.glob(f"{self.local_ckpt_dir}/*/*.fp16.*") + elif self.use_safetensors: + modified_paths = glob.glob(f"{self.local_ckpt_dir}/*/*.safetensors") + + # Prepare for the PR. + commit_message = f"Serialize variables with FP16: {self.fp16} and safetensors: {self.use_safetensors}." + operations = [] + for path in modified_paths: + operations.append(CommitOperationAdd(path_in_repo="/".join(path.split("/")[4:]), path_or_fileobj=path)) + + # Open the PR. + commit_description = ( + "Variables converted by the [`diffusers`' `fp16_safetensors`" + " CLI](https://github.com/huggingface/diffusers/blob/main/src/diffusers/commands/fp16_safetensors.py)." + ) + hub_pr_url = create_commit( + repo_id=self.ckpt_id, + operations=operations, + commit_message=commit_message, + commit_description=commit_description, + repo_type="model", + create_pr=True, + ).pr_url + self.logger.info(f"PR created here: {hub_pr_url}.") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/experimental/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/experimental/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ebc8155403016dfd8ad7fb78d246f9da9098ac50 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/experimental/__init__.py @@ -0,0 +1 @@ +from .rl import ValueGuidedRLPipeline diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b6653817dc959a404fd0a3f98e64851717c95ee6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/__init__.py @@ -0,0 +1,31 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from ..utils import is_torch_available, logging + + +if is_torch_available(): + from .adaptive_projected_guidance import AdaptiveProjectedGuidance + from .adaptive_projected_guidance_mix import AdaptiveProjectedMixGuidance + from .auto_guidance import AutoGuidance + from .classifier_free_guidance import ClassifierFreeGuidance + from .classifier_free_zero_star_guidance import ClassifierFreeZeroStarGuidance + from .frequency_decoupled_guidance import FrequencyDecoupledGuidance + from .guider_utils import BaseGuidance + from .magnitude_aware_guidance import MagnitudeAwareGuidance + from .perturbed_attention_guidance import PerturbedAttentionGuidance + from .skip_layer_guidance import SkipLayerGuidance + from .smoothed_energy_guidance import SmoothedEnergyGuidance + from .tangential_classifier_free_guidance import TangentialClassifierFreeGuidance diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/adaptive_projected_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/adaptive_projected_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..b210cb3e67aaad907680a7a60786a4973be365c4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/adaptive_projected_guidance.py @@ -0,0 +1,237 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class AdaptiveProjectedGuidance(BaseGuidance): + """ + Adaptive Projected Guidance (APG): https://huggingface.co/papers/2410.02416 + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + adaptive_projected_guidance_momentum (`float`, defaults to `None`): + The momentum parameter for the adaptive projected guidance. Disabled if set to `None`. + adaptive_projected_guidance_rescale (`float`, defaults to `15.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.0`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `1.0`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + adaptive_projected_guidance_momentum: float | None = None, + adaptive_projected_guidance_rescale: float = 15.0, + eta: float = 1.0, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.adaptive_projected_guidance_momentum = adaptive_projected_guidance_momentum + self.adaptive_projected_guidance_rescale = adaptive_projected_guidance_rescale + self.eta = eta + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + self.momentum_buffer = None + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + if self._step == 0: + if self.adaptive_projected_guidance_momentum is not None: + self.momentum_buffer = MomentumBuffer(self.adaptive_projected_guidance_momentum) + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + if self._step == 0: + if self.adaptive_projected_guidance_momentum is not None: + self.momentum_buffer = MomentumBuffer(self.adaptive_projected_guidance_momentum) + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + if not self._is_apg_enabled(): + pred = pred_cond + else: + pred = normalized_guidance( + pred_cond, + pred_uncond, + self.guidance_scale, + self.momentum_buffer, + self.eta, + self.adaptive_projected_guidance_rescale, + self.use_original_formulation, + ) + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_apg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_apg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + +class MomentumBuffer: + def __init__(self, momentum: float): + self.momentum = momentum + self.running_average = 0 + + def update(self, update_value: torch.Tensor): + new_average = self.momentum * self.running_average + self.running_average = update_value + new_average + + def __repr__(self) -> str: + """ + Returns a string representation showing momentum, shape, statistics, and a slice of the running_average. + """ + if isinstance(self.running_average, torch.Tensor): + shape = tuple(self.running_average.shape) + + # Calculate statistics + with torch.no_grad(): + stats = { + "mean": self.running_average.mean().item(), + "std": self.running_average.std().item(), + "min": self.running_average.min().item(), + "max": self.running_average.max().item(), + } + + # Get a slice (max 3 elements per dimension) + slice_indices = tuple(slice(None, min(3, dim)) for dim in shape) + sliced_data = self.running_average[slice_indices] + + # Format the slice for display (convert to float32 for numpy compatibility with bfloat16) + slice_str = str(sliced_data.detach().float().cpu().numpy()) + if len(slice_str) > 200: # Truncate if too long + slice_str = slice_str[:200] + "..." + + stats_str = ", ".join([f"{k}={v:.4f}" for k, v in stats.items()]) + + return ( + f"MomentumBuffer(\n" + f" momentum={self.momentum},\n" + f" shape={shape},\n" + f" stats=[{stats_str}],\n" + f" slice={slice_str}\n" + f")" + ) + else: + return f"MomentumBuffer(momentum={self.momentum}, running_average={self.running_average})" + + +def normalized_guidance( + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor, + guidance_scale: float, + momentum_buffer: MomentumBuffer | None = None, + eta: float = 1.0, + norm_threshold: float = 0.0, + use_original_formulation: bool = False, +): + diff = pred_cond - pred_uncond + dim = [-i for i in range(1, len(diff.shape))] + + if momentum_buffer is not None: + momentum_buffer.update(diff) + diff = momentum_buffer.running_average + + if norm_threshold > 0: + ones = torch.ones_like(diff) + diff_norm = diff.norm(p=2, dim=dim, keepdim=True) + scale_factor = torch.minimum(ones, norm_threshold / diff_norm) + diff = diff * scale_factor + + v0, v1 = diff.double(), pred_cond.double() + v1 = torch.nn.functional.normalize(v1, dim=dim) + v0_parallel = (v0 * v1).sum(dim=dim, keepdim=True) * v1 + v0_orthogonal = v0 - v0_parallel + diff_parallel, diff_orthogonal = v0_parallel.type_as(diff), v0_orthogonal.type_as(diff) + normalized_update = diff_orthogonal + eta * diff_parallel + + pred = pred_cond if use_original_formulation else pred_uncond + pred = pred + guidance_scale * normalized_update + + return pred diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/adaptive_projected_guidance_mix.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/adaptive_projected_guidance_mix.py new file mode 100644 index 0000000000000000000000000000000000000000..559e30d2aabe0c596b19f7deaebd85aa13d45fdf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/adaptive_projected_guidance_mix.py @@ -0,0 +1,297 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class AdaptiveProjectedMixGuidance(BaseGuidance): + """ + Adaptive Projected Guidance (APG) https://huggingface.co/papers/2410.02416 combined with Classifier-Free Guidance + (CFG). This guider is used in HunyuanImage2.1 https://github.com/Tencent-Hunyuan/HunyuanImage-2.1 + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + adaptive_projected_guidance_momentum (`float`, defaults to `None`): + The momentum parameter for the adaptive projected guidance. Disabled if set to `None`. + adaptive_projected_guidance_rescale (`float`, defaults to `15.0`): + The rescale factor applied to the noise predictions for adaptive projected guidance. This is used to + improve image quality and fix + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions for classifier-free guidance. This is used to improve + image quality and fix overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample + Steps are Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.0`): + The fraction of the total number of denoising steps after which the classifier-free guidance starts. + stop (`float`, defaults to `1.0`): + The fraction of the total number of denoising steps after which the classifier-free guidance stops. + adaptive_projected_guidance_start_step (`int`, defaults to `5`): + The step at which the adaptive projected guidance starts (before this step, classifier-free guidance is + used, and momentum buffer is updated). + enabled (`bool`, defaults to `True`): + Whether this guidance is enabled. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 3.5, + guidance_rescale: float = 0.0, + adaptive_projected_guidance_scale: float = 10.0, + adaptive_projected_guidance_momentum: float = -0.5, + adaptive_projected_guidance_rescale: float = 10.0, + eta: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + adaptive_projected_guidance_start_step: int = 5, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.guidance_rescale = guidance_rescale + self.adaptive_projected_guidance_scale = adaptive_projected_guidance_scale + self.adaptive_projected_guidance_momentum = adaptive_projected_guidance_momentum + self.adaptive_projected_guidance_rescale = adaptive_projected_guidance_rescale + self.eta = eta + self.adaptive_projected_guidance_start_step = adaptive_projected_guidance_start_step + self.use_original_formulation = use_original_formulation + self.momentum_buffer = None + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + if self._step == 0: + if self.adaptive_projected_guidance_momentum is not None: + self.momentum_buffer = MomentumBuffer(self.adaptive_projected_guidance_momentum) + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + if self._step == 0: + if self.adaptive_projected_guidance_momentum is not None: + self.momentum_buffer = MomentumBuffer(self.adaptive_projected_guidance_momentum) + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + # no guidance + if not self._is_cfg_enabled(): + pred = pred_cond + + # CFG + update momentum buffer + elif not self._is_apg_enabled(): + if self.momentum_buffer is not None: + update_momentum_buffer(pred_cond, pred_uncond, self.momentum_buffer) + # CFG + update momentum buffer + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + + # APG + elif self._is_apg_enabled(): + pred = normalized_guidance( + pred_cond, + pred_uncond, + self.adaptive_projected_guidance_scale, + self.momentum_buffer, + self.eta, + self.adaptive_projected_guidance_rescale, + self.use_original_formulation, + ) + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_apg_enabled() or self._is_cfg_enabled(): + num_conditions += 1 + return num_conditions + + # Copied from diffusers.guiders.classifier_free_guidance.ClassifierFreeGuidance._is_cfg_enabled + def _is_cfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + def _is_apg_enabled(self) -> bool: + if not self._enabled: + return False + + if not self._is_cfg_enabled(): + return False + + is_within_range = False + if self._step is not None: + is_within_range = self._step > self.adaptive_projected_guidance_start_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.adaptive_projected_guidance_scale, 0.0) + else: + is_close = math.isclose(self.adaptive_projected_guidance_scale, 1.0) + + return is_within_range and not is_close + + def get_state(self): + state = super().get_state() + state["momentum_buffer"] = self.momentum_buffer + state["is_apg_enabled"] = self._is_apg_enabled() + state["is_cfg_enabled"] = self._is_cfg_enabled() + return state + + +# Copied from diffusers.guiders.adaptive_projected_guidance.MomentumBuffer +class MomentumBuffer: + def __init__(self, momentum: float): + self.momentum = momentum + self.running_average = 0 + + def update(self, update_value: torch.Tensor): + new_average = self.momentum * self.running_average + self.running_average = update_value + new_average + + def __repr__(self) -> str: + """ + Returns a string representation showing momentum, shape, statistics, and a slice of the running_average. + """ + if isinstance(self.running_average, torch.Tensor): + shape = tuple(self.running_average.shape) + + # Calculate statistics + with torch.no_grad(): + stats = { + "mean": self.running_average.mean().item(), + "std": self.running_average.std().item(), + "min": self.running_average.min().item(), + "max": self.running_average.max().item(), + } + + # Get a slice (max 3 elements per dimension) + slice_indices = tuple(slice(None, min(3, dim)) for dim in shape) + sliced_data = self.running_average[slice_indices] + + # Format the slice for display (convert to float32 for numpy compatibility with bfloat16) + slice_str = str(sliced_data.detach().float().cpu().numpy()) + if len(slice_str) > 200: # Truncate if too long + slice_str = slice_str[:200] + "..." + + stats_str = ", ".join([f"{k}={v:.4f}" for k, v in stats.items()]) + + return ( + f"MomentumBuffer(\n" + f" momentum={self.momentum},\n" + f" shape={shape},\n" + f" stats=[{stats_str}],\n" + f" slice={slice_str}\n" + f")" + ) + else: + return f"MomentumBuffer(momentum={self.momentum}, running_average={self.running_average})" + + +def update_momentum_buffer( + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor, + momentum_buffer: MomentumBuffer | None = None, +): + diff = pred_cond - pred_uncond + if momentum_buffer is not None: + momentum_buffer.update(diff) + + +def normalized_guidance( + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor, + guidance_scale: float, + momentum_buffer: MomentumBuffer | None = None, + eta: float = 1.0, + norm_threshold: float = 0.0, + use_original_formulation: bool = False, +): + if momentum_buffer is not None: + update_momentum_buffer(pred_cond, pred_uncond, momentum_buffer) + diff = momentum_buffer.running_average + else: + diff = pred_cond - pred_uncond + + dim = [-i for i in range(1, len(diff.shape))] + + if norm_threshold > 0: + ones = torch.ones_like(diff) + diff_norm = diff.norm(p=2, dim=dim, keepdim=True) + scale_factor = torch.minimum(ones, norm_threshold / diff_norm) + diff = diff * scale_factor + + v0, v1 = diff.double(), pred_cond.double() + v1 = torch.nn.functional.normalize(v1, dim=dim) + v0_parallel = (v0 * v1).sum(dim=dim, keepdim=True) * v1 + v0_orthogonal = v0 - v0_parallel + diff_parallel, diff_orthogonal = v0_parallel.type_as(diff), v0_orthogonal.type_as(diff) + normalized_update = diff_orthogonal + eta * diff_parallel + + pred = pred_cond if use_original_formulation else pred_uncond + pred = pred + guidance_scale * normalized_update + + return pred diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/auto_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/auto_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..d6b6d3c492f0a1b76eb3d6ae34e83bedaf7adeda --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/auto_guidance.py @@ -0,0 +1,198 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING, Any + +import torch + +from ..configuration_utils import register_to_config +from ..hooks import HookRegistry, LayerSkipConfig +from ..hooks.layer_skip import _apply_layer_skip_hook +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class AutoGuidance(BaseGuidance): + """ + AutoGuidance: https://huggingface.co/papers/2406.02507 + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + auto_guidance_layers (`int` or `list[int]`, *optional*): + The layer indices to apply skip layer guidance to. Can be a single integer or a list of integers. If not + provided, `skip_layer_config` must be provided. + auto_guidance_config (`LayerSkipConfig` or `list[LayerSkipConfig]`, *optional*): + The configuration for the skip layer guidance. Can be a single `LayerSkipConfig` or a list of + `LayerSkipConfig`. If not provided, `skip_layer_guidance_layers` must be provided. + dropout (`float`, *optional*): + The dropout probability for autoguidance on the enabled skip layers (either with `auto_guidance_layers` or + `auto_guidance_config`). If not provided, the dropout probability will be set to 1.0. + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.0`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `1.0`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + auto_guidance_layers: int | list[int] | None = None, + auto_guidance_config: LayerSkipConfig | list[LayerSkipConfig] | dict[str, Any] = None, + dropout: float | None = None, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.auto_guidance_layers = auto_guidance_layers + self.auto_guidance_config = auto_guidance_config + self.dropout = dropout + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + is_layer_or_config_provided = auto_guidance_layers is not None or auto_guidance_config is not None + is_layer_and_config_provided = auto_guidance_layers is not None and auto_guidance_config is not None + if not is_layer_or_config_provided: + raise ValueError( + "Either `auto_guidance_layers` or `auto_guidance_config` must be provided to enable AutoGuidance." + ) + if is_layer_and_config_provided: + raise ValueError("Only one of `auto_guidance_layers` or `auto_guidance_config` can be provided.") + if auto_guidance_config is None and dropout is None: + raise ValueError("`dropout` must be provided if `auto_guidance_layers` is provided.") + + if auto_guidance_layers is not None: + if isinstance(auto_guidance_layers, int): + auto_guidance_layers = [auto_guidance_layers] + if not isinstance(auto_guidance_layers, list): + raise ValueError( + f"Expected `auto_guidance_layers` to be an int or a list of ints, but got {type(auto_guidance_layers)}." + ) + auto_guidance_config = [ + LayerSkipConfig(layer, fqn="auto", dropout=dropout) for layer in auto_guidance_layers + ] + + if isinstance(auto_guidance_config, dict): + auto_guidance_config = LayerSkipConfig.from_dict(auto_guidance_config) + + if isinstance(auto_guidance_config, LayerSkipConfig): + auto_guidance_config = [auto_guidance_config] + + if not isinstance(auto_guidance_config, list): + raise ValueError( + f"Expected `auto_guidance_config` to be a LayerSkipConfig or a list of LayerSkipConfig, but got {type(auto_guidance_config)}." + ) + elif isinstance(next(iter(auto_guidance_config), None), dict): + auto_guidance_config = [LayerSkipConfig.from_dict(config) for config in auto_guidance_config] + + self.auto_guidance_config = auto_guidance_config + self._auto_guidance_hook_names = [f"AutoGuidance_{i}" for i in range(len(self.auto_guidance_config))] + + def prepare_models(self, denoiser: torch.nn.Module) -> None: + self._count_prepared += 1 + if self._is_ag_enabled() and self.is_unconditional: + for name, config in zip(self._auto_guidance_hook_names, self.auto_guidance_config): + _apply_layer_skip_hook(denoiser, config, name=name) + + def cleanup_models(self, denoiser: torch.nn.Module) -> None: + if self._is_ag_enabled() and self.is_unconditional: + for name in self._auto_guidance_hook_names: + registry = HookRegistry.check_if_exists_or_initialize(denoiser) + registry.remove_hook(name, recurse=True) + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + if not self._is_ag_enabled(): + pred = pred_cond + else: + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_ag_enabled(): + num_conditions += 1 + return num_conditions + + def _is_ag_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/classifier_free_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/classifier_free_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..a2180a626bfcda4e706d22b8492f4116862e3828 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/classifier_free_guidance.py @@ -0,0 +1,156 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class ClassifierFreeGuidance(BaseGuidance): + """ + Implements Classifier-Free Guidance (CFG) for diffusion models. + + Reference: https://huggingface.co/papers/2207.12598 + + CFG improves generation quality and prompt adherence by jointly training models on both conditional and + unconditional data, then combining predictions during inference. This allows trading off between quality (high + guidance) and diversity (low guidance). + + **Two CFG Formulations:** + + 1. **Original formulation** (from paper): + ``` + x_pred = x_cond + guidance_scale * (x_cond - x_uncond) + ``` + Moves conditional predictions further from unconditional ones. + + 2. **Diffusers-native formulation** (default, from Imagen paper): + ``` + x_pred = x_uncond + guidance_scale * (x_cond - x_uncond) + ``` + Moves unconditional predictions toward conditional ones, effectively suppressing negative features (e.g., "bad + quality", "watermarks"). Equivalent in theory but more intuitive. + + Use `use_original_formulation=True` to switch to the original formulation. + + Args: + guidance_scale (`float`, defaults to `7.5`): + CFG scale applied by this guider during post-processing. Higher values = stronger prompt conditioning but + may reduce quality. Typical range: 1.0-20.0. + guidance_rescale (`float`, defaults to `0.0`): + Rescaling factor to prevent overexposure from high guidance scales. Based on [Common Diffusion Noise + Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). Range: 0.0 (no rescaling) + to 1.0 (full rescaling). + use_original_formulation (`bool`, defaults to `False`): + If `True`, uses the original CFG formulation from the paper. If `False` (default), uses the + diffusers-native formulation from the Imagen paper. + start (`float`, defaults to `0.0`): + Fraction of denoising steps (0.0-1.0) after which CFG starts. Use > 0.0 to disable CFG in early denoising + steps. + stop (`float`, defaults to `1.0`): + Fraction of denoising steps (0.0-1.0) after which CFG stops. Use < 1.0 to disable CFG in late denoising + steps. + enabled (`bool`, defaults to `True`): + Whether CFG is enabled. Set to `False` to disable CFG entirely (uses only conditional predictions). + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + if not self._is_cfg_enabled(): + pred = pred_cond + else: + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_cfg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_cfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/classifier_free_zero_star_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/classifier_free_zero_star_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..dd71c7537cac133c2cb5b0e5aff31f7783e81bf3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/classifier_free_zero_star_guidance.py @@ -0,0 +1,164 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class ClassifierFreeZeroStarGuidance(BaseGuidance): + """ + Classifier-free Zero* (CFG-Zero*): https://huggingface.co/papers/2503.18886 + + This is an implementation of the Classifier-Free Zero* guidance technique, which is a variant of classifier-free + guidance. It proposes zero initialization of the noise predictions for the first few steps of the diffusion + process, and also introduces an optimal rescaling factor for the noise predictions, which can help in improving the + quality of generated images. + + The authors of the paper suggest setting zero initialization in the first 4% of the inference steps. + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + zero_init_steps (`int`, defaults to `1`): + The number of inference steps for which the noise predictions are zeroed out (see Section 4.2). + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.01`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `0.2`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + zero_init_steps: int = 1, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.zero_init_steps = zero_init_steps + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + # YiYi Notes: add default behavior for self._enabled == False + if not self._enabled: + pred = pred_cond + + elif self._step < self.zero_init_steps: + pred = torch.zeros_like(pred_cond) + elif not self._is_cfg_enabled(): + pred = pred_cond + else: + pred_cond_flat = pred_cond.flatten(1) + pred_uncond_flat = pred_uncond.flatten(1) + alpha = cfg_zero_star_scale(pred_cond_flat, pred_uncond_flat) + alpha = alpha.view(-1, *(1,) * (len(pred_cond.shape) - 1)) + pred_uncond = pred_uncond * alpha + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_cfg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_cfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + +def cfg_zero_star_scale(cond: torch.Tensor, uncond: torch.Tensor, eps: float = 1e-8) -> torch.Tensor: + cond_dtype = cond.dtype + cond = cond.float() + uncond = uncond.float() + dot_product = torch.sum(cond * uncond, dim=1, keepdim=True) + squared_norm = torch.sum(uncond**2, dim=1, keepdim=True) + eps + # st_star = v_cond^T * v_uncond / ||v_uncond||^2 + scale = dot_product / squared_norm + return scale.to(dtype=cond_dtype) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/frequency_decoupled_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/frequency_decoupled_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..b92ddf2c03f9f4ffd968fd54be3feceaa3d81226 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/frequency_decoupled_guidance.py @@ -0,0 +1,335 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from ..utils import is_kornia_available +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +_CAN_USE_KORNIA = is_kornia_available() + + +if _CAN_USE_KORNIA: + from kornia.geometry import pyrup as upsample_and_blur_func + from kornia.geometry.transform import build_laplacian_pyramid as build_laplacian_pyramid_func +else: + upsample_and_blur_func = None + build_laplacian_pyramid_func = None + + +def project(v0: torch.Tensor, v1: torch.Tensor, upcast_to_double: bool = True) -> tuple[torch.Tensor, torch.Tensor]: + """ + Project vector v0 onto vector v1, returning the parallel and orthogonal components of v0. Implementation from paper + (Algorithm 2). + """ + # v0 shape: [B, ...] + # v1 shape: [B, ...] + # Assume first dim is a batch dim and all other dims are channel or "spatial" dims + all_dims_but_first = list(range(1, len(v0.shape))) + if upcast_to_double: + dtype = v0.dtype + v0, v1 = v0.double(), v1.double() + v1 = torch.nn.functional.normalize(v1, dim=all_dims_but_first) + v0_parallel = (v0 * v1).sum(dim=all_dims_but_first, keepdim=True) * v1 + v0_orthogonal = v0 - v0_parallel + if upcast_to_double: + v0_parallel = v0_parallel.to(dtype) + v0_orthogonal = v0_orthogonal.to(dtype) + return v0_parallel, v0_orthogonal + + +def build_image_from_pyramid(pyramid: list[torch.Tensor]) -> torch.Tensor: + """ + Recovers the data space latents from the Laplacian pyramid frequency space. Implementation from the paper + (Algorithm 2). + """ + # pyramid shapes: [[B, C, H, W], [B, C, H/2, W/2], ...] + img = pyramid[-1] + for i in range(len(pyramid) - 2, -1, -1): + img = upsample_and_blur_func(img) + pyramid[i] + return img + + +class FrequencyDecoupledGuidance(BaseGuidance): + """ + Frequency-Decoupled Guidance (FDG): https://huggingface.co/papers/2506.19713 + + FDG is a technique similar to (and based on) classifier-free guidance (CFG) which is used to improve generation + quality and condition-following in diffusion models. Like CFG, during training we jointly train the model on both + conditional and unconditional data, and use a combination of the two during inference. (If you want more details on + how CFG works, you can check out the CFG guider.) + + FDG differs from CFG in that the normal CFG prediction is instead decoupled into low- and high-frequency components + using a frequency transform (such as a Laplacian pyramid). The CFG update is then performed in frequency space + separately for the low- and high-frequency components with different guidance scales. Finally, the inverse + frequency transform is used to map the CFG frequency predictions back to data space (e.g. pixel space for images) + to form the final FDG prediction. + + For images, the FDG authors found that using low guidance scales for the low-frequency components retains sample + diversity and realistic color composition, while using high guidance scales for high-frequency components enhances + sample quality (such as better visual details). Therefore, they recommend using low guidance scales (low w_low) for + the low-frequency components and high guidance scales (high w_high) for the high-frequency components. As an + example, they suggest w_low = 5.0 and w_high = 10.0 for Stable Diffusion XL (see Table 8 in the paper). + + As with CFG, Diffusers implements the scaling and shifting on the unconditional prediction based on the [Imagen + paper](https://huggingface.co/papers/2205.11487), which is equivalent to what the original CFG paper proposed in + theory. [x_pred = x_uncond + scale * (x_cond - x_uncond)] + + The `use_original_formulation` argument can be set to `True` to use the original CFG formulation mentioned in the + paper. By default, we use the diffusers-native implementation that has been in the codebase for a long time. + + Args: + guidance_scales (`list[float]`, defaults to `[10.0, 5.0]`): + The scale parameter for frequency-decoupled guidance for each frequency component, listed from highest + frequency level to lowest. Higher values result in stronger conditioning on the text prompt, while lower + values allow for more freedom in generation. Higher values may lead to saturation and deterioration of + image quality. The FDG authors recommend using higher guidance scales for higher frequency components and + lower guidance scales for lower frequency components (so `guidance_scales` should typically be sorted in + descending order). + guidance_rescale (`float` or `list[float]`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). If a list is supplied, it should be the same length as + `guidance_scales`. + parallel_weights (`float` or `list[float]`, *optional*): + Optional weights for the parallel component of each frequency component of the projected CFG shift. If not + set, the weights will default to `1.0` for all components, which corresponds to using the normal CFG shift + (that is, equal weights for the parallel and orthogonal components). If set, a value in `[0, 1]` is + recommended. If a list is supplied, it should be the same length as `guidance_scales`. + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float` or `list[float]`, defaults to `0.0`): + The fraction of the total number of denoising steps after which guidance starts. If a list is supplied, it + should be the same length as `guidance_scales`. + stop (`float` or `list[float]`, defaults to `1.0`): + The fraction of the total number of denoising steps after which guidance stops. If a list is supplied, it + should be the same length as `guidance_scales`. + guidance_rescale_space (`str`, defaults to `"data"`): + Whether to performance guidance rescaling in `"data"` space (after the full FDG update in data space) or in + `"freq"` space (right after the CFG update, for each freq level). Note that frequency space rescaling is + speculative and may not produce expected results. If `"data"` is set, the first `guidance_rescale` value + will be used; otherwise, per-frequency-level guidance rescale values will be used if available. + upcast_to_double (`bool`, defaults to `True`): + Whether to upcast certain operations, such as the projection operation when using `parallel_weights`, to + float64 when performing guidance. This may result in better performance at the cost of increased runtime. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scales: list[float] | tuple[float] = [10.0, 5.0], + guidance_rescale: float | list[float] | tuple[float] = 0.0, + parallel_weights: float | list[float] | tuple[float] | None = None, + use_original_formulation: bool = False, + start: float | list[float] | tuple[float] = 0.0, + stop: float | list[float] | tuple[float] = 1.0, + guidance_rescale_space: str = "data", + upcast_to_double: bool = True, + enabled: bool = True, + ): + if not _CAN_USE_KORNIA: + raise ImportError( + "The `FrequencyDecoupledGuidance` guider cannot be instantiated because the `kornia` library on which " + "it depends is not available in the current environment. You can install `kornia` with `pip install " + "kornia`." + ) + + # Set start to earliest start for any freq component and stop to latest stop for any freq component + min_start = start if isinstance(start, float) else min(start) + max_stop = stop if isinstance(stop, float) else max(stop) + super().__init__(min_start, max_stop, enabled) + + self.guidance_scales = guidance_scales + self.levels = len(guidance_scales) + + if isinstance(guidance_rescale, float): + self.guidance_rescale = [guidance_rescale] * self.levels + elif len(guidance_rescale) == self.levels: + self.guidance_rescale = guidance_rescale + else: + raise ValueError( + f"`guidance_rescale` has length {len(guidance_rescale)} but should have the same length as " + f"`guidance_scales` ({len(self.guidance_scales)})" + ) + # Whether to perform guidance rescaling in frequency space (right after the CFG update) or data space (after + # transforming from frequency space back to data space) + if guidance_rescale_space not in ["data", "freq"]: + raise ValueError( + f"Guidance rescale space is {guidance_rescale_space} but must be one of `data` or `freq`." + ) + self.guidance_rescale_space = guidance_rescale_space + + if parallel_weights is None: + # Use normal CFG shift (equal weights for parallel and orthogonal components) + self.parallel_weights = [1.0] * self.levels + elif isinstance(parallel_weights, float): + self.parallel_weights = [parallel_weights] * self.levels + elif len(parallel_weights) == self.levels: + self.parallel_weights = parallel_weights + else: + raise ValueError( + f"`parallel_weights` has length {len(parallel_weights)} but should have the same length as " + f"`guidance_scales` ({len(self.guidance_scales)})" + ) + + self.use_original_formulation = use_original_formulation + self.upcast_to_double = upcast_to_double + + if isinstance(start, float): + self.guidance_start = [start] * self.levels + elif len(start) == self.levels: + self.guidance_start = start + else: + raise ValueError( + f"`start` has length {len(start)} but should have the same length as `guidance_scales` " + f"({len(self.guidance_scales)})" + ) + if isinstance(stop, float): + self.guidance_stop = [stop] * self.levels + elif len(stop) == self.levels: + self.guidance_stop = stop + else: + raise ValueError( + f"`stop` has length {len(stop)} but should have the same length as `guidance_scales` " + f"({len(self.guidance_scales)})" + ) + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + if not self._is_fdg_enabled(): + pred = pred_cond + else: + # Apply the frequency transform (e.g. Laplacian pyramid) to the conditional and unconditional predictions. + pred_cond_pyramid = build_laplacian_pyramid_func(pred_cond, self.levels) + pred_uncond_pyramid = build_laplacian_pyramid_func(pred_uncond, self.levels) + + # From high frequencies to low frequencies, following the paper implementation + pred_guided_pyramid = [] + parameters = zip(self.guidance_scales, self.parallel_weights, self.guidance_rescale) + for level, (guidance_scale, parallel_weight, guidance_rescale) in enumerate(parameters): + if self._is_fdg_enabled_for_level(level): + # Get the cond/uncond preds (in freq space) at the current frequency level + pred_cond_freq = pred_cond_pyramid[level] + pred_uncond_freq = pred_uncond_pyramid[level] + + shift = pred_cond_freq - pred_uncond_freq + + # Apply parallel weights, if used (1.0 corresponds to using the normal CFG shift) + if not math.isclose(parallel_weight, 1.0): + shift_parallel, shift_orthogonal = project(shift, pred_cond_freq, self.upcast_to_double) + shift = parallel_weight * shift_parallel + shift_orthogonal + + # Apply CFG update for the current frequency level + pred = pred_cond_freq if self.use_original_formulation else pred_uncond_freq + pred = pred + guidance_scale * shift + + if self.guidance_rescale_space == "freq" and guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond_freq, guidance_rescale) + + # Add the current FDG guided level to the FDG prediction pyramid + pred_guided_pyramid.append(pred) + else: + # Add the current pred_cond_pyramid level as the "non-FDG" prediction + pred_guided_pyramid.append(pred_cond_freq) + + # Convert from frequency space back to data (e.g. pixel) space by applying inverse freq transform + pred = build_image_from_pyramid(pred_guided_pyramid) + + # If rescaling in data space, use the first elem of self.guidance_rescale as the "global" rescale value + # across all freq levels + if self.guidance_rescale_space == "data" and self.guidance_rescale[0] > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale[0]) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_fdg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_fdg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = all(math.isclose(guidance_scale, 0.0) for guidance_scale in self.guidance_scales) + else: + is_close = all(math.isclose(guidance_scale, 1.0) for guidance_scale in self.guidance_scales) + + return is_within_range and not is_close + + def _is_fdg_enabled_for_level(self, level: int) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self.guidance_start[level] * self._num_inference_steps) + skip_stop_step = int(self.guidance_stop[level] * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scales[level], 0.0) + else: + is_close = math.isclose(self.guidance_scales[level], 1.0) + + return is_within_range and not is_close diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/guider_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/guider_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7be68424c345050dea854930255d8ca722d76a2f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/guider_utils.py @@ -0,0 +1,396 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Any + +import torch +from huggingface_hub.utils import validate_hf_hub_args +from typing_extensions import Self + +from ..configuration_utils import ConfigMixin +from ..utils import BaseOutput, PushToHubMixin, get_logger + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +GUIDER_CONFIG_NAME = "guider_config.json" + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +class BaseGuidance(ConfigMixin, PushToHubMixin): + r"""Base class providing the skeleton for implementing guidance techniques.""" + + config_name = GUIDER_CONFIG_NAME + _input_predictions = None + _identifier_key = "__guidance_identifier__" + + def __init__(self, start: float = 0.0, stop: float = 1.0, enabled: bool = True): + logger.warning( + "Guiders are currently an experimental feature under active development. The API is subject to breaking changes in future releases." + ) + + self._start = start + self._stop = stop + self._step: int = None + self._num_inference_steps: int = None + self._timestep: torch.LongTensor = None + self._count_prepared = 0 + self._input_fields: dict[str, str | tuple[str, str]] = None + self._enabled = enabled + + if not (0.0 <= start < 1.0): + raise ValueError(f"Expected `start` to be between 0.0 and 1.0, but got {start}.") + if not (start <= stop <= 1.0): + raise ValueError(f"Expected `stop` to be between {start} and 1.0, but got {stop}.") + + if self._input_predictions is None or not isinstance(self._input_predictions, list): + raise ValueError( + "`_input_predictions` must be a list of required prediction names for the guidance technique." + ) + + def new(self, **kwargs): + """ + Creates a copy of this guider instance, optionally with modified configuration parameters. + + Args: + **kwargs: Configuration parameters to override in the new instance. If no kwargs are provided, + returns an exact copy with the same configuration. + + Returns: + A new guider instance with the same (or updated) configuration. + + Example: + ```python + # Create a CFG guider + guider = ClassifierFreeGuidance(guidance_scale=3.5) + + # Create an exact copy + same_guider = guider.new() + + # Create a copy with different start step, keeping other config the same + new_guider = guider.new(guidance_scale=5) + ``` + """ + return self.__class__.from_config(self.config, **kwargs) + + def disable(self): + self._enabled = False + + def enable(self): + self._enabled = True + + def set_state(self, step: int, num_inference_steps: int, timestep: torch.LongTensor) -> None: + self._step = step + self._num_inference_steps = num_inference_steps + self._timestep = timestep + self._count_prepared = 0 + + def get_state(self) -> dict[str, Any]: + """ + Returns the current state of the guidance technique as a dictionary. The state variables will be included in + the __repr__ method. Returns: + `dict[str, Any]`: A dictionary containing the current state variables including: + - step: Current inference step + - num_inference_steps: Total number of inference steps + - timestep: Current timestep tensor + - count_prepared: Number of times prepare_models has been called + - enabled: Whether the guidance is enabled + - num_conditions: Number of conditions + """ + state = { + "step": self._step, + "num_inference_steps": self._num_inference_steps, + "timestep": self._timestep, + "count_prepared": self._count_prepared, + "enabled": self._enabled, + "num_conditions": self.num_conditions, + } + return state + + def __repr__(self) -> str: + """ + Returns a string representation of the guidance object including both config and current state. + """ + # Get ConfigMixin's __repr__ + str_repr = super().__repr__() + + # Get current state + state = self.get_state() + + # Format each state variable on its own line with indentation + state_lines = [] + for k, v in state.items(): + # Convert value to string and handle multi-line values + v_str = str(v) + if "\n" in v_str: + # For multi-line values (like MomentumBuffer), indent subsequent lines + v_lines = v_str.split("\n") + v_str = v_lines[0] + "\n" + "\n".join([" " + line for line in v_lines[1:]]) + state_lines.append(f" {k}: {v_str}") + + state_str = "\n".join(state_lines) + + return f"{str_repr}\nState:\n{state_str}" + + def prepare_models(self, denoiser: torch.nn.Module) -> None: + """ + Prepares the models for the guidance technique on a given batch of data. This method should be overridden in + subclasses to implement specific model preparation logic. + """ + self._count_prepared += 1 + + def cleanup_models(self, denoiser: torch.nn.Module) -> None: + """ + Cleans up the models for the guidance technique after a given batch of data. This method should be overridden + in subclasses to implement specific model cleanup logic. It is useful for removing any hooks or other stateful + modifications made during `prepare_models`. + """ + pass + + def prepare_inputs(self, data: "BlockState") -> list["BlockState"]: + raise NotImplementedError("BaseGuidance::prepare_inputs must be implemented in subclasses.") + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + raise NotImplementedError("BaseGuidance::prepare_inputs_from_block_state must be implemented in subclasses.") + + def __call__(self, data: list["BlockState"]) -> Any: + if not all(hasattr(d, "noise_pred") for d in data): + raise ValueError("Expected all data to have `noise_pred` attribute.") + if len(data) != self.num_conditions: + raise ValueError( + f"Expected {self.num_conditions} data items, but got {len(data)}. Please check the input data." + ) + forward_inputs = {getattr(d, self._identifier_key): d.noise_pred for d in data} + return self.forward(**forward_inputs) + + def forward(self, *args, **kwargs) -> Any: + raise NotImplementedError("BaseGuidance::forward must be implemented in subclasses.") + + @property + def is_conditional(self) -> bool: + raise NotImplementedError("BaseGuidance::is_conditional must be implemented in subclasses.") + + @property + def is_unconditional(self) -> bool: + return not self.is_conditional + + @property + def num_conditions(self) -> int: + raise NotImplementedError("BaseGuidance::num_conditions must be implemented in subclasses.") + + @classmethod + def _prepare_batch( + cls, + data: dict[str, tuple[torch.Tensor, torch.Tensor]], + tuple_index: int, + identifier: str, + ) -> "BlockState": + """ + Prepares a batch of data for the guidance technique. This method is used in the `prepare_inputs` method of the + `BaseGuidance` class. It prepares the batch based on the provided tuple index. + + Args: + input_fields (`dict[str, str | tuple[str, str]]`): + A dictionary where the keys are the names of the fields that will be used to store the data once it is + prepared with `prepare_inputs`. The values can be either a string or a tuple of length 2, which is used + to look up the required data provided for preparation. If a string is provided, it will be used as the + conditional data (or unconditional if used with a guidance method that requires it). If a tuple of + length 2 is provided, the first element must be the conditional data identifier and the second element + must be the unconditional data identifier or None. + data (`BlockState`): + The input data to be prepared. + tuple_index (`int`): + The index to use when accessing input fields that are tuples. + + Returns: + `BlockState`: The prepared batch of data. + """ + from ..modular_pipelines.modular_pipeline import BlockState + + data_batch = {} + for key, value in data.items(): + try: + if isinstance(value, torch.Tensor): + data_batch[key] = value + elif isinstance(value, tuple): + data_batch[key] = value[tuple_index] + else: + raise ValueError(f"Invalid value type: {type(value)}") + except ValueError: + logger.debug(f"`data` does not have attribute(s) {value}, skipping.") + data_batch[cls._identifier_key] = identifier + return BlockState(**data_batch) + + @classmethod + def _prepare_batch_from_block_state( + cls, + input_fields: dict[str, str | tuple[str, str]], + data: "BlockState", + tuple_index: int, + identifier: str, + ) -> "BlockState": + """ + Prepares a batch of data for the guidance technique. This method is used in the `prepare_inputs` method of the + `BaseGuidance` class. It prepares the batch based on the provided tuple index. + + Args: + input_fields (`dict[str, str | tuple[str, str]]`): + A dictionary where the keys are the names of the fields that will be used to store the data once it is + prepared with `prepare_inputs`. The values can be either a string or a tuple of length 2, which is used + to look up the required data provided for preparation. If a string is provided, it will be used as the + conditional data (or unconditional if used with a guidance method that requires it). If a tuple of + length 2 is provided, the first element must be the conditional data identifier and the second element + must be the unconditional data identifier or None. + data (`BlockState`): + The input data to be prepared. + tuple_index (`int`): + The index to use when accessing input fields that are tuples. + + Returns: + `BlockState`: The prepared batch of data. + """ + from ..modular_pipelines.modular_pipeline import BlockState + + data_batch = {} + for key, value in input_fields.items(): + try: + if isinstance(value, str): + data_batch[key] = getattr(data, value) + elif isinstance(value, tuple): + data_batch[key] = getattr(data, value[tuple_index]) + else: + # We've already checked that value is a string or a tuple of strings with length 2 + pass + except AttributeError: + logger.debug(f"`data` does not have attribute(s) {value}, skipping.") + data_batch[cls._identifier_key] = identifier + return BlockState(**data_batch) + + @classmethod + @validate_hf_hub_args + def from_pretrained( + cls, + pretrained_model_name_or_path: str | os.PathLike | None = None, + subfolder: str | None = None, + return_unused_kwargs=False, + **kwargs, + ) -> Self: + r""" + Instantiate a guider from a pre-defined JSON configuration file in a local directory or Hub repository. + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the guider configuration + saved with [`~BaseGuidance.save_pretrained`]. + subfolder (`str`, *optional*): + The subfolder location of a model file within a larger model repository on the Hub or locally. + return_unused_kwargs (`bool`, *optional*, defaults to `False`): + Whether kwargs that are not consumed by the Python class should be returned or not. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + + > [!TIP] > To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in + with `hf > auth login`. You can also activate the special > + ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use this method in a > + firewalled environment. + + """ + config, kwargs, commit_hash = cls.load_config( + pretrained_model_name_or_path=pretrained_model_name_or_path, + subfolder=subfolder, + return_unused_kwargs=True, + return_commit_hash=True, + **kwargs, + ) + return cls.from_config(config, return_unused_kwargs=return_unused_kwargs, **kwargs) + + def save_pretrained(self, save_directory: str | os.PathLike, push_to_hub: bool = False, **kwargs): + """ + Save a guider configuration object to a directory so that it can be reloaded using the + [`~BaseGuidance.from_pretrained`] class method. + + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs) + + +class GuiderOutput(BaseOutput): + pred: torch.Tensor + pred_cond: torch.Tensor | None + pred_uncond: torch.Tensor | None + + +def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): + r""" + Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on + Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + + Args: + noise_cfg (`torch.Tensor`): + The predicted noise tensor for the guided diffusion process. + noise_pred_text (`torch.Tensor`): + The predicted noise tensor for the text-guided diffusion process. + guidance_rescale (`float`, *optional*, defaults to 0.0): + A rescale factor applied to the noise predictions. + Returns: + noise_cfg (`torch.Tensor`): The rescaled noise prediction tensor. + """ + std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) + std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) + # rescale the results from guidance (fixes overexposure) + noise_pred_rescaled = noise_cfg * (std_text / std_cfg) + # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images + noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg + return noise_cfg diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/magnitude_aware_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/magnitude_aware_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..e83545fd889ab92c21566a7030dd817ccfc610c0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/magnitude_aware_guidance.py @@ -0,0 +1,159 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class MagnitudeAwareGuidance(BaseGuidance): + """ + Magnitude-Aware Mitigation for Boosted Guidance (MAMBO-G): https://huggingface.co/papers/2508.03442 + + Args: + guidance_scale (`float`, defaults to `10.0`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + alpha (`float`, defaults to `8.0`): + The alpha parameter for the magnitude-aware guidance. Higher values cause more aggressive supression of + guidance scale when the magnitude of the guidance update is large. + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.0`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `1.0`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 10.0, + alpha: float = 8.0, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.alpha = alpha + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + if not self._is_mambo_g_enabled(): + pred = pred_cond + else: + pred = mambo_guidance( + pred_cond, + pred_uncond, + self.guidance_scale, + self.alpha, + self.use_original_formulation, + ) + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_mambo_g_enabled(): + num_conditions += 1 + return num_conditions + + def _is_mambo_g_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + +def mambo_guidance( + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor, + guidance_scale: float, + alpha: float = 8.0, + use_original_formulation: bool = False, +): + dim = list(range(1, len(pred_cond.shape))) + diff = pred_cond - pred_uncond + ratio = torch.norm(diff, dim=dim, keepdim=True) / torch.norm(pred_uncond, dim=dim, keepdim=True) + guidance_scale_final = ( + guidance_scale * torch.exp(-alpha * ratio) + if use_original_formulation + else 1.0 + (guidance_scale - 1.0) * torch.exp(-alpha * ratio) + ) + pred = pred_cond if use_original_formulation else pred_uncond + pred = pred + guidance_scale_final * diff + + return pred diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/perturbed_attention_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/perturbed_attention_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..904d319ec3bbcbc17ff55982bacf702fe4b73bc5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/perturbed_attention_guidance.py @@ -0,0 +1,289 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING, Any + +import torch + +from ..configuration_utils import register_to_config +from ..hooks import HookRegistry, LayerSkipConfig +from ..hooks.layer_skip import _apply_layer_skip_hook +from ..utils import get_logger +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +class PerturbedAttentionGuidance(BaseGuidance): + """ + Perturbed Attention Guidance (PAG): https://huggingface.co/papers/2403.17377 + + The intution behind PAG can be thought of as moving the CFG predicted distribution estimates further away from + worse versions of the conditional distribution estimates. PAG was one of the first techniques to introduce the idea + of using a worse version of the trained model for better guiding itself in the denoising process. It perturbs the + attention scores of the latent stream by replacing the score matrix with an identity matrix for selectively chosen + layers. + + Additional reading: + - [Guiding a Diffusion Model with a Bad Version of Itself](https://huggingface.co/papers/2406.02507) + + PAG is implemented with similar implementation to SkipLayerGuidance due to overlap in the configuration parameters + and implementation details. + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + perturbed_guidance_scale (`float`, defaults to `2.8`): + The scale parameter for perturbed attention guidance. + perturbed_guidance_start (`float`, defaults to `0.01`): + The fraction of the total number of denoising steps after which perturbed attention guidance starts. + perturbed_guidance_stop (`float`, defaults to `0.2`): + The fraction of the total number of denoising steps after which perturbed attention guidance stops. + perturbed_guidance_layers (`int` or `list[int]`, *optional*): + The layer indices to apply perturbed attention guidance to. Can be a single integer or a list of integers. + If not provided, `perturbed_guidance_config` must be provided. + perturbed_guidance_config (`LayerSkipConfig` or `list[LayerSkipConfig]`, *optional*): + The configuration for the perturbed attention guidance. Can be a single `LayerSkipConfig` or a list of + `LayerSkipConfig`. If not provided, `perturbed_guidance_layers` must be provided. + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.01`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `0.2`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + # NOTE: The current implementation does not account for joint latent conditioning (text + image/video tokens in + # the same latent stream). It assumes the entire latent is a single stream of visual tokens. It would be very + # complex to support joint latent conditioning in a model-agnostic manner without specializing the implementation + # for each model architecture. + + _input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + perturbed_guidance_scale: float = 2.8, + perturbed_guidance_start: float = 0.01, + perturbed_guidance_stop: float = 0.2, + perturbed_guidance_layers: int | list[int] | None = None, + perturbed_guidance_config: LayerSkipConfig | list[LayerSkipConfig] | dict[str, Any] = None, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.skip_layer_guidance_scale = perturbed_guidance_scale + self.skip_layer_guidance_start = perturbed_guidance_start + self.skip_layer_guidance_stop = perturbed_guidance_stop + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + if perturbed_guidance_config is None: + if perturbed_guidance_layers is None: + raise ValueError( + "`perturbed_guidance_layers` must be provided if `perturbed_guidance_config` is not specified." + ) + perturbed_guidance_config = LayerSkipConfig( + indices=perturbed_guidance_layers, + fqn="auto", + skip_attention=False, + skip_attention_scores=True, + skip_ff=False, + ) + else: + if perturbed_guidance_layers is not None: + raise ValueError( + "`perturbed_guidance_layers` should not be provided if `perturbed_guidance_config` is specified." + ) + + if isinstance(perturbed_guidance_config, dict): + perturbed_guidance_config = LayerSkipConfig.from_dict(perturbed_guidance_config) + + if isinstance(perturbed_guidance_config, LayerSkipConfig): + perturbed_guidance_config = [perturbed_guidance_config] + + if not isinstance(perturbed_guidance_config, list): + raise ValueError( + "`perturbed_guidance_config` must be a `LayerSkipConfig`, a list of `LayerSkipConfig`, or a dict that can be converted to a `LayerSkipConfig`." + ) + elif isinstance(next(iter(perturbed_guidance_config), None), dict): + perturbed_guidance_config = [LayerSkipConfig.from_dict(config) for config in perturbed_guidance_config] + + for config in perturbed_guidance_config: + if config.skip_attention or not config.skip_attention_scores or config.skip_ff: + logger.warning( + "Perturbed Attention Guidance is designed to perturb attention scores, so `skip_attention` should be False, `skip_attention_scores` should be True, and `skip_ff` should be False. " + "Please check your configuration. Modifying the config to match the expected values." + ) + config.skip_attention = False + config.skip_attention_scores = True + config.skip_ff = False + + self.skip_layer_config = perturbed_guidance_config + self._skip_layer_hook_names = [f"SkipLayerGuidance_{i}" for i in range(len(self.skip_layer_config))] + + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.prepare_models + def prepare_models(self, denoiser: torch.nn.Module) -> None: + self._count_prepared += 1 + if self._is_slg_enabled() and self.is_conditional and self._count_prepared > 1: + for name, config in zip(self._skip_layer_hook_names, self.skip_layer_config): + _apply_layer_skip_hook(denoiser, config, name=name) + + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.cleanup_models + def cleanup_models(self, denoiser: torch.nn.Module) -> None: + if self._is_slg_enabled() and self.is_conditional and self._count_prepared > 1: + registry = HookRegistry.check_if_exists_or_initialize(denoiser) + # Remove the hooks after inference + for hook_name in self._skip_layer_hook_names: + registry.remove_hook(hook_name, recurse=True) + + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.prepare_inputs + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + if self.num_conditions == 1: + tuple_indices = [0] + input_predictions = ["pred_cond"] + elif self.num_conditions == 2: + tuple_indices = [0, 1] + input_predictions = ( + ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_skip"] + ) + else: + tuple_indices = [0, 1, 0] + input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + if self.num_conditions == 1: + tuple_indices = [0] + input_predictions = ["pred_cond"] + elif self.num_conditions == 2: + tuple_indices = [0, 1] + input_predictions = ( + ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_skip"] + ) + else: + tuple_indices = [0, 1, 0] + input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.forward + def forward( + self, + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor | None = None, + pred_cond_skip: torch.Tensor | None = None, + ) -> GuiderOutput: + pred = None + + if not self._is_cfg_enabled() and not self._is_slg_enabled(): + pred = pred_cond + elif not self._is_cfg_enabled(): + shift = pred_cond - pred_cond_skip + pred = pred_cond if self.use_original_formulation else pred_cond_skip + pred = pred + self.skip_layer_guidance_scale * shift + elif not self._is_slg_enabled(): + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + else: + shift = pred_cond - pred_uncond + shift_skip = pred_cond - pred_cond_skip + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + self.skip_layer_guidance_scale * shift_skip + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.is_conditional + def is_conditional(self) -> bool: + return self._count_prepared == 1 or self._count_prepared == 3 + + @property + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.num_conditions + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_cfg_enabled(): + num_conditions += 1 + if self._is_slg_enabled(): + num_conditions += 1 + return num_conditions + + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance._is_cfg_enabled + def _is_cfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance._is_slg_enabled + def _is_slg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self.skip_layer_guidance_start * self._num_inference_steps) + skip_stop_step = int(self.skip_layer_guidance_stop * self._num_inference_steps) + is_within_range = skip_start_step < self._step < skip_stop_step + + is_zero = math.isclose(self.skip_layer_guidance_scale, 0.0) + + return is_within_range and not is_zero diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/skip_layer_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/skip_layer_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..cb7e85e179d2bd4d96af5bd9325a276141383fd8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/skip_layer_guidance.py @@ -0,0 +1,280 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING, Any + +import torch + +from ..configuration_utils import register_to_config +from ..hooks import HookRegistry, LayerSkipConfig +from ..hooks.layer_skip import _apply_layer_skip_hook +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class SkipLayerGuidance(BaseGuidance): + """ + Skip Layer Guidance (SLG): https://github.com/Stability-AI/sd3.5 + + Spatio-Temporal Guidance (STG): https://huggingface.co/papers/2411.18664 + + SLG was introduced by StabilityAI for improving structure and anotomy coherence in generated images. It works by + skipping the forward pass of specified transformer blocks during the denoising process on an additional conditional + batch of data, apart from the conditional and unconditional batches already used in CFG + ([~guiders.classifier_free_guidance.ClassifierFreeGuidance]), and then scaling and shifting the CFG predictions + based on the difference between conditional without skipping and conditional with skipping predictions. + + The intution behind SLG can be thought of as moving the CFG predicted distribution estimates further away from + worse versions of the conditional distribution estimates (because skipping layers is equivalent to using a worse + version of the model for the conditional prediction). + + STG is an improvement and follow-up work combining ideas from SLG, PAG and similar techniques for improving + generation quality in video diffusion models. + + Additional reading: + - [Guiding a Diffusion Model with a Bad Version of Itself](https://huggingface.co/papers/2406.02507) + + The values for `skip_layer_guidance_scale`, `skip_layer_guidance_start`, and `skip_layer_guidance_stop` are + defaulted to the recommendations by StabilityAI for Stable Diffusion 3.5 Medium. + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + skip_layer_guidance_scale (`float`, defaults to `2.8`): + The scale parameter for skip layer guidance. Anatomy and structure coherence may improve with higher + values, but it may also lead to overexposure and saturation. + skip_layer_guidance_start (`float`, defaults to `0.01`): + The fraction of the total number of denoising steps after which skip layer guidance starts. + skip_layer_guidance_stop (`float`, defaults to `0.2`): + The fraction of the total number of denoising steps after which skip layer guidance stops. + skip_layer_guidance_layers (`int` or `list[int]`, *optional*): + The layer indices to apply skip layer guidance to. Can be a single integer or a list of integers. If not + provided, `skip_layer_config` must be provided. The recommended values are `[7, 8, 9]` for Stable Diffusion + 3.5 Medium. + skip_layer_config (`LayerSkipConfig` or `list[LayerSkipConfig]`, *optional*): + The configuration for the skip layer guidance. Can be a single `LayerSkipConfig` or a list of + `LayerSkipConfig`. If not provided, `skip_layer_guidance_layers` must be provided. + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.01`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `0.2`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + skip_layer_guidance_scale: float = 2.8, + skip_layer_guidance_start: float = 0.01, + skip_layer_guidance_stop: float = 0.2, + skip_layer_guidance_layers: int | list[int] | None = None, + skip_layer_config: LayerSkipConfig | list[LayerSkipConfig] | dict[str, Any] = None, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.skip_layer_guidance_scale = skip_layer_guidance_scale + self.skip_layer_guidance_start = skip_layer_guidance_start + self.skip_layer_guidance_stop = skip_layer_guidance_stop + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + if not (0.0 <= skip_layer_guidance_start < 1.0): + raise ValueError( + f"Expected `skip_layer_guidance_start` to be between 0.0 and 1.0, but got {skip_layer_guidance_start}." + ) + if not (skip_layer_guidance_start <= skip_layer_guidance_stop <= 1.0): + raise ValueError( + f"Expected `skip_layer_guidance_stop` to be between 0.0 and 1.0, but got {skip_layer_guidance_stop}." + ) + + if skip_layer_guidance_layers is None and skip_layer_config is None: + raise ValueError( + "Either `skip_layer_guidance_layers` or `skip_layer_config` must be provided to enable Skip Layer Guidance." + ) + if skip_layer_guidance_layers is not None and skip_layer_config is not None: + raise ValueError("Only one of `skip_layer_guidance_layers` or `skip_layer_config` can be provided.") + + if skip_layer_guidance_layers is not None: + if isinstance(skip_layer_guidance_layers, int): + skip_layer_guidance_layers = [skip_layer_guidance_layers] + if not isinstance(skip_layer_guidance_layers, list): + raise ValueError( + f"Expected `skip_layer_guidance_layers` to be an int or a list of ints, but got {type(skip_layer_guidance_layers)}." + ) + skip_layer_config = [LayerSkipConfig(layer, fqn="auto") for layer in skip_layer_guidance_layers] + + if isinstance(skip_layer_config, dict): + skip_layer_config = LayerSkipConfig.from_dict(skip_layer_config) + + if isinstance(skip_layer_config, LayerSkipConfig): + skip_layer_config = [skip_layer_config] + + if not isinstance(skip_layer_config, list): + raise ValueError( + f"Expected `skip_layer_config` to be a LayerSkipConfig or a list of LayerSkipConfig, but got {type(skip_layer_config)}." + ) + elif isinstance(next(iter(skip_layer_config), None), dict): + skip_layer_config = [LayerSkipConfig.from_dict(config) for config in skip_layer_config] + + self.skip_layer_config = skip_layer_config + self._skip_layer_hook_names = [f"SkipLayerGuidance_{i}" for i in range(len(self.skip_layer_config))] + + def prepare_models(self, denoiser: torch.nn.Module) -> None: + self._count_prepared += 1 + if self._is_slg_enabled() and self.is_conditional and self._count_prepared > 1: + for name, config in zip(self._skip_layer_hook_names, self.skip_layer_config): + _apply_layer_skip_hook(denoiser, config, name=name) + + def cleanup_models(self, denoiser: torch.nn.Module) -> None: + if self._is_slg_enabled() and self.is_conditional and self._count_prepared > 1: + registry = HookRegistry.check_if_exists_or_initialize(denoiser) + # Remove the hooks after inference + for hook_name in self._skip_layer_hook_names: + registry.remove_hook(hook_name, recurse=True) + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + if self.num_conditions == 1: + tuple_indices = [0] + input_predictions = ["pred_cond"] + elif self.num_conditions == 2: + tuple_indices = [0, 1] + input_predictions = ( + ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_skip"] + ) + else: + tuple_indices = [0, 1, 0] + input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + if self.num_conditions == 1: + tuple_indices = [0] + input_predictions = ["pred_cond"] + elif self.num_conditions == 2: + tuple_indices = [0, 1] + input_predictions = ( + ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_skip"] + ) + else: + tuple_indices = [0, 1, 0] + input_predictions = ["pred_cond", "pred_uncond", "pred_cond_skip"] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward( + self, + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor | None = None, + pred_cond_skip: torch.Tensor | None = None, + ) -> GuiderOutput: + pred = None + + if not self._is_cfg_enabled() and not self._is_slg_enabled(): + pred = pred_cond + elif not self._is_cfg_enabled(): + shift = pred_cond - pred_cond_skip + pred = pred_cond if self.use_original_formulation else pred_cond_skip + pred = pred + self.skip_layer_guidance_scale * shift + elif not self._is_slg_enabled(): + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + else: + shift = pred_cond - pred_uncond + shift_skip = pred_cond - pred_cond_skip + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + self.skip_layer_guidance_scale * shift_skip + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 or self._count_prepared == 3 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_cfg_enabled(): + num_conditions += 1 + if self._is_slg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_cfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + def _is_slg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self.skip_layer_guidance_start * self._num_inference_steps) + skip_stop_step = int(self.skip_layer_guidance_stop * self._num_inference_steps) + is_within_range = skip_start_step < self._step < skip_stop_step + + is_zero = math.isclose(self.skip_layer_guidance_scale, 0.0) + + return is_within_range and not is_zero diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/smoothed_energy_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/smoothed_energy_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..4767607421de58f2e26cb27c2bcb314e05795834 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/smoothed_energy_guidance.py @@ -0,0 +1,269 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from ..hooks import HookRegistry +from ..hooks.smoothed_energy_guidance_utils import SmoothedEnergyGuidanceConfig, _apply_smoothed_energy_guidance_hook +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class SmoothedEnergyGuidance(BaseGuidance): + """ + Smoothed Energy Guidance (SEG): https://huggingface.co/papers/2408.00760 + + SEG is only supported as an experimental prototype feature for now, so the implementation may be modified in the + future without warning or guarantee of reproducibility. This implementation assumes: + - Generated images are square (height == width) + - The model does not combine different modalities together (e.g., text and image latent streams are not combined + together such as Flux) + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + seg_guidance_scale (`float`, defaults to `3.0`): + The scale parameter for smoothed energy guidance. Anatomy and structure coherence may improve with higher + values, but it may also lead to overexposure and saturation. + seg_blur_sigma (`float`, defaults to `9999999.0`): + The amount by which we blur the attention weights. Setting this value greater than 9999.0 results in + infinite blur, which means uniform queries. Controlling it exponentially is empirically effective. + seg_blur_threshold_inf (`float`, defaults to `9999.0`): + The threshold above which the blur is considered infinite. + seg_guidance_start (`float`, defaults to `0.0`): + The fraction of the total number of denoising steps after which smoothed energy guidance starts. + seg_guidance_stop (`float`, defaults to `1.0`): + The fraction of the total number of denoising steps after which smoothed energy guidance stops. + seg_guidance_layers (`int` or `list[int]`, *optional*): + The layer indices to apply smoothed energy guidance to. Can be a single integer or a list of integers. If + not provided, `seg_guidance_config` must be provided. The recommended values are `[7, 8, 9]` for Stable + Diffusion 3.5 Medium. + seg_guidance_config (`SmoothedEnergyGuidanceConfig` or `list[SmoothedEnergyGuidanceConfig]`, *optional*): + The configuration for the smoothed energy layer guidance. Can be a single `SmoothedEnergyGuidanceConfig` or + a list of `SmoothedEnergyGuidanceConfig`. If not provided, `seg_guidance_layers` must be provided. + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.01`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `0.2`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond", "pred_cond_seg"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + seg_guidance_scale: float = 2.8, + seg_blur_sigma: float = 9999999.0, + seg_blur_threshold_inf: float = 9999.0, + seg_guidance_start: float = 0.0, + seg_guidance_stop: float = 1.0, + seg_guidance_layers: int | list[int] | None = None, + seg_guidance_config: SmoothedEnergyGuidanceConfig | list[SmoothedEnergyGuidanceConfig] = None, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.seg_guidance_scale = seg_guidance_scale + self.seg_blur_sigma = seg_blur_sigma + self.seg_blur_threshold_inf = seg_blur_threshold_inf + self.seg_guidance_start = seg_guidance_start + self.seg_guidance_stop = seg_guidance_stop + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + if not (0.0 <= seg_guidance_start < 1.0): + raise ValueError(f"Expected `seg_guidance_start` to be between 0.0 and 1.0, but got {seg_guidance_start}.") + if not (seg_guidance_start <= seg_guidance_stop <= 1.0): + raise ValueError(f"Expected `seg_guidance_stop` to be between 0.0 and 1.0, but got {seg_guidance_stop}.") + + if seg_guidance_layers is None and seg_guidance_config is None: + raise ValueError( + "Either `seg_guidance_layers` or `seg_guidance_config` must be provided to enable Smoothed Energy Guidance." + ) + if seg_guidance_layers is not None and seg_guidance_config is not None: + raise ValueError("Only one of `seg_guidance_layers` or `seg_guidance_config` can be provided.") + + if seg_guidance_layers is not None: + if isinstance(seg_guidance_layers, int): + seg_guidance_layers = [seg_guidance_layers] + if not isinstance(seg_guidance_layers, list): + raise ValueError( + f"Expected `seg_guidance_layers` to be an int or a list of ints, but got {type(seg_guidance_layers)}." + ) + seg_guidance_config = [SmoothedEnergyGuidanceConfig(layer, fqn="auto") for layer in seg_guidance_layers] + + if isinstance(seg_guidance_config, dict): + seg_guidance_config = SmoothedEnergyGuidanceConfig.from_dict(seg_guidance_config) + + if isinstance(seg_guidance_config, SmoothedEnergyGuidanceConfig): + seg_guidance_config = [seg_guidance_config] + + if not isinstance(seg_guidance_config, list): + raise ValueError( + f"Expected `seg_guidance_config` to be a SmoothedEnergyGuidanceConfig or a list of SmoothedEnergyGuidanceConfig, but got {type(seg_guidance_config)}." + ) + elif isinstance(next(iter(seg_guidance_config), None), dict): + seg_guidance_config = [SmoothedEnergyGuidanceConfig.from_dict(config) for config in seg_guidance_config] + + self.seg_guidance_config = seg_guidance_config + self._seg_layer_hook_names = [f"SmoothedEnergyGuidance_{i}" for i in range(len(self.seg_guidance_config))] + + def prepare_models(self, denoiser: torch.nn.Module) -> None: + if self._is_seg_enabled() and self.is_conditional and self._count_prepared > 1: + for name, config in zip(self._seg_layer_hook_names, self.seg_guidance_config): + _apply_smoothed_energy_guidance_hook(denoiser, config, self.seg_blur_sigma, name=name) + + def cleanup_models(self, denoiser: torch.nn.Module): + if self._is_seg_enabled() and self.is_conditional and self._count_prepared > 1: + registry = HookRegistry.check_if_exists_or_initialize(denoiser) + # Remove the hooks after inference + for hook_name in self._seg_layer_hook_names: + registry.remove_hook(hook_name, recurse=True) + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + if self.num_conditions == 1: + tuple_indices = [0] + input_predictions = ["pred_cond"] + elif self.num_conditions == 2: + tuple_indices = [0, 1] + input_predictions = ( + ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_seg"] + ) + else: + tuple_indices = [0, 1, 0] + input_predictions = ["pred_cond", "pred_uncond", "pred_cond_seg"] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + if self.num_conditions == 1: + tuple_indices = [0] + input_predictions = ["pred_cond"] + elif self.num_conditions == 2: + tuple_indices = [0, 1] + input_predictions = ( + ["pred_cond", "pred_uncond"] if self._is_cfg_enabled() else ["pred_cond", "pred_cond_seg"] + ) + else: + tuple_indices = [0, 1, 0] + input_predictions = ["pred_cond", "pred_uncond", "pred_cond_seg"] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward( + self, + pred_cond: torch.Tensor, + pred_uncond: torch.Tensor | None = None, + pred_cond_seg: torch.Tensor | None = None, + ) -> GuiderOutput: + pred = None + + if not self._is_cfg_enabled() and not self._is_seg_enabled(): + pred = pred_cond + elif not self._is_cfg_enabled(): + shift = pred_cond - pred_cond_seg + pred = pred_cond if self.use_original_formulation else pred_cond_seg + pred = pred + self.seg_guidance_scale * shift + elif not self._is_seg_enabled(): + shift = pred_cond - pred_uncond + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + else: + shift = pred_cond - pred_uncond + shift_seg = pred_cond - pred_cond_seg + pred = pred_cond if self.use_original_formulation else pred_uncond + pred = pred + self.guidance_scale * shift + self.seg_guidance_scale * shift_seg + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._count_prepared == 1 or self._count_prepared == 3 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_cfg_enabled(): + num_conditions += 1 + if self._is_seg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_cfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + def _is_seg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self.seg_guidance_start * self._num_inference_steps) + skip_stop_step = int(self.seg_guidance_stop * self._num_inference_steps) + is_within_range = skip_start_step < self._step < skip_stop_step + + is_zero = math.isclose(self.seg_guidance_scale, 0.0) + + return is_within_range and not is_zero diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/tangential_classifier_free_guidance.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/tangential_classifier_free_guidance.py new file mode 100644 index 0000000000000000000000000000000000000000..c8911f4a69d91c8b3cfd1bd14e7b690b0100758c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/guiders/tangential_classifier_free_guidance.py @@ -0,0 +1,151 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import torch + +from ..configuration_utils import register_to_config +from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg + + +if TYPE_CHECKING: + from ..modular_pipelines.modular_pipeline import BlockState + + +class TangentialClassifierFreeGuidance(BaseGuidance): + """ + Tangential Classifier Free Guidance (TCFG): https://huggingface.co/papers/2503.18137 + + Args: + guidance_scale (`float`, defaults to `7.5`): + The scale parameter for classifier-free guidance. Higher values result in stronger conditioning on the text + prompt, while lower values allow for more freedom in generation. Higher values may lead to saturation and + deterioration of image quality. + guidance_rescale (`float`, defaults to `0.0`): + The rescale factor applied to the noise predictions. This is used to improve image quality and fix + overexposure. Based on Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891). + use_original_formulation (`bool`, defaults to `False`): + Whether to use the original formulation of classifier-free guidance as proposed in the paper. By default, + we use the diffusers-native implementation that has been in the codebase for a long time. See + [~guiders.classifier_free_guidance.ClassifierFreeGuidance] for more details. + start (`float`, defaults to `0.0`): + The fraction of the total number of denoising steps after which guidance starts. + stop (`float`, defaults to `1.0`): + The fraction of the total number of denoising steps after which guidance stops. + """ + + _input_predictions = ["pred_cond", "pred_uncond"] + + @register_to_config + def __init__( + self, + guidance_scale: float = 7.5, + guidance_rescale: float = 0.0, + use_original_formulation: bool = False, + start: float = 0.0, + stop: float = 1.0, + enabled: bool = True, + ): + super().__init__(start, stop, enabled) + + self.guidance_scale = guidance_scale + self.guidance_rescale = guidance_rescale + self.use_original_formulation = use_original_formulation + + def prepare_inputs(self, data: dict[str, tuple[torch.Tensor, torch.Tensor]]) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch(data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def prepare_inputs_from_block_state( + self, data: "BlockState", input_fields: dict[str, str | tuple[str, str]] + ) -> list["BlockState"]: + tuple_indices = [0] if self.num_conditions == 1 else [0, 1] + data_batches = [] + for tuple_idx, input_prediction in zip(tuple_indices, self._input_predictions): + data_batch = self._prepare_batch_from_block_state(input_fields, data, tuple_idx, input_prediction) + data_batches.append(data_batch) + return data_batches + + def forward(self, pred_cond: torch.Tensor, pred_uncond: torch.Tensor | None = None) -> GuiderOutput: + pred = None + + if not self._is_tcfg_enabled(): + pred = pred_cond + else: + pred = normalized_guidance(pred_cond, pred_uncond, self.guidance_scale, self.use_original_formulation) + + if self.guidance_rescale > 0.0: + pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale) + + return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond) + + @property + def is_conditional(self) -> bool: + return self._num_outputs_prepared == 1 + + @property + def num_conditions(self) -> int: + num_conditions = 1 + if self._is_tcfg_enabled(): + num_conditions += 1 + return num_conditions + + def _is_tcfg_enabled(self) -> bool: + if not self._enabled: + return False + + is_within_range = True + if self._num_inference_steps is not None: + skip_start_step = int(self._start * self._num_inference_steps) + skip_stop_step = int(self._stop * self._num_inference_steps) + is_within_range = skip_start_step <= self._step < skip_stop_step + + is_close = False + if self.use_original_formulation: + is_close = math.isclose(self.guidance_scale, 0.0) + else: + is_close = math.isclose(self.guidance_scale, 1.0) + + return is_within_range and not is_close + + +def normalized_guidance( + pred_cond: torch.Tensor, pred_uncond: torch.Tensor, guidance_scale: float, use_original_formulation: bool = False +) -> torch.Tensor: + cond_dtype = pred_cond.dtype + preds = torch.stack([pred_cond, pred_uncond], dim=1).float() + preds = preds.flatten(2) + U, S, Vh = torch.linalg.svd(preds, full_matrices=False) + Vh_modified = Vh.clone() + Vh_modified[:, 1] = 0 + + uncond_flat = pred_uncond.reshape(pred_uncond.size(0), 1, -1).float() + x_Vh = torch.matmul(uncond_flat, Vh.transpose(-2, -1)) + x_Vh_V = torch.matmul(x_Vh, Vh_modified) + pred_uncond = x_Vh_V.reshape(pred_uncond.shape).to(cond_dtype) + + pred = pred_cond if use_original_formulation else pred_uncond + shift = pred_cond - pred_uncond + pred = pred + guidance_scale * shift + + return pred diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..23c8bc92b2f1ba663017c3f83dae2cfdc256d81a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..utils import is_torch_available + + +if is_torch_available(): + from .context_parallel import apply_context_parallel + from .faster_cache import FasterCacheConfig, apply_faster_cache + from .first_block_cache import FirstBlockCacheConfig, apply_first_block_cache + from .group_offloading import apply_group_offloading + from .hooks import HookRegistry, ModelHook + from .layer_skip import LayerSkipConfig, apply_layer_skip + from .layerwise_casting import apply_layerwise_casting, apply_layerwise_casting_hook + from .mag_cache import MagCacheConfig, apply_mag_cache + from .pyramid_attention_broadcast import PyramidAttentionBroadcastConfig, apply_pyramid_attention_broadcast + from .smoothed_energy_guidance_utils import SmoothedEnergyGuidanceConfig + from .taylorseer_cache import TaylorSeerCacheConfig, apply_taylorseer_cache diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ecd5fb4756f18f73c70bf3526420958f9e22ea22 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/_common.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/_common.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..444e8a050a276f2244010faae2389175265216f8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/_common.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/_helpers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/_helpers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4ce7d90fbd9244f6a1970d64d3ebaf232def0ec Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/_helpers.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/context_parallel.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/context_parallel.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c737dff49bd01e307c8769f45558c50ef505bbba Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/context_parallel.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/faster_cache.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/faster_cache.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29a0b59274c34c2d3dbc93bef0d7dfe5f743c5c5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/faster_cache.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/first_block_cache.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/first_block_cache.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5415094e40fc0ba0d76e0d6f42c01a9e9abf677 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/first_block_cache.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/group_offloading.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/group_offloading.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d26fc6b96287eca63efcd0a93b195ca2e2ff072 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/group_offloading.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/hooks.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/hooks.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95df77c091f82871842f71e5add3444c317e9512 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/hooks.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/layer_skip.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/layer_skip.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3c099acc4b3a003046e8eb556b35af296676755 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/layer_skip.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/layerwise_casting.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/layerwise_casting.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b906306b01c4e76d1e8dea91d1038c8a37df5cd Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/layerwise_casting.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/mag_cache.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/mag_cache.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..431d544dd442031a3b470f5dfa84fa58da8ed7bd Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/mag_cache.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/pyramid_attention_broadcast.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/pyramid_attention_broadcast.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..881cebf05889d8326e4b4b8110f5bb1a97c96dd0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/pyramid_attention_broadcast.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/smoothed_energy_guidance_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/smoothed_energy_guidance_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c029ff6e27020a5e6a6d24223f1db70fe1ee820b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/smoothed_energy_guidance_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/taylorseer_cache.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/taylorseer_cache.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f02ddf32d25c127362de1ab579eb024da52f740b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/taylorseer_cache.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53bdfcebd5d1840a95e0420176012eb2c3018509 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/_common.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/_common.py new file mode 100644 index 0000000000000000000000000000000000000000..fa7ab770da6d3d92573654e92095cec7be31817a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/_common.py @@ -0,0 +1,61 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from ..models.attention import AttentionModuleMixin, FeedForward, LuminaFeedForward +from ..models.attention_processor import Attention, MochiAttention + + +_ATTENTION_CLASSES = (Attention, MochiAttention, AttentionModuleMixin) +_FEEDFORWARD_CLASSES = (FeedForward, LuminaFeedForward) + +_SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS = ( + "blocks", + "transformer_blocks", + "single_transformer_blocks", + "layers", + "visual_transformer_blocks", +) +_TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS = ("temporal_transformer_blocks",) +_CROSS_TRANSFORMER_BLOCK_IDENTIFIERS = ("blocks", "transformer_blocks", "layers") + +_ALL_TRANSFORMER_BLOCK_IDENTIFIERS = tuple( + { + *_SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS, + *_TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS, + *_CROSS_TRANSFORMER_BLOCK_IDENTIFIERS, + } +) + +# Layers supported for group offloading and layerwise casting +_GO_LC_SUPPORTED_PYTORCH_LAYERS = ( + torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, + torch.nn.ConvTranspose1d, + torch.nn.ConvTranspose2d, + torch.nn.ConvTranspose3d, + torch.nn.Linear, + torch.nn.Embedding, + # TODO(aryan): look into torch.nn.LayerNorm, torch.nn.GroupNorm later, seems to be causing some issues with CogVideoX + # because of double invocation of the same norm layer in CogVideoXLayerNorm +) + + +def _get_submodule_from_fqn(module: torch.nn.Module, fqn: str) -> torch.nn.Module | None: + for submodule_name, submodule in module.named_modules(): + if submodule_name == fqn: + return submodule + return None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/_helpers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/_helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..0267dd481a897912b0a4c8f15e431f37804b55ec --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/_helpers.py @@ -0,0 +1,381 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from dataclasses import dataclass +from typing import Any, Callable, Type + + +@dataclass +class AttentionProcessorMetadata: + skip_processor_output_fn: Callable[[Any], Any] + + +@dataclass +class TransformerBlockMetadata: + return_hidden_states_index: int = None + return_encoder_hidden_states_index: int = None + hidden_states_argument_name: str = "hidden_states" + + _cls: Type = None + _cached_parameter_indices: dict[str, int] = None + + def _get_parameter_from_args_kwargs(self, identifier: str, args=(), kwargs=None): + kwargs = kwargs or {} + if identifier in kwargs: + return kwargs[identifier] + if self._cached_parameter_indices is not None: + return args[self._cached_parameter_indices[identifier]] + if self._cls is None: + raise ValueError("Model class is not set for metadata.") + parameters = list(inspect.signature(self._cls.forward).parameters.keys()) + parameters = parameters[1:] # skip `self` + self._cached_parameter_indices = {param: i for i, param in enumerate(parameters)} + if identifier not in self._cached_parameter_indices: + raise ValueError(f"Parameter '{identifier}' not found in function signature but was requested.") + index = self._cached_parameter_indices[identifier] + if index >= len(args): + raise ValueError(f"Expected {index} arguments but got {len(args)}.") + return args[index] + + +class AttentionProcessorRegistry: + _registry = {} + # TODO(aryan): this is only required for the time being because we need to do the registrations + # for classes. If we do it eagerly, i.e. call the functions in global scope, we will get circular + # import errors because of the models imported in this file. + _is_registered = False + + @classmethod + def register(cls, model_class: Type, metadata: AttentionProcessorMetadata): + cls._register() + cls._registry[model_class] = metadata + + @classmethod + def get(cls, model_class: Type) -> AttentionProcessorMetadata: + cls._register() + if model_class not in cls._registry: + raise ValueError(f"Model class {model_class} not registered.") + return cls._registry[model_class] + + @classmethod + def _register(cls): + if cls._is_registered: + return + cls._is_registered = True + _register_attention_processors_metadata() + + +class TransformerBlockRegistry: + _registry = {} + # TODO(aryan): this is only required for the time being because we need to do the registrations + # for classes. If we do it eagerly, i.e. call the functions in global scope, we will get circular + # import errors because of the models imported in this file. + _is_registered = False + + @classmethod + def register(cls, model_class: Type, metadata: TransformerBlockMetadata): + cls._register() + metadata._cls = model_class + cls._registry[model_class] = metadata + + @classmethod + def get(cls, model_class: Type) -> TransformerBlockMetadata: + cls._register() + if model_class not in cls._registry: + raise ValueError(f"Model class {model_class} not registered.") + return cls._registry[model_class] + + @classmethod + def _register(cls): + if cls._is_registered: + return + cls._is_registered = True + _register_transformer_blocks_metadata() + + +def _register_attention_processors_metadata(): + from ..models.attention_processor import AttnProcessor2_0 + from ..models.transformers.transformer_cogview4 import CogView4AttnProcessor + from ..models.transformers.transformer_flux import FluxAttnProcessor + from ..models.transformers.transformer_hunyuanimage import HunyuanImageAttnProcessor + from ..models.transformers.transformer_qwenimage import QwenDoubleStreamAttnProcessor2_0 + from ..models.transformers.transformer_wan import WanAttnProcessor2_0 + from ..models.transformers.transformer_z_image import ZSingleStreamAttnProcessor + + # AttnProcessor2_0 + AttentionProcessorRegistry.register( + model_class=AttnProcessor2_0, + metadata=AttentionProcessorMetadata( + skip_processor_output_fn=_skip_proc_output_fn_Attention_AttnProcessor2_0, + ), + ) + + # CogView4AttnProcessor + AttentionProcessorRegistry.register( + model_class=CogView4AttnProcessor, + metadata=AttentionProcessorMetadata( + skip_processor_output_fn=_skip_proc_output_fn_Attention_CogView4AttnProcessor, + ), + ) + + # WanAttnProcessor2_0 + AttentionProcessorRegistry.register( + model_class=WanAttnProcessor2_0, + metadata=AttentionProcessorMetadata( + skip_processor_output_fn=_skip_proc_output_fn_Attention_WanAttnProcessor2_0, + ), + ) + + # FluxAttnProcessor + AttentionProcessorRegistry.register( + model_class=FluxAttnProcessor, + metadata=AttentionProcessorMetadata(skip_processor_output_fn=_skip_proc_output_fn_Attention_FluxAttnProcessor), + ) + + # QwenDoubleStreamAttnProcessor2 + AttentionProcessorRegistry.register( + model_class=QwenDoubleStreamAttnProcessor2_0, + metadata=AttentionProcessorMetadata( + skip_processor_output_fn=_skip_proc_output_fn_Attention_QwenDoubleStreamAttnProcessor2_0 + ), + ) + + # HunyuanImageAttnProcessor + AttentionProcessorRegistry.register( + model_class=HunyuanImageAttnProcessor, + metadata=AttentionProcessorMetadata( + skip_processor_output_fn=_skip_proc_output_fn_Attention_HunyuanImageAttnProcessor, + ), + ) + + # ZSingleStreamAttnProcessor + AttentionProcessorRegistry.register( + model_class=ZSingleStreamAttnProcessor, + metadata=AttentionProcessorMetadata( + skip_processor_output_fn=_skip_proc_output_fn_Attention_ZSingleStreamAttnProcessor, + ), + ) + + +def _register_transformer_blocks_metadata(): + from ..models.attention import BasicTransformerBlock, JointTransformerBlock + from ..models.transformers.cogvideox_transformer_3d import CogVideoXBlock + from ..models.transformers.transformer_bria import BriaTransformerBlock + from ..models.transformers.transformer_cogview4 import CogView4TransformerBlock + from ..models.transformers.transformer_flux import FluxSingleTransformerBlock, FluxTransformerBlock + from ..models.transformers.transformer_hunyuan_video import ( + HunyuanVideoSingleTransformerBlock, + HunyuanVideoTokenReplaceSingleTransformerBlock, + HunyuanVideoTokenReplaceTransformerBlock, + HunyuanVideoTransformerBlock, + ) + from ..models.transformers.transformer_hunyuanimage import ( + HunyuanImageSingleTransformerBlock, + HunyuanImageTransformerBlock, + ) + from ..models.transformers.transformer_kandinsky import Kandinsky5TransformerDecoderBlock + from ..models.transformers.transformer_ltx import LTXVideoTransformerBlock + from ..models.transformers.transformer_mochi import MochiTransformerBlock + from ..models.transformers.transformer_qwenimage import QwenImageTransformerBlock + from ..models.transformers.transformer_wan import WanTransformerBlock + from ..models.transformers.transformer_z_image import ZImageTransformerBlock + + # BasicTransformerBlock + TransformerBlockRegistry.register( + model_class=BasicTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=None, + ), + ) + TransformerBlockRegistry.register( + model_class=BriaTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=None, + ), + ) + + # CogVideoX + TransformerBlockRegistry.register( + model_class=CogVideoXBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + + # CogView4 + TransformerBlockRegistry.register( + model_class=CogView4TransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + + # Flux + TransformerBlockRegistry.register( + model_class=FluxTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=1, + return_encoder_hidden_states_index=0, + ), + ) + TransformerBlockRegistry.register( + model_class=FluxSingleTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=1, + return_encoder_hidden_states_index=0, + ), + ) + + # HunyuanVideo + TransformerBlockRegistry.register( + model_class=HunyuanVideoTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + TransformerBlockRegistry.register( + model_class=HunyuanVideoSingleTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + TransformerBlockRegistry.register( + model_class=HunyuanVideoTokenReplaceTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + TransformerBlockRegistry.register( + model_class=HunyuanVideoTokenReplaceSingleTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + + # LTXVideo + TransformerBlockRegistry.register( + model_class=LTXVideoTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=None, + ), + ) + + # Mochi + TransformerBlockRegistry.register( + model_class=MochiTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + + # Wan + TransformerBlockRegistry.register( + model_class=WanTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=None, + ), + ) + + # QwenImage + TransformerBlockRegistry.register( + model_class=QwenImageTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=1, + return_encoder_hidden_states_index=0, + ), + ) + + # HunyuanImage2.1 + TransformerBlockRegistry.register( + model_class=HunyuanImageTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + TransformerBlockRegistry.register( + model_class=HunyuanImageSingleTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=1, + ), + ) + + # ZImage + TransformerBlockRegistry.register( + model_class=ZImageTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=None, + ), + ) + + TransformerBlockRegistry.register( + model_class=JointTransformerBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=1, + return_encoder_hidden_states_index=0, + ), + ) + + # Kandinsky 5.0 (Kandinsky5TransformerDecoderBlock) + TransformerBlockRegistry.register( + model_class=Kandinsky5TransformerDecoderBlock, + metadata=TransformerBlockMetadata( + return_hidden_states_index=0, + return_encoder_hidden_states_index=None, + hidden_states_argument_name="visual_embed", + ), + ) + + +# fmt: off +def _skip_attention___ret___hidden_states(self, *args, **kwargs): + hidden_states = kwargs.get("hidden_states", None) + if hidden_states is None and len(args) > 0: + hidden_states = args[0] + return hidden_states + + +def _skip_attention___ret___hidden_states___encoder_hidden_states(self, *args, **kwargs): + hidden_states = kwargs.get("hidden_states", None) + encoder_hidden_states = kwargs.get("encoder_hidden_states", None) + if hidden_states is None and len(args) > 0: + hidden_states = args[0] + if encoder_hidden_states is None and len(args) > 1: + encoder_hidden_states = args[1] + return hidden_states, encoder_hidden_states + + +_skip_proc_output_fn_Attention_AttnProcessor2_0 = _skip_attention___ret___hidden_states +_skip_proc_output_fn_Attention_CogView4AttnProcessor = _skip_attention___ret___hidden_states___encoder_hidden_states +_skip_proc_output_fn_Attention_WanAttnProcessor2_0 = _skip_attention___ret___hidden_states +# not sure what this is yet. +_skip_proc_output_fn_Attention_FluxAttnProcessor = _skip_attention___ret___hidden_states +_skip_proc_output_fn_Attention_QwenDoubleStreamAttnProcessor2_0 = _skip_attention___ret___hidden_states +_skip_proc_output_fn_Attention_HunyuanImageAttnProcessor = _skip_attention___ret___hidden_states +_skip_proc_output_fn_Attention_ZSingleStreamAttnProcessor = _skip_attention___ret___hidden_states +# fmt: on diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/context_parallel.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/context_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..6130be2b8290716e7cc34154bb6ac616e1857f20 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/context_parallel.py @@ -0,0 +1,383 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +import functools +import inspect +from dataclasses import dataclass +from typing import Type + +import torch +import torch.distributed as dist + + +if torch.distributed.is_available(): + import torch.distributed._functional_collectives as funcol + +from ..models._modeling_parallel import ( + ContextParallelConfig, + ContextParallelInput, + ContextParallelModelPlan, + ContextParallelOutput, + gather_size_by_comm, +) +from ..utils import get_logger +from ..utils.torch_utils import maybe_allow_in_graph, unwrap_module +from .hooks import HookRegistry, ModelHook + + +logger = get_logger(__name__) # pylint: disable=invalid-name + +_CONTEXT_PARALLEL_INPUT_HOOK_TEMPLATE = "cp_input---{}" +_CONTEXT_PARALLEL_OUTPUT_HOOK_TEMPLATE = "cp_output---{}" + + +# TODO(aryan): consolidate with ._helpers.TransformerBlockMetadata +@dataclass +class ModuleForwardMetadata: + cached_parameter_indices: dict[str, int] = None + _cls: Type = None + + def _get_parameter_from_args_kwargs(self, identifier: str, args=(), kwargs=None): + kwargs = kwargs or {} + + if identifier in kwargs: + return kwargs[identifier], True, None + + if self.cached_parameter_indices is not None: + index = self.cached_parameter_indices.get(identifier, None) + if index is None: + raise ValueError(f"Parameter '{identifier}' not found in cached indices.") + return args[index], False, index + + if self._cls is None: + raise ValueError("Model class is not set for metadata.") + + parameters = list(inspect.signature(self._cls.forward).parameters.keys()) + parameters = parameters[1:] # skip `self` + self.cached_parameter_indices = {param: i for i, param in enumerate(parameters)} + + if identifier not in self.cached_parameter_indices: + raise ValueError(f"Parameter '{identifier}' not found in function signature but was requested.") + + index = self.cached_parameter_indices[identifier] + + if index >= len(args): + raise ValueError(f"Expected {index} arguments but got {len(args)}.") + + return args[index], False, index + + +def apply_context_parallel( + module: torch.nn.Module, + parallel_config: ContextParallelConfig, + plan: dict[str, ContextParallelModelPlan], +) -> None: + """Apply context parallel on a model.""" + logger.debug(f"Applying context parallel with CP mesh: {parallel_config._mesh} and plan: {plan}") + + for module_id, cp_model_plan in plan.items(): + submodule = _get_submodule_by_name(module, module_id) + if not isinstance(submodule, list): + submodule = [submodule] + + logger.debug(f"Applying ContextParallelHook to {module_id=} identifying a total of {len(submodule)} modules") + + for m in submodule: + if isinstance(cp_model_plan, dict): + hook = ContextParallelSplitHook(cp_model_plan, parallel_config) + hook_name = _CONTEXT_PARALLEL_INPUT_HOOK_TEMPLATE.format(module_id) + elif isinstance(cp_model_plan, (ContextParallelOutput, list, tuple)): + if isinstance(cp_model_plan, ContextParallelOutput): + cp_model_plan = [cp_model_plan] + if not all(isinstance(x, ContextParallelOutput) for x in cp_model_plan): + raise ValueError(f"Expected all elements of cp_model_plan to be CPOutput, but got {cp_model_plan}") + hook = ContextParallelGatherHook(cp_model_plan, parallel_config) + hook_name = _CONTEXT_PARALLEL_OUTPUT_HOOK_TEMPLATE.format(module_id) + else: + raise ValueError(f"Unsupported context parallel model plan type: {type(cp_model_plan)}") + registry = HookRegistry.check_if_exists_or_initialize(m) + registry.register_hook(hook, hook_name) + + +def remove_context_parallel(module: torch.nn.Module, plan: dict[str, ContextParallelModelPlan]) -> None: + for module_id, cp_model_plan in plan.items(): + submodule = _get_submodule_by_name(module, module_id) + if not isinstance(submodule, list): + submodule = [submodule] + + for m in submodule: + registry = HookRegistry.check_if_exists_or_initialize(m) + if isinstance(cp_model_plan, dict): + hook_name = _CONTEXT_PARALLEL_INPUT_HOOK_TEMPLATE.format(module_id) + elif isinstance(cp_model_plan, (ContextParallelOutput, list, tuple)): + hook_name = _CONTEXT_PARALLEL_OUTPUT_HOOK_TEMPLATE.format(module_id) + else: + raise ValueError(f"Unsupported context parallel model plan type: {type(cp_model_plan)}") + registry.remove_hook(hook_name) + + +class ContextParallelSplitHook(ModelHook): + def __init__(self, metadata: ContextParallelModelPlan, parallel_config: ContextParallelConfig) -> None: + super().__init__() + self.metadata = metadata + self.parallel_config = parallel_config + self.module_forward_metadata = None + + def initialize_hook(self, module): + cls = unwrap_module(module).__class__ + self.module_forward_metadata = ModuleForwardMetadata(_cls=cls) + return module + + def pre_forward(self, module, *args, **kwargs): + args_list = list(args) + + for name, cpm in self.metadata.items(): + if isinstance(cpm, ContextParallelInput) and cpm.split_output: + continue + + # Maybe the parameter was passed as a keyword argument + input_val, is_kwarg, index = self.module_forward_metadata._get_parameter_from_args_kwargs( + name, args_list, kwargs + ) + + if input_val is None: + continue + + # The input_val may be a tensor or list/tuple of tensors. In certain cases, user may specify to shard + # the output instead of input for a particular layer by setting split_output=True + if isinstance(input_val, torch.Tensor): + input_val = self._prepare_cp_input(input_val, cpm) + elif isinstance(input_val, (list, tuple)): + if len(input_val) != len(cpm): + raise ValueError( + f"Expected input model plan to have {len(input_val)} elements, but got {len(cpm)}." + ) + sharded_input_val = [] + for i, x in enumerate(input_val): + if torch.is_tensor(x) and not cpm[i].split_output: + x = self._prepare_cp_input(x, cpm[i]) + sharded_input_val.append(x) + input_val = sharded_input_val + else: + raise ValueError(f"Unsupported input type: {type(input_val)}") + + if is_kwarg: + kwargs[name] = input_val + elif index is not None and index < len(args_list): + args_list[index] = input_val + else: + raise ValueError( + f"An unexpected error occurred while processing the input '{name}'. Please open an " + f"issue at https://github.com/huggingface/diffusers/issues and provide a minimal reproducible " + f"example along with the full stack trace." + ) + + return tuple(args_list), kwargs + + def post_forward(self, module, output): + is_tensor = isinstance(output, torch.Tensor) + is_tensor_list = isinstance(output, (list, tuple)) and all(isinstance(x, torch.Tensor) for x in output) + + if not is_tensor and not is_tensor_list: + raise ValueError(f"Expected output to be a tensor or a list/tuple of tensors, but got {type(output)}.") + + output = [output] if is_tensor else list(output) + for index, cpm in self.metadata.items(): + if not isinstance(cpm, ContextParallelInput) or not cpm.split_output: + continue + if index >= len(output): + raise ValueError(f"Index {index} out of bounds for output of length {len(output)}.") + current_output = output[index] + current_output = self._prepare_cp_input(current_output, cpm) + output[index] = current_output + + return output[0] if is_tensor else tuple(output) + + def _prepare_cp_input(self, x: torch.Tensor, cp_input: ContextParallelInput) -> torch.Tensor: + if cp_input.expected_dims is not None and x.dim() != cp_input.expected_dims: + logger.warning_once( + f"Expected input tensor to have {cp_input.expected_dims} dimensions, but got {x.dim()} dimensions, split will not be applied." + ) + return x + else: + if self.parallel_config.ulysses_anything: + return PartitionAnythingSharder.shard_anything( + x, cp_input.split_dim, self.parallel_config._flattened_mesh + ) + return EquipartitionSharder.shard(x, cp_input.split_dim, self.parallel_config._flattened_mesh) + + +class ContextParallelGatherHook(ModelHook): + def __init__(self, metadata: ContextParallelModelPlan, parallel_config: ContextParallelConfig) -> None: + super().__init__() + self.metadata = metadata + self.parallel_config = parallel_config + + def post_forward(self, module, output): + is_tensor = isinstance(output, torch.Tensor) + + if is_tensor: + output = [output] + elif not (isinstance(output, (list, tuple)) and all(isinstance(x, torch.Tensor) for x in output)): + raise ValueError(f"Expected output to be a tensor or a list/tuple of tensors, but got {type(output)}.") + + output = list(output) + + if len(output) != len(self.metadata): + raise ValueError(f"Expected output to have {len(self.metadata)} elements, but got {len(output)}.") + + for i, cpm in enumerate(self.metadata): + if cpm is None: + continue + if self.parallel_config.ulysses_anything: + output[i] = PartitionAnythingSharder.unshard_anything( + output[i], cpm.gather_dim, self.parallel_config._flattened_mesh + ) + else: + output[i] = EquipartitionSharder.unshard( + output[i], cpm.gather_dim, self.parallel_config._flattened_mesh + ) + + return output[0] if is_tensor else tuple(output) + + +class AllGatherFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, tensor, dim, group): + ctx.dim = dim + ctx.group = group + ctx.world_size = torch.distributed.get_world_size(group) + ctx.rank = torch.distributed.get_rank(group) + return funcol.all_gather_tensor(tensor, dim, group=group) + + @staticmethod + def backward(ctx, grad_output): + grad_chunks = torch.chunk(grad_output, ctx.world_size, dim=ctx.dim) + return grad_chunks[ctx.rank], None, None + + +class EquipartitionSharder: + @classmethod + def shard(cls, tensor: torch.Tensor, dim: int, mesh: torch.distributed.device_mesh.DeviceMesh) -> torch.Tensor: + # NOTE: the following assertion does not have to be true in general. We simply enforce it for now + # because the alternate case has not yet been tested/required for any model. + assert tensor.size()[dim] % mesh.size() == 0, ( + "Tensor size along dimension to be sharded must be divisible by mesh size" + ) + + # The following is not fullgraph compatible with Dynamo (fails in DeviceMesh.get_rank) + # return tensor.chunk(mesh.size(), dim=dim)[mesh.get_rank()] + + return tensor.chunk(mesh.size(), dim=dim)[torch.distributed.get_rank(mesh.get_group())] + + @classmethod + def unshard(cls, tensor: torch.Tensor, dim: int, mesh: torch.distributed.device_mesh.DeviceMesh) -> torch.Tensor: + tensor = tensor.contiguous() + tensor = AllGatherFunction.apply(tensor, dim, mesh.get_group()) + return tensor + + +class AllGatherAnythingFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, tensor: torch.Tensor, dim: int, group: dist.device_mesh.DeviceMesh): + ctx.dim = dim + ctx.group = group + ctx.world_size = dist.get_world_size(group) + ctx.rank = dist.get_rank(group) + gathered_tensor = _all_gather_anything(tensor, dim, group) + return gathered_tensor + + @staticmethod + def backward(ctx, grad_output): + # NOTE: We use `tensor_split` instead of chunk, because the `chunk` + # function may return fewer than the specified number of chunks! + grad_splits = torch.tensor_split(grad_output, ctx.world_size, dim=ctx.dim) + return grad_splits[ctx.rank], None, None + + +class PartitionAnythingSharder: + @classmethod + def shard_anything( + cls, tensor: torch.Tensor, dim: int, mesh: torch.distributed.device_mesh.DeviceMesh + ) -> torch.Tensor: + assert tensor.size()[dim] >= mesh.size(), ( + f"Cannot shard tensor of size {tensor.size()} along dim {dim} across mesh of size {mesh.size()}." + ) + # NOTE: We use `tensor_split` instead of chunk, because the `chunk` + # function may return fewer than the specified number of chunks! + return tensor.tensor_split(mesh.size(), dim=dim)[dist.get_rank(mesh.get_group())] + + @classmethod + def unshard_anything( + cls, tensor: torch.Tensor, dim: int, mesh: torch.distributed.device_mesh.DeviceMesh + ) -> torch.Tensor: + tensor = tensor.contiguous() + tensor = AllGatherAnythingFunction.apply(tensor, dim, mesh.get_group()) + return tensor + + +@functools.lru_cache(maxsize=64) +def _fill_gather_shapes(shape: tuple[int], gather_dims: tuple[int], dim: int, world_size: int) -> list[list[int]]: + gather_shapes = [] + for i in range(world_size): + rank_shape = list(copy.deepcopy(shape)) + rank_shape[dim] = gather_dims[i] + gather_shapes.append(rank_shape) + return gather_shapes + + +@maybe_allow_in_graph +def _all_gather_anything(tensor: torch.Tensor, dim: int, group: dist.device_mesh.DeviceMesh) -> torch.Tensor: + world_size = dist.get_world_size(group=group) + + tensor = tensor.contiguous() + shape = tensor.shape + rank_dim = shape[dim] + gather_dims = gather_size_by_comm(rank_dim, group) + + gather_shapes = _fill_gather_shapes(tuple(shape), tuple(gather_dims), dim, world_size) + + gathered_tensors = [torch.empty(shape, device=tensor.device, dtype=tensor.dtype) for shape in gather_shapes] + + dist.all_gather(gathered_tensors, tensor, group=group) + gathered_tensor = torch.cat(gathered_tensors, dim=dim) + return gathered_tensor + + +def _get_submodule_by_name(model: torch.nn.Module, name: str) -> torch.nn.Module | list[torch.nn.Module]: + if name.count("*") > 1: + raise ValueError("Wildcard '*' can only be used once in the name") + return _find_submodule_by_name(model, name) + + +def _find_submodule_by_name(model: torch.nn.Module, name: str) -> torch.nn.Module | list[torch.nn.Module]: + if name == "": + return model + first_atom, remaining_name = name.split(".", 1) if "." in name else (name, "") + if first_atom == "*": + if not isinstance(model, torch.nn.ModuleList): + raise ValueError("Wildcard '*' can only be used with ModuleList") + submodules = [] + for submodule in model: + subsubmodules = _find_submodule_by_name(submodule, remaining_name) + if not isinstance(subsubmodules, list): + subsubmodules = [subsubmodules] + submodules.extend(subsubmodules) + return submodules + else: + if hasattr(model, first_atom): + submodule = getattr(model, first_atom) + return _find_submodule_by_name(submodule, remaining_name) + else: + raise ValueError(f"'{first_atom}' is not a submodule of '{model.__class__.__name__}'") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/faster_cache.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/faster_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..682cebe36c7d17b0ab8744318ead78ba94260c66 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/faster_cache.py @@ -0,0 +1,654 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from dataclasses import dataclass +from typing import Any, Callable + +import torch + +from ..models.attention import AttentionModuleMixin +from ..models.modeling_outputs import Transformer2DModelOutput +from ..utils import logging +from ._common import _ATTENTION_CLASSES +from .hooks import HookRegistry, ModelHook + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +_FASTER_CACHE_DENOISER_HOOK = "faster_cache_denoiser" +_FASTER_CACHE_BLOCK_HOOK = "faster_cache_block" +_SPATIAL_ATTENTION_BLOCK_IDENTIFIERS = ( + "^blocks.*attn", + "^transformer_blocks.*attn", + "^single_transformer_blocks.*attn", +) +_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS = ("^temporal_transformer_blocks.*attn",) +_TRANSFORMER_BLOCK_IDENTIFIERS = _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS + _TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS +_UNCOND_COND_INPUT_KWARGS_IDENTIFIERS = ( + "hidden_states", + "encoder_hidden_states", + "timestep", + "attention_mask", + "encoder_attention_mask", +) + + +@dataclass +class FasterCacheConfig: + r""" + Configuration for [FasterCache](https://huggingface.co/papers/2410.19355). + + Attributes: + spatial_attention_block_skip_range (`int`, defaults to `2`): + Calculate the attention states every `N` iterations. If this is set to `N`, the attention computation will + be skipped `N - 1` times (i.e., cached attention states will be reused) before computing the new attention + states again. + temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`): + Calculate the attention states every `N` iterations. If this is set to `N`, the attention computation will + be skipped `N - 1` times (i.e., cached attention states will be reused) before computing the new attention + states again. + spatial_attention_timestep_skip_range (`tuple[float, float]`, defaults to `(-1, 681)`): + The timestep range within which the spatial attention computation can be skipped without a significant loss + in quality. This is to be determined by the user based on the underlying model. The first value in the + tuple is the lower bound and the second value is the upper bound. Typically, diffusion timesteps for + denoising are in the reversed range of 0 to 1000 (i.e. denoising starts at timestep 1000 and ends at + timestep 0). For the default values, this would mean that the spatial attention computation skipping will + be applicable only after denoising timestep 681 is reached, and continue until the end of the denoising + process. + temporal_attention_timestep_skip_range (`tuple[float, float]`, *optional*, defaults to `None`): + The timestep range within which the temporal attention computation can be skipped without a significant + loss in quality. This is to be determined by the user based on the underlying model. The first value in the + tuple is the lower bound and the second value is the upper bound. Typically, diffusion timesteps for + denoising are in the reversed range of 0 to 1000 (i.e. denoising starts at timestep 1000 and ends at + timestep 0). + low_frequency_weight_update_timestep_range (`tuple[int, int]`, defaults to `(99, 901)`): + The timestep range within which the low frequency weight scaling update is applied. The first value in the + tuple is the lower bound and the second value is the upper bound of the timestep range. The callback + function for the update is called only within this range. + high_frequency_weight_update_timestep_range (`tuple[int, int]`, defaults to `(-1, 301)`): + The timestep range within which the high frequency weight scaling update is applied. The first value in the + tuple is the lower bound and the second value is the upper bound of the timestep range. The callback + function for the update is called only within this range. + alpha_low_frequency (`float`, defaults to `1.1`): + The weight to scale the low frequency updates by. This is used to approximate the unconditional branch from + the conditional branch outputs. + alpha_high_frequency (`float`, defaults to `1.1`): + The weight to scale the high frequency updates by. This is used to approximate the unconditional branch + from the conditional branch outputs. + unconditional_batch_skip_range (`int`, defaults to `5`): + Process the unconditional branch every `N` iterations. If this is set to `N`, the unconditional branch + computation will be skipped `N - 1` times (i.e., cached unconditional branch states will be reused) before + computing the new unconditional branch states again. + unconditional_batch_timestep_skip_range (`tuple[float, float]`, defaults to `(-1, 641)`): + The timestep range within which the unconditional branch computation can be skipped without a significant + loss in quality. This is to be determined by the user based on the underlying model. The first value in the + tuple is the lower bound and the second value is the upper bound. + spatial_attention_block_identifiers (`tuple[str, ...]`, defaults to `("blocks.*attn1", "transformer_blocks.*attn1", "single_transformer_blocks.*attn1")`): + The identifiers to match the spatial attention blocks in the model. If the name of the block contains any + of these identifiers, FasterCache will be applied to that block. This can either be the full layer names, + partial layer names, or regex patterns. Matching will always be done using a regex match. + temporal_attention_block_identifiers (`tuple[str, ...]`, defaults to `("temporal_transformer_blocks.*attn1",)`): + The identifiers to match the temporal attention blocks in the model. If the name of the block contains any + of these identifiers, FasterCache will be applied to that block. This can either be the full layer names, + partial layer names, or regex patterns. Matching will always be done using a regex match. + attention_weight_callback (`Callable[[torch.nn.Module], float]`, defaults to `None`): + The callback function to determine the weight to scale the attention outputs by. This function should take + the attention module as input and return a float value. This is used to approximate the unconditional + branch from the conditional branch outputs. If not provided, the default weight is 0.5 for all timesteps. + Typically, as described in the paper, this weight should gradually increase from 0 to 1 as the inference + progresses. Users are encouraged to experiment and provide custom weight schedules that take into account + the number of inference steps and underlying model behaviour as denoising progresses. + low_frequency_weight_callback (`Callable[[torch.nn.Module], float]`, defaults to `None`): + The callback function to determine the weight to scale the low frequency updates by. If not provided, the + default weight is 1.1 for timesteps within the range specified (as described in the paper). + high_frequency_weight_callback (`Callable[[torch.nn.Module], float]`, defaults to `None`): + The callback function to determine the weight to scale the high frequency updates by. If not provided, the + default weight is 1.1 for timesteps within the range specified (as described in the paper). + tensor_format (`str`, defaults to `"BCFHW"`): + The format of the input tensors. This should be one of `"BCFHW"`, `"BFCHW"`, or `"BCHW"`. The format is + used to split individual latent frames in order for low and high frequency components to be computed. + is_guidance_distilled (`bool`, defaults to `False`): + Whether the model is guidance distilled or not. If the model is guidance distilled, FasterCache will not be + applied at the denoiser-level to skip the unconditional branch computation (as there is none). + _unconditional_conditional_input_kwargs_identifiers (`list[str]`, defaults to `("hidden_states", "encoder_hidden_states", "timestep", "attention_mask", "encoder_attention_mask")`): + The identifiers to match the input kwargs that contain the batchwise-concatenated unconditional and + conditional inputs. If the name of the input kwargs contains any of these identifiers, FasterCache will + split the inputs into unconditional and conditional branches. This must be a list of exact input kwargs + names that contain the batchwise-concatenated unconditional and conditional inputs. + """ + + # In the paper and codebase, they hardcode these values to 2. However, it can be made configurable + # after some testing. We default to 2 if these parameters are not provided. + spatial_attention_block_skip_range: int = 2 + temporal_attention_block_skip_range: int | None = None + + spatial_attention_timestep_skip_range: tuple[int, int] = (-1, 681) + temporal_attention_timestep_skip_range: tuple[int, int] = (-1, 681) + + # Indicator functions for low/high frequency as mentioned in Equation 11 of the paper + low_frequency_weight_update_timestep_range: tuple[int, int] = (99, 901) + high_frequency_weight_update_timestep_range: tuple[int, int] = (-1, 301) + + # ⍺1 and ⍺2 as mentioned in Equation 11 of the paper + alpha_low_frequency: float = 1.1 + alpha_high_frequency: float = 1.1 + + # n as described in CFG-Cache explanation in the paper - dependent on the model + unconditional_batch_skip_range: int = 5 + unconditional_batch_timestep_skip_range: tuple[int, int] = (-1, 641) + + spatial_attention_block_identifiers: tuple[str, ...] = _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS + temporal_attention_block_identifiers: tuple[str, ...] = _TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS + + attention_weight_callback: Callable[[torch.nn.Module], float] = None + low_frequency_weight_callback: Callable[[torch.nn.Module], float] = None + high_frequency_weight_callback: Callable[[torch.nn.Module], float] = None + + tensor_format: str = "BCFHW" + is_guidance_distilled: bool = False + + current_timestep_callback: Callable[[], int] = None + + _unconditional_conditional_input_kwargs_identifiers: list[str] = _UNCOND_COND_INPUT_KWARGS_IDENTIFIERS + + def __repr__(self) -> str: + return ( + f"FasterCacheConfig(\n" + f" spatial_attention_block_skip_range={self.spatial_attention_block_skip_range},\n" + f" temporal_attention_block_skip_range={self.temporal_attention_block_skip_range},\n" + f" spatial_attention_timestep_skip_range={self.spatial_attention_timestep_skip_range},\n" + f" temporal_attention_timestep_skip_range={self.temporal_attention_timestep_skip_range},\n" + f" low_frequency_weight_update_timestep_range={self.low_frequency_weight_update_timestep_range},\n" + f" high_frequency_weight_update_timestep_range={self.high_frequency_weight_update_timestep_range},\n" + f" alpha_low_frequency={self.alpha_low_frequency},\n" + f" alpha_high_frequency={self.alpha_high_frequency},\n" + f" unconditional_batch_skip_range={self.unconditional_batch_skip_range},\n" + f" unconditional_batch_timestep_skip_range={self.unconditional_batch_timestep_skip_range},\n" + f" spatial_attention_block_identifiers={self.spatial_attention_block_identifiers},\n" + f" temporal_attention_block_identifiers={self.temporal_attention_block_identifiers},\n" + f" tensor_format={self.tensor_format},\n" + f")" + ) + + +class FasterCacheDenoiserState: + r""" + State for [FasterCache](https://huggingface.co/papers/2410.19355) top-level denoiser module. + """ + + def __init__(self) -> None: + self.iteration: int = 0 + self.low_frequency_delta: torch.Tensor = None + self.high_frequency_delta: torch.Tensor = None + + def reset(self): + self.iteration = 0 + self.low_frequency_delta = None + self.high_frequency_delta = None + + +class FasterCacheBlockState: + r""" + State for [FasterCache](https://huggingface.co/papers/2410.19355). Every underlying block that FasterCache is + applied to will have an instance of this state. + """ + + def __init__(self) -> None: + self.iteration: int = 0 + self.batch_size: int = None + self.cache: tuple[torch.Tensor, torch.Tensor] = None + + def reset(self): + self.iteration = 0 + self.batch_size = None + self.cache = None + + +class FasterCacheDenoiserHook(ModelHook): + _is_stateful = True + + def __init__( + self, + unconditional_batch_skip_range: int, + unconditional_batch_timestep_skip_range: tuple[int, int], + tensor_format: str, + is_guidance_distilled: bool, + uncond_cond_input_kwargs_identifiers: list[str], + current_timestep_callback: Callable[[], int], + low_frequency_weight_callback: Callable[[torch.nn.Module], torch.Tensor], + high_frequency_weight_callback: Callable[[torch.nn.Module], torch.Tensor], + ) -> None: + super().__init__() + + self.unconditional_batch_skip_range = unconditional_batch_skip_range + self.unconditional_batch_timestep_skip_range = unconditional_batch_timestep_skip_range + # We can't easily detect what args are to be split in unconditional and conditional branches. We + # can only do it for kwargs, hence they are the only ones we split. The args are passed as-is. + # If a model is to be made compatible with FasterCache, the user must ensure that the inputs that + # contain batchwise-concatenated unconditional and conditional inputs are passed as kwargs. + self.uncond_cond_input_kwargs_identifiers = uncond_cond_input_kwargs_identifiers + self.tensor_format = tensor_format + self.is_guidance_distilled = is_guidance_distilled + + self.current_timestep_callback = current_timestep_callback + self.low_frequency_weight_callback = low_frequency_weight_callback + self.high_frequency_weight_callback = high_frequency_weight_callback + + def initialize_hook(self, module): + self.state = FasterCacheDenoiserState() + return module + + @staticmethod + def _get_cond_input(input: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + # Note: this method assumes that the input tensor is batchwise-concatenated with unconditional inputs + # followed by conditional inputs. + _, cond = input.chunk(2, dim=0) + return cond + + def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any: + # Split the unconditional and conditional inputs. We only want to infer the conditional branch if the + # requirements for skipping the unconditional branch are met as described in the paper. + # We skip the unconditional branch only if the following conditions are met: + # 1. We have completed at least one iteration of the denoiser + # 2. The current timestep is within the range specified by the user. This is the optimal timestep range + # where approximating the unconditional branch from the computation of the conditional branch is possible + # without a significant loss in quality. + # 3. The current iteration is not a multiple of the unconditional batch skip range. This is done so that + # we compute the unconditional branch at least once every few iterations to ensure minimal quality loss. + is_within_timestep_range = ( + self.unconditional_batch_timestep_skip_range[0] + < self.current_timestep_callback() + < self.unconditional_batch_timestep_skip_range[1] + ) + should_skip_uncond = ( + self.state.iteration > 0 + and is_within_timestep_range + and self.state.iteration % self.unconditional_batch_skip_range != 0 + and not self.is_guidance_distilled + ) + + if should_skip_uncond: + is_any_kwarg_uncond = any(k in self.uncond_cond_input_kwargs_identifiers for k in kwargs.keys()) + if is_any_kwarg_uncond: + logger.debug("FasterCache - Skipping unconditional branch computation") + args = tuple([self._get_cond_input(arg) if torch.is_tensor(arg) else arg for arg in args]) + kwargs = { + k: v if k not in self.uncond_cond_input_kwargs_identifiers else self._get_cond_input(v) + for k, v in kwargs.items() + } + + output = self.fn_ref.original_forward(*args, **kwargs) + + if self.is_guidance_distilled: + self.state.iteration += 1 + return output + + if torch.is_tensor(output): + hidden_states = output + elif isinstance(output, (tuple, Transformer2DModelOutput)): + hidden_states = output[0] + + batch_size = hidden_states.size(0) + + if should_skip_uncond: + self.state.low_frequency_delta = self.state.low_frequency_delta * self.low_frequency_weight_callback( + module + ) + self.state.high_frequency_delta = self.state.high_frequency_delta * self.high_frequency_weight_callback( + module + ) + + if self.tensor_format == "BCFHW": + hidden_states = hidden_states.permute(0, 2, 1, 3, 4) + if self.tensor_format == "BCFHW" or self.tensor_format == "BFCHW": + hidden_states = hidden_states.flatten(0, 1) + + low_freq_cond, high_freq_cond = _split_low_high_freq(hidden_states.float()) + + # Approximate/compute the unconditional branch outputs as described in Equation 9 and 10 of the paper + low_freq_uncond = self.state.low_frequency_delta + low_freq_cond + high_freq_uncond = self.state.high_frequency_delta + high_freq_cond + uncond_freq = low_freq_uncond + high_freq_uncond + + uncond_states = torch.fft.ifftshift(uncond_freq) + uncond_states = torch.fft.ifft2(uncond_states).real + + if self.tensor_format == "BCFHW" or self.tensor_format == "BFCHW": + uncond_states = uncond_states.unflatten(0, (batch_size, -1)) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)) + if self.tensor_format == "BCFHW": + uncond_states = uncond_states.permute(0, 2, 1, 3, 4) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4) + + # Concatenate the approximated unconditional and predicted conditional branches + uncond_states = uncond_states.to(hidden_states.dtype) + hidden_states = torch.cat([uncond_states, hidden_states], dim=0) + else: + uncond_states, cond_states = hidden_states.chunk(2, dim=0) + if self.tensor_format == "BCFHW": + uncond_states = uncond_states.permute(0, 2, 1, 3, 4) + cond_states = cond_states.permute(0, 2, 1, 3, 4) + if self.tensor_format == "BCFHW" or self.tensor_format == "BFCHW": + uncond_states = uncond_states.flatten(0, 1) + cond_states = cond_states.flatten(0, 1) + + low_freq_uncond, high_freq_uncond = _split_low_high_freq(uncond_states.float()) + low_freq_cond, high_freq_cond = _split_low_high_freq(cond_states.float()) + self.state.low_frequency_delta = low_freq_uncond - low_freq_cond + self.state.high_frequency_delta = high_freq_uncond - high_freq_cond + + self.state.iteration += 1 + if torch.is_tensor(output): + output = hidden_states + elif isinstance(output, tuple): + output = (hidden_states, *output[1:]) + else: + output.sample = hidden_states + + return output + + def reset_state(self, module: torch.nn.Module) -> torch.nn.Module: + self.state.reset() + return module + + +class FasterCacheBlockHook(ModelHook): + _is_stateful = True + + def __init__( + self, + block_skip_range: int, + timestep_skip_range: tuple[int, int], + is_guidance_distilled: bool, + weight_callback: Callable[[torch.nn.Module], float], + current_timestep_callback: Callable[[], int], + ) -> None: + super().__init__() + + self.block_skip_range = block_skip_range + self.timestep_skip_range = timestep_skip_range + self.is_guidance_distilled = is_guidance_distilled + + self.weight_callback = weight_callback + self.current_timestep_callback = current_timestep_callback + + def initialize_hook(self, module): + self.state = FasterCacheBlockState() + return module + + def _compute_approximated_attention_output( + self, t_2_output: torch.Tensor, t_output: torch.Tensor, weight: float, batch_size: int + ) -> torch.Tensor: + if t_2_output.size(0) != batch_size: + # The cache t_2_output contains both batchwise-concatenated unconditional-conditional branch outputs. Just + # take the conditional branch outputs. + assert t_2_output.size(0) == 2 * batch_size + t_2_output = t_2_output[batch_size:] + if t_output.size(0) != batch_size: + # The cache t_output contains both batchwise-concatenated unconditional-conditional branch outputs. Just + # take the conditional branch outputs. + assert t_output.size(0) == 2 * batch_size + t_output = t_output[batch_size:] + return t_output + (t_output - t_2_output) * weight + + def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any: + batch_size = [ + *[arg.size(0) for arg in args if torch.is_tensor(arg)], + *[v.size(0) for v in kwargs.values() if torch.is_tensor(v)], + ][0] + if self.state.batch_size is None: + # Will be updated on first forward pass through the denoiser + self.state.batch_size = batch_size + + # If we have to skip due to the skip conditions, then let's skip as expected. + # But, we can't skip if the denoiser wants to infer both unconditional and conditional branches. This + # is because the expected output shapes of attention layer will not match if we only return values from + # the cache (which only caches conditional branch outputs). So, if state.batch_size (which is the true + # unconditional-conditional batch size) is same as the current batch size, we don't perform the layer + # skip. Otherwise, we conditionally skip the layer based on what state.skip_callback returns. + is_within_timestep_range = ( + self.timestep_skip_range[0] < self.current_timestep_callback() < self.timestep_skip_range[1] + ) + if not is_within_timestep_range: + should_skip_attention = False + else: + should_compute_attention = self.state.iteration > 0 and self.state.iteration % self.block_skip_range == 0 + should_skip_attention = not should_compute_attention + if should_skip_attention: + should_skip_attention = self.is_guidance_distilled or self.state.batch_size != batch_size + + if should_skip_attention: + logger.debug("FasterCache - Skipping attention and using approximation") + if torch.is_tensor(self.state.cache[-1]): + t_2_output, t_output = self.state.cache + weight = self.weight_callback(module) + output = self._compute_approximated_attention_output(t_2_output, t_output, weight, batch_size) + else: + # The cache contains multiple tensors from past N iterations (N=2 for FasterCache). We need to handle all of them. + # Diffusers blocks can return multiple tensors - let's call them [A, B, C, ...] for simplicity. + # In our cache, we would have [[A_1, B_1, C_1, ...], [A_2, B_2, C_2, ...], ...] where each list is the output from + # a forward pass of the block. We need to compute the approximated output for each of these tensors. + # The zip(*state.cache) operation will give us [(A_1, A_2, ...), (B_1, B_2, ...), (C_1, C_2, ...), ...] which + # allows us to compute the approximated attention output for each tensor in the cache. + output = () + for t_2_output, t_output in zip(*self.state.cache): + result = self._compute_approximated_attention_output( + t_2_output, t_output, self.weight_callback(module), batch_size + ) + output += (result,) + else: + logger.debug("FasterCache - Computing attention") + output = self.fn_ref.original_forward(*args, **kwargs) + + # Note that the following condition for getting hidden_states should suffice since Diffusers blocks either return + # a single hidden_states tensor, or a tuple of (hidden_states, encoder_hidden_states) tensors. We need to handle + # both cases. + if torch.is_tensor(output): + cache_output = output + if not self.is_guidance_distilled and cache_output.size(0) == self.state.batch_size: + # The output here can be both unconditional-conditional branch outputs or just conditional branch outputs. + # This is determined at the higher-level denoiser module. We only want to cache the conditional branch outputs. + cache_output = cache_output.chunk(2, dim=0)[1] + else: + # Cache all return values and perform the same operation as above + cache_output = () + for out in output: + if not self.is_guidance_distilled and out.size(0) == self.state.batch_size: + out = out.chunk(2, dim=0)[1] + cache_output += (out,) + + if self.state.cache is None: + self.state.cache = [cache_output, cache_output] + else: + self.state.cache = [self.state.cache[-1], cache_output] + + self.state.iteration += 1 + return output + + def reset_state(self, module: torch.nn.Module) -> torch.nn.Module: + self.state.reset() + return module + + +def apply_faster_cache(module: torch.nn.Module, config: FasterCacheConfig) -> None: + r""" + Applies [FasterCache](https://huggingface.co/papers/2410.19355) to a given pipeline. + + Args: + module (`torch.nn.Module`): + The pytorch module to apply FasterCache to. Typically, this should be a transformer architecture supported + in Diffusers, such as `CogVideoXTransformer3DModel`, but external implementations may also work. + config (`FasterCacheConfig`): + The configuration to use for FasterCache. + + Example: + ```python + >>> import torch + >>> from diffusers import CogVideoXPipeline, FasterCacheConfig, apply_faster_cache + + >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16) + >>> pipe.to("cuda") + + >>> config = FasterCacheConfig( + ... spatial_attention_block_skip_range=2, + ... spatial_attention_timestep_skip_range=(-1, 681), + ... low_frequency_weight_update_timestep_range=(99, 641), + ... high_frequency_weight_update_timestep_range=(-1, 301), + ... spatial_attention_block_identifiers=["transformer_blocks"], + ... attention_weight_callback=lambda _: 0.3, + ... tensor_format="BFCHW", + ... ) + >>> apply_faster_cache(pipe.transformer, config) + ``` + """ + + logger.warning( + "FasterCache is a purely experimental feature and may not work as expected. Not all models support FasterCache. " + "The API is subject to change in future releases, with no guarantee of backward compatibility. Please report any issues at " + "https://github.com/huggingface/diffusers/issues." + ) + + if config.attention_weight_callback is None: + # If the user has not provided a weight callback, we default to 0.5 for all timesteps. + # In the paper, they recommend using a gradually increasing weight from 0 to 1 as the inference progresses, but + # this depends from model-to-model. It is required by the user to provide a weight callback if they want to + # use a different weight function. Defaulting to 0.5 works well in practice for most cases. + logger.warning( + "No `attention_weight_callback` provided when enabling FasterCache. Defaulting to using a weight of 0.5 for all timesteps." + ) + config.attention_weight_callback = lambda _: 0.5 + + if config.low_frequency_weight_callback is None: + logger.debug( + "Low frequency weight callback not provided when enabling FasterCache. Defaulting to behaviour described in the paper." + ) + + def low_frequency_weight_callback(module: torch.nn.Module) -> float: + is_within_range = ( + config.low_frequency_weight_update_timestep_range[0] + < config.current_timestep_callback() + < config.low_frequency_weight_update_timestep_range[1] + ) + return config.alpha_low_frequency if is_within_range else 1.0 + + config.low_frequency_weight_callback = low_frequency_weight_callback + + if config.high_frequency_weight_callback is None: + logger.debug( + "High frequency weight callback not provided when enabling FasterCache. Defaulting to behaviour described in the paper." + ) + + def high_frequency_weight_callback(module: torch.nn.Module) -> float: + is_within_range = ( + config.high_frequency_weight_update_timestep_range[0] + < config.current_timestep_callback() + < config.high_frequency_weight_update_timestep_range[1] + ) + return config.alpha_high_frequency if is_within_range else 1.0 + + config.high_frequency_weight_callback = high_frequency_weight_callback + + supported_tensor_formats = ["BCFHW", "BFCHW", "BCHW"] # TODO(aryan): Support BSC for LTX Video + if config.tensor_format not in supported_tensor_formats: + raise ValueError(f"`tensor_format` must be one of {supported_tensor_formats}, but got {config.tensor_format}.") + + _apply_faster_cache_on_denoiser(module, config) + + for name, submodule in module.named_modules(): + if not isinstance(submodule, _ATTENTION_CLASSES): + continue + if any(re.search(identifier, name) is not None for identifier in _TRANSFORMER_BLOCK_IDENTIFIERS): + _apply_faster_cache_on_attention_class(name, submodule, config) + + +def _apply_faster_cache_on_denoiser(module: torch.nn.Module, config: FasterCacheConfig) -> None: + hook = FasterCacheDenoiserHook( + config.unconditional_batch_skip_range, + config.unconditional_batch_timestep_skip_range, + config.tensor_format, + config.is_guidance_distilled, + config._unconditional_conditional_input_kwargs_identifiers, + config.current_timestep_callback, + config.low_frequency_weight_callback, + config.high_frequency_weight_callback, + ) + registry = HookRegistry.check_if_exists_or_initialize(module) + registry.register_hook(hook, _FASTER_CACHE_DENOISER_HOOK) + + +def _apply_faster_cache_on_attention_class(name: str, module: AttentionModuleMixin, config: FasterCacheConfig) -> None: + is_spatial_self_attention = ( + any(re.search(identifier, name) is not None for identifier in config.spatial_attention_block_identifiers) + and config.spatial_attention_block_skip_range is not None + and not getattr(module, "is_cross_attention", False) + ) + is_temporal_self_attention = ( + any(re.search(identifier, name) is not None for identifier in config.temporal_attention_block_identifiers) + and config.temporal_attention_block_skip_range is not None + and not module.is_cross_attention + ) + + block_skip_range, timestep_skip_range, block_type = None, None, None + if is_spatial_self_attention: + block_skip_range = config.spatial_attention_block_skip_range + timestep_skip_range = config.spatial_attention_timestep_skip_range + block_type = "spatial" + elif is_temporal_self_attention: + block_skip_range = config.temporal_attention_block_skip_range + timestep_skip_range = config.temporal_attention_timestep_skip_range + block_type = "temporal" + + if block_skip_range is None or timestep_skip_range is None: + logger.debug( + f'Unable to apply FasterCache to the selected layer: "{name}" because it does ' + f"not match any of the required criteria for spatial or temporal attention layers. Note, " + f"however, that this layer may still be valid for applying PAB. Please specify the correct " + f"block identifiers in the configuration or use the specialized `apply_faster_cache_on_module` " + f"function to apply FasterCache to this layer." + ) + return + + logger.debug(f"Enabling FasterCache ({block_type}) for layer: {name}") + hook = FasterCacheBlockHook( + block_skip_range, + timestep_skip_range, + config.is_guidance_distilled, + config.attention_weight_callback, + config.current_timestep_callback, + ) + registry = HookRegistry.check_if_exists_or_initialize(module) + registry.register_hook(hook, _FASTER_CACHE_BLOCK_HOOK) + + +# Reference: https://github.com/Vchitect/FasterCache/blob/fab32c15014636dc854948319c0a9a8d92c7acb4/scripts/latte/faster_cache_sample_latte.py#L127C1-L143C39 +@torch.no_grad() +def _split_low_high_freq(x): + fft = torch.fft.fft2(x) + fft_shifted = torch.fft.fftshift(fft) + height, width = x.shape[-2:] + radius = min(height, width) // 5 + + y_grid, x_grid = torch.meshgrid(torch.arange(height), torch.arange(width)) + center_x, center_y = width // 2, height // 2 + mask = (x_grid - center_x) ** 2 + (y_grid - center_y) ** 2 <= radius**2 + + low_freq_mask = mask.unsqueeze(0).unsqueeze(0).to(x.device) + high_freq_mask = ~low_freq_mask + + low_freq_fft = fft_shifted * low_freq_mask + high_freq_fft = fft_shifted * high_freq_mask + + return low_freq_fft, high_freq_fft diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/first_block_cache.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/first_block_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..685ccd3836742d140cfdacd69c604d24fb2284b3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/first_block_cache.py @@ -0,0 +1,258 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import torch + +from ..utils import get_logger +from ..utils.torch_utils import unwrap_module +from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS +from ._helpers import TransformerBlockRegistry +from .hooks import BaseState, HookRegistry, ModelHook, StateManager + + +logger = get_logger(__name__) # pylint: disable=invalid-name + +_FBC_LEADER_BLOCK_HOOK = "fbc_leader_block_hook" +_FBC_BLOCK_HOOK = "fbc_block_hook" + + +@dataclass +class FirstBlockCacheConfig: + r""" + Configuration for [First Block + Cache](https://github.com/chengzeyi/ParaAttention/blob/7a266123671b55e7e5a2fe9af3121f07a36afc78/README.md#first-block-cache-our-dynamic-caching). + + Args: + threshold (`float`, defaults to `0.05`): + The threshold to determine whether or not a forward pass through all layers of the model is required. A + higher threshold usually results in a forward pass through a lower number of layers and faster inference, + but might lead to poorer generation quality. A lower threshold may not result in significant generation + speedup. The threshold is compared against the absmean difference of the residuals between the current and + cached outputs from the first transformer block. If the difference is below the threshold, the forward pass + is skipped. + """ + + threshold: float = 0.05 + + +class FBCSharedBlockState(BaseState): + def __init__(self) -> None: + super().__init__() + + self.head_block_output: torch.Tensor | tuple[torch.Tensor, ...] = None + self.head_block_residual: torch.Tensor = None + self.tail_block_residuals: torch.Tensor | tuple[torch.Tensor, ...] = None + self.should_compute: bool = True + + def reset(self): + self.tail_block_residuals = None + self.should_compute = True + + +class FBCHeadBlockHook(ModelHook): + _is_stateful = True + + def __init__(self, state_manager: StateManager, threshold: float): + self.state_manager = state_manager + self.threshold = threshold + self._metadata = None + + def initialize_hook(self, module): + unwrapped_module = unwrap_module(module) + self._metadata = TransformerBlockRegistry.get(unwrapped_module.__class__) + return module + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + original_hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs) + + output = self.fn_ref.original_forward(*args, **kwargs) + is_output_tuple = isinstance(output, tuple) + + if is_output_tuple: + hidden_states_residual = output[self._metadata.return_hidden_states_index] - original_hidden_states + else: + hidden_states_residual = output - original_hidden_states + + shared_state: FBCSharedBlockState = self.state_manager.get_state() + hidden_states = encoder_hidden_states = None + should_compute = self._should_compute_remaining_blocks(hidden_states_residual) + shared_state.should_compute = should_compute + + if not should_compute: + # Apply caching + if is_output_tuple: + hidden_states = ( + shared_state.tail_block_residuals[0] + output[self._metadata.return_hidden_states_index] + ) + else: + hidden_states = shared_state.tail_block_residuals[0] + output + + if self._metadata.return_encoder_hidden_states_index is not None: + assert is_output_tuple + encoder_hidden_states = ( + shared_state.tail_block_residuals[1] + output[self._metadata.return_encoder_hidden_states_index] + ) + + if is_output_tuple: + return_output = [None] * len(output) + return_output[self._metadata.return_hidden_states_index] = hidden_states + return_output[self._metadata.return_encoder_hidden_states_index] = encoder_hidden_states + return_output = tuple(return_output) + else: + return_output = hidden_states + output = return_output + else: + if is_output_tuple: + head_block_output = [None] * len(output) + head_block_output[0] = output[self._metadata.return_hidden_states_index] + head_block_output[1] = output[self._metadata.return_encoder_hidden_states_index] + else: + head_block_output = output + shared_state.head_block_output = head_block_output + shared_state.head_block_residual = hidden_states_residual + + return output + + def reset_state(self, module): + self.state_manager.reset() + return module + + @torch.compiler.disable + def _should_compute_remaining_blocks(self, hidden_states_residual: torch.Tensor) -> bool: + shared_state = self.state_manager.get_state() + if shared_state.head_block_residual is None: + return True + prev_hidden_states_residual = shared_state.head_block_residual + absmean = (hidden_states_residual - prev_hidden_states_residual).abs().mean() + prev_hidden_states_absmean = prev_hidden_states_residual.abs().mean() + diff = (absmean / prev_hidden_states_absmean).item() + return diff > self.threshold + + +class FBCBlockHook(ModelHook): + def __init__(self, state_manager: StateManager, is_tail: bool = False): + super().__init__() + self.state_manager = state_manager + self.is_tail = is_tail + self._metadata = None + + def initialize_hook(self, module): + unwrapped_module = unwrap_module(module) + self._metadata = TransformerBlockRegistry.get(unwrapped_module.__class__) + return module + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + original_hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs) + original_encoder_hidden_states = None + if self._metadata.return_encoder_hidden_states_index is not None: + original_encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs( + "encoder_hidden_states", args, kwargs + ) + + shared_state = self.state_manager.get_state() + + if shared_state.should_compute: + output = self.fn_ref.original_forward(*args, **kwargs) + if self.is_tail: + hidden_states_residual = encoder_hidden_states_residual = None + if isinstance(output, tuple): + hidden_states_residual = ( + output[self._metadata.return_hidden_states_index] - shared_state.head_block_output[0] + ) + encoder_hidden_states_residual = ( + output[self._metadata.return_encoder_hidden_states_index] - shared_state.head_block_output[1] + ) + else: + hidden_states_residual = output - shared_state.head_block_output + shared_state.tail_block_residuals = (hidden_states_residual, encoder_hidden_states_residual) + return output + + if original_encoder_hidden_states is None: + return_output = original_hidden_states + else: + return_output = [None, None] + return_output[self._metadata.return_hidden_states_index] = original_hidden_states + return_output[self._metadata.return_encoder_hidden_states_index] = original_encoder_hidden_states + return_output = tuple(return_output) + return return_output + + +def apply_first_block_cache(module: torch.nn.Module, config: FirstBlockCacheConfig) -> None: + """ + Applies [First Block + Cache](https://github.com/chengzeyi/ParaAttention/blob/4de137c5b96416489f06e43e19f2c14a772e28fd/README.md#first-block-cache-our-dynamic-caching) + to a given module. + + First Block Cache builds on the ideas of [TeaCache](https://huggingface.co/papers/2411.19108). It is much simpler + to implement generically for a wide range of models and has been integrated first for experimental purposes. + + Args: + module (`torch.nn.Module`): + The pytorch module to apply FBCache to. Typically, this should be a transformer architecture supported in + Diffusers, such as `CogVideoXTransformer3DModel`, but external implementations may also work. + config (`FirstBlockCacheConfig`): + The configuration to use for applying the FBCache method. + + Example: + ```python + >>> import torch + >>> from diffusers import CogView4Pipeline + >>> from diffusers.hooks import apply_first_block_cache, FirstBlockCacheConfig + + >>> pipe = CogView4Pipeline.from_pretrained("THUDM/CogView4-6B", torch_dtype=torch.bfloat16) + >>> pipe.to("cuda") + + >>> apply_first_block_cache(pipe.transformer, FirstBlockCacheConfig(threshold=0.2)) + + >>> prompt = "A photo of an astronaut riding a horse on mars" + >>> image = pipe(prompt, generator=torch.Generator().manual_seed(42)).images[0] + >>> image.save("output.png") + ``` + """ + + state_manager = StateManager(FBCSharedBlockState, (), {}) + remaining_blocks = [] + + for name, submodule in module.named_children(): + if name not in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS or not isinstance(submodule, torch.nn.ModuleList): + continue + for index, block in enumerate(submodule): + remaining_blocks.append((f"{name}.{index}", block)) + + head_block_name, head_block = remaining_blocks.pop(0) + tail_block_name, tail_block = remaining_blocks.pop(-1) + + logger.debug(f"Applying FBCHeadBlockHook to '{head_block_name}'") + _apply_fbc_head_block_hook(head_block, state_manager, config.threshold) + + for name, block in remaining_blocks: + logger.debug(f"Applying FBCBlockHook to '{name}'") + _apply_fbc_block_hook(block, state_manager) + + logger.debug(f"Applying FBCBlockHook to tail block '{tail_block_name}'") + _apply_fbc_block_hook(tail_block, state_manager, is_tail=True) + + +def _apply_fbc_head_block_hook(block: torch.nn.Module, state_manager: StateManager, threshold: float) -> None: + registry = HookRegistry.check_if_exists_or_initialize(block) + hook = FBCHeadBlockHook(state_manager, threshold) + registry.register_hook(hook, _FBC_LEADER_BLOCK_HOOK) + + +def _apply_fbc_block_hook(block: torch.nn.Module, state_manager: StateManager, is_tail: bool = False) -> None: + registry = HookRegistry.check_if_exists_or_initialize(block) + hook = FBCBlockHook(state_manager, is_tail) + registry.register_hook(hook, _FBC_BLOCK_HOOK) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/group_offloading.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/group_offloading.py new file mode 100644 index 0000000000000000000000000000000000000000..891ac28455af0a8846a5538f752497f2be1d2de6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/group_offloading.py @@ -0,0 +1,966 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import os +from contextlib import contextmanager, nullcontext +from dataclasses import dataclass, replace +from enum import Enum +from typing import Set + +import safetensors.torch +import torch + +from ..utils import get_logger, is_accelerate_available +from ._common import _GO_LC_SUPPORTED_PYTORCH_LAYERS +from .hooks import HookRegistry, ModelHook + + +if is_accelerate_available(): + from accelerate.hooks import AlignDevicesHook, CpuOffload + from accelerate.utils import send_to_device + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +# fmt: off +_GROUP_OFFLOADING = "group_offloading" +_LAYER_EXECUTION_TRACKER = "layer_execution_tracker" +_LAZY_PREFETCH_GROUP_OFFLOADING = "lazy_prefetch_group_offloading" +_GROUP_ID_LAZY_LEAF = "lazy_leafs" +# fmt: on + + +class GroupOffloadingType(str, Enum): + BLOCK_LEVEL = "block_level" + LEAF_LEVEL = "leaf_level" + + +@dataclass +class GroupOffloadingConfig: + onload_device: torch.device + offload_device: torch.device + offload_type: GroupOffloadingType + non_blocking: bool + record_stream: bool + low_cpu_mem_usage: bool + num_blocks_per_group: int | None = None + offload_to_disk_path: str | None = None + stream: torch.cuda.Stream | torch.Stream | None = None + block_modules: list[str] | None = None + exclude_kwargs: list[str] | None = None + module_prefix: str = "" + + +class ModuleGroup: + def __init__( + self, + modules: list[torch.nn.Module], + offload_device: torch.device, + onload_device: torch.device, + offload_leader: torch.nn.Module, + onload_leader: torch.nn.Module | None = None, + parameters: list[torch.nn.Parameter] | None = None, + buffers: list[torch.Tensor] | None = None, + non_blocking: bool = False, + stream: torch.cuda.Stream | torch.Stream | None = None, + record_stream: bool | None = False, + low_cpu_mem_usage: bool = False, + onload_self: bool = True, + offload_to_disk_path: str | None = None, + group_id: int | str | None = None, + ) -> None: + self.modules = modules + self.offload_device = offload_device + self.onload_device = onload_device + self.offload_leader = offload_leader + self.onload_leader = onload_leader + self.parameters = parameters or [] + self.buffers = buffers or [] + self.non_blocking = non_blocking or stream is not None + self.stream = stream + self.record_stream = record_stream + self.onload_self = onload_self + self.low_cpu_mem_usage = low_cpu_mem_usage + + self.offload_to_disk_path = offload_to_disk_path + self._is_offloaded_to_disk = False + + if self.offload_to_disk_path is not None: + # Instead of `group_id or str(id(self))` we do this because `group_id` can be "" as well. + self.group_id = group_id if group_id is not None else str(id(self)) + short_hash = _compute_group_hash(self.group_id) + self.safetensors_file_path = os.path.join(self.offload_to_disk_path, f"group_{short_hash}.safetensors") + + all_tensors = [] + for module in self.modules: + all_tensors.extend(list(module.parameters())) + all_tensors.extend(list(module.buffers())) + all_tensors.extend(self.parameters) + all_tensors.extend(self.buffers) + all_tensors = list(dict.fromkeys(all_tensors)) # Remove duplicates + + self.tensor_to_key = {tensor: f"tensor_{i}" for i, tensor in enumerate(all_tensors)} + self.key_to_tensor = {v: k for k, v in self.tensor_to_key.items()} + self.cpu_param_dict = {} + else: + self.cpu_param_dict = self._init_cpu_param_dict() + + self._torch_accelerator_module = ( + getattr(torch, torch.accelerator.current_accelerator().type) + if hasattr(torch, "accelerator") + else torch.cuda + ) + + def _init_cpu_param_dict(self): + cpu_param_dict = {} + if self.stream is None: + return cpu_param_dict + + for module in self.modules: + for param in module.parameters(): + cpu_param_dict[param] = param.data.cpu() if self.low_cpu_mem_usage else param.data.cpu().pin_memory() + for buffer in module.buffers(): + cpu_param_dict[buffer] = ( + buffer.data.cpu() if self.low_cpu_mem_usage else buffer.data.cpu().pin_memory() + ) + + for param in self.parameters: + cpu_param_dict[param] = param.data.cpu() if self.low_cpu_mem_usage else param.data.cpu().pin_memory() + + for buffer in self.buffers: + cpu_param_dict[buffer] = buffer.data.cpu() if self.low_cpu_mem_usage else buffer.data.cpu().pin_memory() + + return cpu_param_dict + + @contextmanager + def _pinned_memory_tensors(self): + try: + pinned_dict = { + param: tensor.pin_memory() if not tensor.is_pinned() else tensor + for param, tensor in self.cpu_param_dict.items() + } + yield pinned_dict + finally: + pinned_dict = None + + def _transfer_tensor_to_device(self, tensor, source_tensor, default_stream): + tensor.data = source_tensor.to(self.onload_device, non_blocking=self.non_blocking) + if self.record_stream: + tensor.data.record_stream(default_stream) + + def _process_tensors_from_modules(self, pinned_memory=None, default_stream=None): + for group_module in self.modules: + for param in group_module.parameters(): + source = pinned_memory[param] if pinned_memory else param.data + self._transfer_tensor_to_device(param, source, default_stream) + for buffer in group_module.buffers(): + source = pinned_memory[buffer] if pinned_memory else buffer.data + self._transfer_tensor_to_device(buffer, source, default_stream) + + for param in self.parameters: + source = pinned_memory[param] if pinned_memory else param.data + self._transfer_tensor_to_device(param, source, default_stream) + + for buffer in self.buffers: + source = pinned_memory[buffer] if pinned_memory else buffer.data + self._transfer_tensor_to_device(buffer, source, default_stream) + + def _onload_from_disk(self): + if self.stream is not None: + # Wait for previous Host->Device transfer to complete + self.stream.synchronize() + + context = nullcontext() if self.stream is None else self._torch_accelerator_module.stream(self.stream) + current_stream = self._torch_accelerator_module.current_stream() if self.record_stream else None + + with context: + # Load to CPU (if using streams) or directly to target device, pin, and async copy to device + device = str(self.onload_device) if self.stream is None else "cpu" + loaded_tensors = safetensors.torch.load_file(self.safetensors_file_path, device=device) + + if self.stream is not None: + for key, tensor_obj in self.key_to_tensor.items(): + pinned_tensor = loaded_tensors[key].pin_memory() + tensor_obj.data = pinned_tensor.to(self.onload_device, non_blocking=self.non_blocking) + if self.record_stream: + tensor_obj.data.record_stream(current_stream) + else: + onload_device = ( + self.onload_device.type if isinstance(self.onload_device, torch.device) else self.onload_device + ) + loaded_tensors = safetensors.torch.load_file(self.safetensors_file_path, device=onload_device) + for key, tensor_obj in self.key_to_tensor.items(): + tensor_obj.data = loaded_tensors[key] + + def _onload_from_memory(self): + if self.stream is not None: + # Wait for previous Host->Device transfer to complete + self.stream.synchronize() + + context = nullcontext() if self.stream is None else self._torch_accelerator_module.stream(self.stream) + default_stream = self._torch_accelerator_module.current_stream() if self.stream is not None else None + + with context: + if self.stream is not None: + with self._pinned_memory_tensors() as pinned_memory: + self._process_tensors_from_modules(pinned_memory, default_stream=default_stream) + else: + self._process_tensors_from_modules(None) + + def _offload_to_disk(self): + # TODO: we can potentially optimize this code path by checking if the _all_ the desired + # safetensor files exist on the disk and if so, skip this step entirely, reducing IO + # overhead. Currently, we just check if the given `safetensors_file_path` exists and if not + # we perform a write. + # Check if the file has been saved in this session or if it already exists on disk. + if not self._is_offloaded_to_disk and not os.path.exists(self.safetensors_file_path): + os.makedirs(os.path.dirname(self.safetensors_file_path), exist_ok=True) + tensors_to_save = {key: tensor.data.to(self.offload_device) for tensor, key in self.tensor_to_key.items()} + safetensors.torch.save_file(tensors_to_save, self.safetensors_file_path) + + # The group is now considered offloaded to disk for the rest of the session. + self._is_offloaded_to_disk = True + + # We do this to free up the RAM which is still holding the up tensor data. + for tensor_obj in self.tensor_to_key.keys(): + tensor_obj.data = torch.empty_like(tensor_obj.data, device=self.offload_device) + + def _offload_to_memory(self): + if self.stream is not None: + if not self.record_stream: + self._torch_accelerator_module.current_stream().synchronize() + + for group_module in self.modules: + for param in group_module.parameters(): + param.data = self.cpu_param_dict[param] + for param in self.parameters: + param.data = self.cpu_param_dict[param] + for buffer in self.buffers: + buffer.data = self.cpu_param_dict[buffer] + else: + for group_module in self.modules: + group_module.to(self.offload_device, non_blocking=False) + for param in self.parameters: + param.data = param.data.to(self.offload_device, non_blocking=False) + for buffer in self.buffers: + buffer.data = buffer.data.to(self.offload_device, non_blocking=False) + + @torch.compiler.disable() + def onload_(self): + r"""Onloads the group of parameters to the onload_device.""" + if self.offload_to_disk_path is not None: + self._onload_from_disk() + else: + self._onload_from_memory() + + @torch.compiler.disable() + def offload_(self): + r"""Offloads the group of parameters to the offload_device.""" + if self.offload_to_disk_path: + self._offload_to_disk() + else: + self._offload_to_memory() + + +class GroupOffloadingHook(ModelHook): + r""" + A hook that offloads groups of torch.nn.Module to the CPU for storage and onloads to accelerator device for + computation. Each group has one "onload leader" module that is responsible for onloading, and an "offload leader" + module that is responsible for offloading. If prefetching is enabled, the onload leader of the previous module + group is responsible for onloading the current module group. + """ + + _is_stateful = False + + def __init__(self, group: ModuleGroup, *, config: GroupOffloadingConfig) -> None: + self.group = group + self.next_group: ModuleGroup | None = None + self.config = config + + def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module: + if self.group.offload_leader == module: + self.group.offload_() + return module + + def pre_forward(self, module: torch.nn.Module, *args, **kwargs): + # If there wasn't an onload_leader assigned, we assume that the submodule that first called its forward + # method is the onload_leader of the group. + if self.group.onload_leader is None: + self.group.onload_leader = module + + # If the current module is the onload_leader of the group, we onload the group if it is supposed + # to onload itself. In the case of using prefetching with streams, we onload the next group if + # it is not supposed to onload itself. + if self.group.onload_leader == module: + if self.group.onload_self: + self.group.onload_() + else: + # onload_self=False means this group relies on prefetching from a previous group. + # However, for conditionally-executed modules (e.g. patch_short/patch_mid/patch_long in Helios), + # the prefetch chain may not cover them if they were absent during the first forward pass + # when the execution order was traced. In that case, their weights remain on offload_device, + # so we fall back to a synchronous onload here. + params = [p for m in self.group.modules for p in m.parameters()] + list(self.group.parameters) + if params and params[0].device == self.group.offload_device: + self.group.onload_() + if self.group.stream is not None: + self.group.stream.synchronize() + + should_onload_next_group = self.next_group is not None and not self.next_group.onload_self + if should_onload_next_group: + self.next_group.onload_() + + should_synchronize = ( + not self.group.onload_self and self.group.stream is not None and not should_onload_next_group + ) + if should_synchronize: + # If this group didn't onload itself, it means it was asynchronously onloaded by the + # previous group. We need to synchronize the side stream to ensure parameters + # are completely loaded to proceed with forward pass. Without this, uninitialized + # weights will be used in the computation, leading to incorrect results + # Also, we should only do this synchronization if we don't already do it from the sync call in + # self.next_group.onload_, hence the `not should_onload_next_group` check. + self.group.stream.synchronize() + + args = send_to_device(args, self.group.onload_device, non_blocking=self.group.non_blocking) + + # Some Autoencoder models use a feature cache that is passed through submodules + # and modified in place. The `send_to_device` call returns a copy of this feature cache object + # which breaks the inplace updates. Use `exclude_kwargs` to mark these cache features + exclude_kwargs = self.config.exclude_kwargs or [] + if exclude_kwargs: + moved_kwargs = send_to_device( + {k: v for k, v in kwargs.items() if k not in exclude_kwargs}, + self.group.onload_device, + non_blocking=self.group.non_blocking, + ) + kwargs.update(moved_kwargs) + else: + kwargs = send_to_device(kwargs, self.group.onload_device, non_blocking=self.group.non_blocking) + + return args, kwargs + + def post_forward(self, module: torch.nn.Module, output): + if self.group.offload_leader == module: + self.group.offload_() + return output + + +class LazyPrefetchGroupOffloadingHook(ModelHook): + r""" + A hook, used in conjunction with GroupOffloadingHook, that applies lazy prefetching to groups of torch.nn.Module. + This hook is used to determine the order in which the layers are executed during the forward pass. Once the layer + invocation order is known, assignments of the next_group attribute for prefetching can be made, which allows + prefetching groups in the correct order. + """ + + _is_stateful = False + + def __init__(self): + self.execution_order: list[tuple[str, torch.nn.Module]] = [] + self._layer_execution_tracker_module_names = set() + + def initialize_hook(self, module): + def make_execution_order_update_callback(current_name, current_submodule): + def callback(): + if not torch.compiler.is_compiling(): + logger.debug(f"Adding {current_name} to the execution order") + self.execution_order.append((current_name, current_submodule)) + + return callback + + # To every submodule that contains a group offloading hook (at this point, no prefetching is enabled for any + # of the groups), we add a layer execution tracker hook that will be used to determine the order in which the + # layers are executed during the forward pass. + for name, submodule in module.named_modules(): + if name == "" or not hasattr(submodule, "_diffusers_hook"): + continue + + registry = HookRegistry.check_if_exists_or_initialize(submodule) + group_offloading_hook = registry.get_hook(_GROUP_OFFLOADING) + + if group_offloading_hook is not None: + # For the first forward pass, we have to load in a blocking manner + group_offloading_hook.group.non_blocking = False + layer_tracker_hook = LayerExecutionTrackerHook(make_execution_order_update_callback(name, submodule)) + registry.register_hook(layer_tracker_hook, _LAYER_EXECUTION_TRACKER) + self._layer_execution_tracker_module_names.add(name) + + return module + + def post_forward(self, module, output): + # At this point, for the current modules' submodules, we know the execution order of the layers. We can now + # remove the layer execution tracker hooks and apply prefetching by setting the next_group attribute for each + # group offloading hook. + num_executed = len(self.execution_order) + execution_order_module_names = {name for name, _ in self.execution_order} + + # It may be possible that some layers were not executed during the forward pass. This can happen if the layer + # is not used in the forward pass, or if the layer is not executed due to some other reason. In such cases, we + # may not be able to apply prefetching in the correct order, which can lead to device-mismatch related errors + # if the missing layers end up being executed in the future. + if execution_order_module_names != self._layer_execution_tracker_module_names: + unexecuted_layers = list(self._layer_execution_tracker_module_names - execution_order_module_names) + if not torch.compiler.is_compiling(): + logger.warning( + "It seems like some layers were not executed during the forward pass. This may lead to problems when " + "applying lazy prefetching with automatic tracing and lead to device-mismatch related errors. Please " + "make sure that all layers are executed during the forward pass. The following layers were not executed:\n" + f"{unexecuted_layers=}" + ) + + # Remove the layer execution tracker hooks from the submodules + base_module_registry = module._diffusers_hook + registries = [submodule._diffusers_hook for _, submodule in self.execution_order] + group_offloading_hooks = [registry.get_hook(_GROUP_OFFLOADING) for registry in registries] + + for i in range(num_executed): + registries[i].remove_hook(_LAYER_EXECUTION_TRACKER, recurse=False) + + # Remove the current lazy prefetch group offloading hook so that it doesn't interfere with the next forward pass + base_module_registry.remove_hook(_LAZY_PREFETCH_GROUP_OFFLOADING, recurse=False) + + # LazyPrefetchGroupOffloadingHook is only used with streams, so we know that non_blocking should be True. + # We disable non_blocking for the first forward pass, but need to enable it for the subsequent passes to + # see the benefits of prefetching. + for hook in group_offloading_hooks: + hook.group.non_blocking = True + + # Set required attributes for prefetching + if num_executed > 0: + base_module_group_offloading_hook = base_module_registry.get_hook(_GROUP_OFFLOADING) + base_module_group_offloading_hook.next_group = group_offloading_hooks[0].group + base_module_group_offloading_hook.next_group.onload_self = False + + for i in range(num_executed - 1): + name1, _ = self.execution_order[i] + name2, _ = self.execution_order[i + 1] + if not torch.compiler.is_compiling(): + logger.debug(f"Applying lazy prefetch group offloading from {name1} to {name2}") + group_offloading_hooks[i].next_group = group_offloading_hooks[i + 1].group + group_offloading_hooks[i].next_group.onload_self = False + + return output + + +class LayerExecutionTrackerHook(ModelHook): + r""" + A hook that tracks the order in which the layers are executed during the forward pass by calling back to the + LazyPrefetchGroupOffloadingHook to update the execution order. + """ + + _is_stateful = False + + def __init__(self, execution_order_update_callback): + self.execution_order_update_callback = execution_order_update_callback + + def pre_forward(self, module, *args, **kwargs): + self.execution_order_update_callback() + return args, kwargs + + +def apply_group_offloading( + module: torch.nn.Module, + onload_device: str | torch.device, + offload_device: str | torch.device = torch.device("cpu"), + offload_type: str | GroupOffloadingType = "block_level", + num_blocks_per_group: int | None = None, + non_blocking: bool = False, + use_stream: bool = False, + record_stream: bool = False, + low_cpu_mem_usage: bool = False, + offload_to_disk_path: str | None = None, + block_modules: list[str] | None = None, + exclude_kwargs: list[str] | None = None, +) -> None: + r""" + Applies group offloading to the internal layers of a torch.nn.Module. To understand what group offloading is, and + where it is beneficial, we need to first provide some context on how other supported offloading methods work. + + Typically, offloading is done at two levels: + - Module-level: In Diffusers, this can be enabled using the `ModelMixin::enable_model_cpu_offload()` method. It + works by offloading each component of a pipeline to the CPU for storage, and onloading to the accelerator device + when needed for computation. This method is more memory-efficient than keeping all components on the accelerator, + but the memory requirements are still quite high. For this method to work, one needs memory equivalent to size of + the model in runtime dtype + size of largest intermediate activation tensors to be able to complete the forward + pass. + - Leaf-level: In Diffusers, this can be enabled using the `ModelMixin::enable_sequential_cpu_offload()` method. It + works by offloading the lowest leaf-level parameters of the computation graph to the CPU for storage, and + onloading only the leafs to the accelerator device for computation. This uses the lowest amount of accelerator + memory, but can be slower due to the excessive number of device synchronizations. + + Group offloading is a middle ground between the two methods. It works by offloading groups of internal layers, + (either `torch.nn.ModuleList` or `torch.nn.Sequential`). This method uses lower memory than module-level + offloading. It is also faster than leaf-level/sequential offloading, as the number of device synchronizations is + reduced. + + Another supported feature (for CUDA devices with support for asynchronous data transfer streams) is the ability to + overlap data transfer and computation to reduce the overall execution time compared to sequential offloading. This + is enabled using layer prefetching with streams, i.e., the layer that is to be executed next starts onloading to + the accelerator device while the current layer is being executed - this increases the memory requirements slightly. + Note that this implementation also supports leaf-level offloading but can be made much faster when using streams. + + Args: + module (`torch.nn.Module`): + The module to which group offloading is applied. + onload_device (`torch.device`): + The device to which the group of modules are onloaded. + offload_device (`torch.device`, defaults to `torch.device("cpu")`): + The device to which the group of modules are offloaded. This should typically be the CPU. Default is CPU. + offload_type (`str` or `GroupOffloadingType`, defaults to "block_level"): + The type of offloading to be applied. Can be one of "block_level" or "leaf_level". Default is + "block_level". + offload_to_disk_path (`str`, *optional*, defaults to `None`): + The path to the directory where parameters will be offloaded. Setting this option can be useful in limited + RAM environment settings where a reasonable speed-memory trade-off is desired. + num_blocks_per_group (`int`, *optional*): + The number of blocks per group when using offload_type="block_level". This is required when using + offload_type="block_level". + non_blocking (`bool`, defaults to `False`): + If True, offloading and onloading is done with non-blocking data transfer. + use_stream (`bool`, defaults to `False`): + If True, offloading and onloading is done asynchronously using a CUDA stream. This can be useful for + overlapping computation and data transfer. + record_stream (`bool`, defaults to `False`): When enabled with `use_stream`, it marks the current tensor + as having been used by this stream. It is faster at the expense of slightly more memory usage. Refer to the + [PyTorch official docs](https://pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html) more + details. + low_cpu_mem_usage (`bool`, defaults to `False`): + If True, the CPU memory usage is minimized by pinning tensors on-the-fly instead of pre-pinning them. This + option only matters when using streamed CPU offloading (i.e. `use_stream=True`). This can be useful when + the CPU memory is a bottleneck but may counteract the benefits of using streams. + block_modules (`list[str]`, *optional*): + List of module names that should be treated as blocks for offloading. If provided, only these modules will + be considered for block-level offloading. If not provided, the default block detection logic will be used. + exclude_kwargs (`list[str]`, *optional*): + List of kwarg keys that should not be processed by send_to_device. This is useful for mutable state like + caching lists that need to maintain their object identity across forward passes. If not provided, will be + inferred from the module's `_skip_keys` attribute if it exists. + + Example: + ```python + >>> from diffusers import CogVideoXTransformer3DModel + >>> from diffusers.hooks import apply_group_offloading + + >>> transformer = CogVideoXTransformer3DModel.from_pretrained( + ... "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16 + ... ) + + >>> apply_group_offloading( + ... transformer, + ... onload_device=torch.device("cuda"), + ... offload_device=torch.device("cpu"), + ... offload_type="block_level", + ... num_blocks_per_group=2, + ... use_stream=True, + ... ) + ``` + """ + + onload_device = torch.device(onload_device) if isinstance(onload_device, str) else onload_device + offload_device = torch.device(offload_device) if isinstance(offload_device, str) else offload_device + offload_type = GroupOffloadingType(offload_type) + + stream = None + if use_stream: + if torch.cuda.is_available(): + stream = torch.cuda.Stream() + elif hasattr(torch, "xpu") and torch.xpu.is_available(): + stream = torch.Stream() + else: + raise ValueError("Using streams for data transfer requires a CUDA device, or an Intel XPU device.") + + if not use_stream and record_stream: + raise ValueError("`record_stream` cannot be True when `use_stream=False`.") + if offload_type == GroupOffloadingType.BLOCK_LEVEL and num_blocks_per_group is None: + raise ValueError("`num_blocks_per_group` must be provided when using `offload_type='block_level'.") + + _raise_error_if_accelerate_model_or_sequential_hook_present(module) + + if block_modules is None: + block_modules = getattr(module, "_group_offload_block_modules", None) + + if exclude_kwargs is None: + exclude_kwargs = getattr(module, "_skip_keys", None) + + config = GroupOffloadingConfig( + onload_device=onload_device, + offload_device=offload_device, + offload_type=offload_type, + num_blocks_per_group=num_blocks_per_group, + non_blocking=non_blocking, + stream=stream, + record_stream=record_stream, + low_cpu_mem_usage=low_cpu_mem_usage, + offload_to_disk_path=offload_to_disk_path, + block_modules=block_modules, + exclude_kwargs=exclude_kwargs, + ) + _apply_group_offloading(module, config) + + +def _apply_group_offloading(module: torch.nn.Module, config: GroupOffloadingConfig) -> None: + if config.offload_type == GroupOffloadingType.BLOCK_LEVEL: + _apply_group_offloading_block_level(module, config) + elif config.offload_type == GroupOffloadingType.LEAF_LEVEL: + _apply_group_offloading_leaf_level(module, config) + else: + assert False + + +def _apply_group_offloading_block_level(module: torch.nn.Module, config: GroupOffloadingConfig) -> None: + r""" + This function applies offloading to groups of torch.nn.ModuleList or torch.nn.Sequential blocks, and explicitly + defined block modules. In comparison to the "leaf_level" offloading, which is more fine-grained, this offloading is + done at the top-level blocks and modules specified in block_modules. + + When block_modules is provided, only those modules will be treated as blocks for offloading. For each specified + module, recursively apply block offloading to it. + """ + if config.stream is not None and config.num_blocks_per_group != 1: + logger.warning( + f"Using streams is only supported for num_blocks_per_group=1. Got {config.num_blocks_per_group=}. Setting it to 1." + ) + config.num_blocks_per_group = 1 + + block_modules = set(config.block_modules) if config.block_modules is not None else set() + + # Create module groups for ModuleList and Sequential blocks, and explicitly defined block modules + modules_with_group_offloading = set() + unmatched_modules = [] + matched_module_groups = [] + + for name, submodule in module.named_children(): + # Check if this is an explicitly defined block module + if name in block_modules: + # Track submodule using a prefix to avoid filename collisions during disk offload. + # Without this, submodules sharing the same model class would be assigned identical + # filenames (derived from the class name). + prefix = f"{config.module_prefix}{name}." if config.module_prefix else f"{name}." + submodule_config = replace(config, module_prefix=prefix) + + _apply_group_offloading_block_level(submodule, submodule_config) + modules_with_group_offloading.add(name) + + elif isinstance(submodule, (torch.nn.ModuleList, torch.nn.Sequential)): + # Handle ModuleList and Sequential blocks as before + for i in range(0, len(submodule), config.num_blocks_per_group): + current_modules = list(submodule[i : i + config.num_blocks_per_group]) + if len(current_modules) == 0: + continue + + group_id = f"{config.module_prefix}{name}_{i}_{i + len(current_modules) - 1}" + group = ModuleGroup( + modules=current_modules, + offload_device=config.offload_device, + onload_device=config.onload_device, + offload_to_disk_path=config.offload_to_disk_path, + offload_leader=current_modules[-1], + onload_leader=current_modules[0], + non_blocking=config.non_blocking, + stream=config.stream, + record_stream=config.record_stream, + low_cpu_mem_usage=config.low_cpu_mem_usage, + onload_self=True, + group_id=group_id, + ) + matched_module_groups.append(group) + for j in range(i, i + len(current_modules)): + modules_with_group_offloading.add(f"{name}.{j}") + else: + # This is an unmatched module + unmatched_modules.append((name, submodule)) + + # Apply group offloading hooks to the module groups + for i, group in enumerate(matched_module_groups): + for group_module in group.modules: + _apply_group_offloading_hook(group_module, group, config=config) + + # Parameters and Buffers of the top-level module need to be offloaded/onloaded separately + # when the forward pass of this module is called. This is because the top-level module is not + # part of any group (as doing so would lead to no VRAM savings). + parameters = _gather_parameters_with_no_group_offloading_parent(module, modules_with_group_offloading) + buffers = _gather_buffers_with_no_group_offloading_parent(module, modules_with_group_offloading) + parameters = [param for _, param in parameters] + buffers = [buffer for _, buffer in buffers] + + # Create a group for the remaining unmatched submodules of the top-level + # module so that they are on the correct device when the forward pass is called. + unmatched_modules = [unmatched_module for _, unmatched_module in unmatched_modules] + if len(unmatched_modules) > 0 or len(parameters) > 0 or len(buffers) > 0: + unmatched_group = ModuleGroup( + modules=unmatched_modules, + offload_device=config.offload_device, + onload_device=config.onload_device, + offload_to_disk_path=config.offload_to_disk_path, + offload_leader=module, + onload_leader=module, + parameters=parameters, + buffers=buffers, + non_blocking=False, + stream=None, + record_stream=False, + onload_self=True, + group_id=f"{config.module_prefix}{module.__class__.__name__}_unmatched_group", + ) + if config.stream is None: + _apply_group_offloading_hook(module, unmatched_group, config=config) + else: + _apply_lazy_group_offloading_hook(module, unmatched_group, config=config) + + +def _apply_group_offloading_leaf_level(module: torch.nn.Module, config: GroupOffloadingConfig) -> None: + r""" + This function applies offloading to groups of leaf modules in a torch.nn.Module. This method has minimal memory + requirements. However, it can be slower compared to other offloading methods due to the excessive number of device + synchronizations. When using devices that support streams to overlap data transfer and computation, this method can + reduce memory usage without any performance degradation. + """ + # Create module groups for leaf modules and apply group offloading hooks + modules_with_group_offloading = set() + for name, submodule in module.named_modules(): + if not isinstance(submodule, _GO_LC_SUPPORTED_PYTORCH_LAYERS): + continue + group = ModuleGroup( + modules=[submodule], + offload_device=config.offload_device, + onload_device=config.onload_device, + offload_to_disk_path=config.offload_to_disk_path, + offload_leader=submodule, + onload_leader=submodule, + non_blocking=config.non_blocking, + stream=config.stream, + record_stream=config.record_stream, + low_cpu_mem_usage=config.low_cpu_mem_usage, + onload_self=True, + group_id=name, + ) + _apply_group_offloading_hook(submodule, group, config=config) + modules_with_group_offloading.add(name) + + # Parameters and Buffers at all non-leaf levels need to be offloaded/onloaded separately when the forward pass + # of the module is called + module_dict = dict(module.named_modules()) + parameters = _gather_parameters_with_no_group_offloading_parent(module, modules_with_group_offloading) + buffers = _gather_buffers_with_no_group_offloading_parent(module, modules_with_group_offloading) + + # Find closest module parent for each parameter and buffer, and attach group hooks + parent_to_parameters = {} + for name, param in parameters: + parent_name = _find_parent_module_in_module_dict(name, module_dict) + if parent_name in parent_to_parameters: + parent_to_parameters[parent_name].append(param) + else: + parent_to_parameters[parent_name] = [param] + + parent_to_buffers = {} + for name, buffer in buffers: + parent_name = _find_parent_module_in_module_dict(name, module_dict) + if parent_name in parent_to_buffers: + parent_to_buffers[parent_name].append(buffer) + else: + parent_to_buffers[parent_name] = [buffer] + + parent_names = set(parent_to_parameters.keys()) | set(parent_to_buffers.keys()) + for name in parent_names: + parameters = parent_to_parameters.get(name, []) + buffers = parent_to_buffers.get(name, []) + parent_module = module_dict[name] + group = ModuleGroup( + modules=[], + offload_device=config.offload_device, + onload_device=config.onload_device, + offload_leader=parent_module, + onload_leader=parent_module, + offload_to_disk_path=config.offload_to_disk_path, + parameters=parameters, + buffers=buffers, + non_blocking=config.non_blocking, + stream=config.stream, + record_stream=config.record_stream, + low_cpu_mem_usage=config.low_cpu_mem_usage, + onload_self=True, + group_id=name, + ) + _apply_group_offloading_hook(parent_module, group, config=config) + + if config.stream is not None: + # When using streams, we need to know the layer execution order for applying prefetching (to overlap data transfer + # and computation). Since we don't know the order beforehand, we apply a lazy prefetching hook that will find the + # execution order and apply prefetching in the correct order. + unmatched_group = ModuleGroup( + modules=[], + offload_device=config.offload_device, + onload_device=config.onload_device, + offload_to_disk_path=config.offload_to_disk_path, + offload_leader=module, + onload_leader=module, + parameters=None, + buffers=None, + non_blocking=False, + stream=None, + record_stream=False, + low_cpu_mem_usage=config.low_cpu_mem_usage, + onload_self=True, + group_id=_GROUP_ID_LAZY_LEAF, + ) + _apply_lazy_group_offloading_hook(module, unmatched_group, config=config) + + +def _apply_group_offloading_hook( + module: torch.nn.Module, + group: ModuleGroup, + *, + config: GroupOffloadingConfig, +) -> None: + registry = HookRegistry.check_if_exists_or_initialize(module) + + # We may have already registered a group offloading hook if the module had a torch.nn.Parameter whose parent + # is the current module. In such cases, we don't want to overwrite the existing group offloading hook. + if registry.get_hook(_GROUP_OFFLOADING) is None: + hook = GroupOffloadingHook(group, config=config) + registry.register_hook(hook, _GROUP_OFFLOADING) + + +def _apply_lazy_group_offloading_hook( + module: torch.nn.Module, + group: ModuleGroup, + *, + config: GroupOffloadingConfig, +) -> None: + registry = HookRegistry.check_if_exists_or_initialize(module) + + # We may have already registered a group offloading hook if the module had a torch.nn.Parameter whose parent + # is the current module. In such cases, we don't want to overwrite the existing group offloading hook. + if registry.get_hook(_GROUP_OFFLOADING) is None: + hook = GroupOffloadingHook(group, config=config) + registry.register_hook(hook, _GROUP_OFFLOADING) + + lazy_prefetch_hook = LazyPrefetchGroupOffloadingHook() + registry.register_hook(lazy_prefetch_hook, _LAZY_PREFETCH_GROUP_OFFLOADING) + + +def _gather_parameters_with_no_group_offloading_parent( + module: torch.nn.Module, modules_with_group_offloading: Set[str] +) -> list[torch.nn.Parameter]: + parameters = [] + for name, parameter in module.named_parameters(): + has_parent_with_group_offloading = False + atoms = name.split(".") + while len(atoms) > 0: + parent_name = ".".join(atoms) + if parent_name in modules_with_group_offloading: + has_parent_with_group_offloading = True + break + atoms.pop() + if not has_parent_with_group_offloading: + parameters.append((name, parameter)) + return parameters + + +def _gather_buffers_with_no_group_offloading_parent( + module: torch.nn.Module, modules_with_group_offloading: Set[str] +) -> list[torch.Tensor]: + buffers = [] + for name, buffer in module.named_buffers(): + has_parent_with_group_offloading = False + atoms = name.split(".") + while len(atoms) > 0: + parent_name = ".".join(atoms) + if parent_name in modules_with_group_offloading: + has_parent_with_group_offloading = True + break + atoms.pop() + if not has_parent_with_group_offloading: + buffers.append((name, buffer)) + return buffers + + +def _find_parent_module_in_module_dict(name: str, module_dict: dict[str, torch.nn.Module]) -> str: + atoms = name.split(".") + while len(atoms) > 0: + parent_name = ".".join(atoms) + if parent_name in module_dict: + return parent_name + atoms.pop() + return "" + + +def _raise_error_if_accelerate_model_or_sequential_hook_present(module: torch.nn.Module) -> None: + if not is_accelerate_available(): + return + for name, submodule in module.named_modules(): + if not hasattr(submodule, "_hf_hook"): + continue + if isinstance(submodule._hf_hook, (AlignDevicesHook, CpuOffload)): + raise ValueError( + f"Cannot apply group offloading to a module that is already applying an alternative " + f"offloading strategy from Accelerate. If you want to apply group offloading, please " + f"disable the existing offloading strategy first. Offending module: {name} ({type(submodule)})" + ) + + +def _get_top_level_group_offload_hook(module: torch.nn.Module) -> GroupOffloadingHook | None: + for submodule in module.modules(): + if hasattr(submodule, "_diffusers_hook"): + group_offloading_hook = submodule._diffusers_hook.get_hook(_GROUP_OFFLOADING) + if group_offloading_hook is not None: + return group_offloading_hook + return None + + +def _is_group_offload_enabled(module: torch.nn.Module) -> bool: + top_level_group_offload_hook = _get_top_level_group_offload_hook(module) + return top_level_group_offload_hook is not None + + +def _get_group_onload_device(module: torch.nn.Module) -> torch.device: + top_level_group_offload_hook = _get_top_level_group_offload_hook(module) + if top_level_group_offload_hook is not None: + return top_level_group_offload_hook.config.onload_device + raise ValueError("Group offloading is not enabled for the provided module.") + + +def _compute_group_hash(group_id): + hashed_id = hashlib.sha256(group_id.encode("utf-8")).hexdigest() + # first 16 characters for a reasonably short but unique name + return hashed_id[:16] + + +def _maybe_remove_and_reapply_group_offloading(module: torch.nn.Module) -> None: + r""" + Removes the group offloading hook from the module and re-applies it. This is useful when the module has been + modified in-place and the group offloading hook references-to-tensors needs to be updated. The in-place + modification can happen in a number of ways, for example, fusing QKV or unloading/loading LoRAs on-the-fly. + + In this implementation, we make an assumption that group offloading has only been applied at the top-level module, + and therefore all submodules have the same onload and offload devices. If this assumption is not true, say in the + case where user has applied group offloading at multiple levels, this function will not work as expected. + + There is some performance penalty associated with doing this when non-default streams are used, because we need to + retrace the execution order of the layers with `LazyPrefetchGroupOffloadingHook`. + """ + top_level_group_offload_hook = _get_top_level_group_offload_hook(module) + + if top_level_group_offload_hook is None: + return + + registry = HookRegistry.check_if_exists_or_initialize(module) + registry.remove_hook(_GROUP_OFFLOADING, recurse=True) + registry.remove_hook(_LAYER_EXECUTION_TRACKER, recurse=True) + registry.remove_hook(_LAZY_PREFETCH_GROUP_OFFLOADING, recurse=True) + + _apply_group_offloading(module, top_level_group_offload_hook.config) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/hooks.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..2d575b85427c65caff46d60e2004fa2745a22f76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/hooks.py @@ -0,0 +1,291 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +from typing import Any + +import torch + +from ..utils.logging import get_logger +from ..utils.torch_utils import unwrap_module + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +class BaseState: + def reset(self, *args, **kwargs) -> None: + raise NotImplementedError( + "BaseState::reset is not implemented. Please implement this method in the derived class." + ) + + +class StateManager: + def __init__(self, state_cls: BaseState, init_args=None, init_kwargs=None): + self._state_cls = state_cls + self._init_args = init_args if init_args is not None else () + self._init_kwargs = init_kwargs if init_kwargs is not None else {} + self._state_cache = {} + self._current_context = None + + def get_state(self): + if self._current_context is None: + raise ValueError("No context is set. Please set a context before retrieving the state.") + if self._current_context not in self._state_cache.keys(): + self._state_cache[self._current_context] = self._state_cls(*self._init_args, **self._init_kwargs) + return self._state_cache[self._current_context] + + def set_context(self, name: str) -> None: + self._current_context = name + + def reset(self, *args, **kwargs) -> None: + for name, state in list(self._state_cache.items()): + state.reset(*args, **kwargs) + self._state_cache.pop(name) + self._current_context = None + + +class ModelHook: + r""" + A hook that contains callbacks to be executed just before and after the forward method of a model. + """ + + _is_stateful = False + + def __init__(self): + self.fn_ref: "HookFunctionReference" = None + + def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module: + r""" + Hook that is executed when a model is initialized. + + Args: + module (`torch.nn.Module`): + The module attached to this hook. + """ + return module + + def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module: + r""" + Hook that is executed when a model is deinitialized. + + Args: + module (`torch.nn.Module`): + The module attached to this hook. + """ + return module + + def pre_forward(self, module: torch.nn.Module, *args, **kwargs) -> tuple[tuple[Any], dict[str, Any]]: + r""" + Hook that is executed just before the forward method of the model. + + Args: + module (`torch.nn.Module`): + The module whose forward pass will be executed just after this event. + args (`tuple[Any]`): + The positional arguments passed to the module. + kwargs (`dict[Str, Any]`): + The keyword arguments passed to the module. + Returns: + `tuple[tuple[Any], dict[Str, Any]]`: + A tuple with the treated `args` and `kwargs`. + """ + return args, kwargs + + def post_forward(self, module: torch.nn.Module, output: Any) -> Any: + r""" + Hook that is executed just after the forward method of the model. + + Args: + module (`torch.nn.Module`): + The module whose forward pass been executed just before this event. + output (`Any`): + The output of the module. + Returns: + `Any`: The processed `output`. + """ + return output + + def detach_hook(self, module: torch.nn.Module) -> torch.nn.Module: + r""" + Hook that is executed when the hook is detached from a module. + + Args: + module (`torch.nn.Module`): + The module detached from this hook. + """ + return module + + def reset_state(self, module: torch.nn.Module): + if self._is_stateful: + raise NotImplementedError("This hook is stateful and needs to implement the `reset_state` method.") + return module + + def _set_context(self, module: torch.nn.Module, name: str) -> None: + # Iterate over all attributes of the hook to see if any of them have the type `StateManager`. If so, call `set_context` on them. + for attr_name in dir(self): + attr = getattr(self, attr_name) + if isinstance(attr, StateManager): + attr.set_context(name) + return module + + +class HookFunctionReference: + def __init__(self) -> None: + """A container class that maintains mutable references to forward pass functions in a hook chain. + + Its mutable nature allows the hook system to modify the execution chain dynamically without rebuilding the + entire forward pass structure. + + Attributes: + pre_forward: A callable that processes inputs before the main forward pass. + post_forward: A callable that processes outputs after the main forward pass. + forward: The current forward function in the hook chain. + original_forward: The original forward function, stored when a hook provides a custom new_forward. + + The class enables hook removal by allowing updates to the forward chain through reference modification rather + than requiring reconstruction of the entire chain. When a hook is removed, only the relevant references need to + be updated, preserving the execution order of the remaining hooks. + """ + self.pre_forward = None + self.post_forward = None + self.forward = None + self.original_forward = None + + +class HookRegistry: + def __init__(self, module_ref: torch.nn.Module) -> None: + super().__init__() + + self.hooks: dict[str, ModelHook] = {} + + self._module_ref = module_ref + self._hook_order = [] + self._fn_refs = [] + + def register_hook(self, hook: ModelHook, name: str) -> None: + if name in self.hooks.keys(): + raise ValueError( + f"Hook with name {name} already exists in the registry. Please use a different name or " + f"first remove the existing hook and then add a new one." + ) + + self._module_ref = hook.initialize_hook(self._module_ref) + + def create_new_forward(function_reference: HookFunctionReference): + def new_forward(module, *args, **kwargs): + args, kwargs = function_reference.pre_forward(module, *args, **kwargs) + output = function_reference.forward(*args, **kwargs) + return function_reference.post_forward(module, output) + + return new_forward + + forward = self._module_ref.forward + + fn_ref = HookFunctionReference() + fn_ref.pre_forward = hook.pre_forward + fn_ref.post_forward = hook.post_forward + fn_ref.forward = forward + + if hasattr(hook, "new_forward"): + fn_ref.original_forward = forward + fn_ref.forward = functools.update_wrapper( + functools.partial(hook.new_forward, self._module_ref), hook.new_forward + ) + + rewritten_forward = create_new_forward(fn_ref) + self._module_ref.forward = functools.update_wrapper( + functools.partial(rewritten_forward, self._module_ref), rewritten_forward + ) + + hook.fn_ref = fn_ref + self.hooks[name] = hook + self._hook_order.append(name) + self._fn_refs.append(fn_ref) + + def get_hook(self, name: str) -> ModelHook | None: + return self.hooks.get(name, None) + + def remove_hook(self, name: str, recurse: bool = True) -> None: + if name in self.hooks.keys(): + num_hooks = len(self._hook_order) + hook = self.hooks[name] + index = self._hook_order.index(name) + fn_ref = self._fn_refs[index] + + old_forward = fn_ref.forward + if fn_ref.original_forward is not None: + old_forward = fn_ref.original_forward + + if index == num_hooks - 1: + self._module_ref.forward = old_forward + else: + self._fn_refs[index + 1].forward = old_forward + + self._module_ref = hook.deinitalize_hook(self._module_ref) + del self.hooks[name] + self._hook_order.pop(index) + self._fn_refs.pop(index) + + if recurse: + for module_name, module in self._module_ref.named_modules(): + if module_name == "": + continue + if hasattr(module, "_diffusers_hook"): + module._diffusers_hook.remove_hook(name, recurse=False) + + def reset_stateful_hooks(self, recurse: bool = True) -> None: + for hook_name in reversed(self._hook_order): + hook = self.hooks[hook_name] + if hook._is_stateful: + hook.reset_state(self._module_ref) + + if recurse: + for module_name, module in unwrap_module(self._module_ref).named_modules(): + if module_name == "": + continue + module = unwrap_module(module) + if hasattr(module, "_diffusers_hook"): + module._diffusers_hook.reset_stateful_hooks(recurse=False) + + @classmethod + def check_if_exists_or_initialize(cls, module: torch.nn.Module) -> "HookRegistry": + if not hasattr(module, "_diffusers_hook"): + module._diffusers_hook = cls(module) + return module._diffusers_hook + + def _set_context(self, name: str | None = None) -> None: + for hook_name in reversed(self._hook_order): + hook = self.hooks[hook_name] + if hook._is_stateful: + hook._set_context(self._module_ref, name) + + for module_name, module in unwrap_module(self._module_ref).named_modules(): + if module_name == "": + continue + module = unwrap_module(module) + if hasattr(module, "_diffusers_hook"): + module._diffusers_hook._set_context(name) + + def __repr__(self) -> str: + registry_repr = "" + for i, hook_name in enumerate(self._hook_order): + if self.hooks[hook_name].__class__.__repr__ is not object.__repr__: + hook_repr = self.hooks[hook_name].__repr__() + else: + hook_repr = self.hooks[hook_name].__class__.__name__ + registry_repr += f" ({i}) {hook_name} - {hook_repr}" + if i < len(self._hook_order) - 1: + registry_repr += "\n" + return f"HookRegistry(\n{registry_repr}\n)" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/layer_skip.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/layer_skip.py new file mode 100644 index 0000000000000000000000000000000000000000..112edfa2f79bbf1f91bb944ffd5a0c1ccbe4134b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/layer_skip.py @@ -0,0 +1,263 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import asdict, dataclass +from typing import Callable + +import torch + +from ..utils import get_logger +from ..utils.torch_utils import unwrap_module +from ._common import ( + _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, + _ATTENTION_CLASSES, + _FEEDFORWARD_CLASSES, + _get_submodule_from_fqn, +) +from ._helpers import AttentionProcessorRegistry, TransformerBlockRegistry +from .hooks import HookRegistry, ModelHook + + +logger = get_logger(__name__) # pylint: disable=invalid-name + +_LAYER_SKIP_HOOK = "layer_skip_hook" + + +# Aryan/YiYi TODO: we need to make guider class a config mixin so I think this is not needed +# either remove or make it serializable +@dataclass +class LayerSkipConfig: + r""" + Configuration for skipping internal transformer blocks when executing a transformer model. + + Args: + indices (`list[int]`): + The indices of the layer to skip. This is typically the first layer in the transformer block. + fqn (`str`, defaults to `"auto"`): + The fully qualified name identifying the stack of transformer blocks. Typically, this is + `transformer_blocks`, `single_transformer_blocks`, `blocks`, `layers`, or `temporal_transformer_blocks`. + For automatic detection, set this to `"auto"`. "auto" only works on DiT models. For UNet models, you must + provide the correct fqn. + skip_attention (`bool`, defaults to `True`): + Whether to skip attention blocks. + skip_ff (`bool`, defaults to `True`): + Whether to skip feed-forward blocks. + skip_attention_scores (`bool`, defaults to `False`): + Whether to skip attention score computation in the attention blocks. This is equivalent to using `value` + projections as the output of scaled dot product attention. + dropout (`float`, defaults to `1.0`): + The dropout probability for dropping the outputs of the skipped layers. By default, this is set to `1.0`, + meaning that the outputs of the skipped layers are completely ignored. If set to `0.0`, the outputs of the + skipped layers are fully retained, which is equivalent to not skipping any layers. + """ + + indices: list[int] + fqn: str = "auto" + skip_attention: bool = True + skip_attention_scores: bool = False + skip_ff: bool = True + dropout: float = 1.0 + + def __post_init__(self): + if not (0 <= self.dropout <= 1): + raise ValueError(f"Expected `dropout` to be between 0.0 and 1.0, but got {self.dropout}.") + if not math.isclose(self.dropout, 1.0) and self.skip_attention_scores: + raise ValueError( + "Cannot set `skip_attention_scores` to True when `dropout` is not 1.0. Please set `dropout` to 1.0." + ) + + def to_dict(self): + return asdict(self) + + @staticmethod + def from_dict(data: dict) -> "LayerSkipConfig": + return LayerSkipConfig(**data) + + +class AttentionScoreSkipFunctionMode(torch.overrides.TorchFunctionMode): + def __torch_function__(self, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + if func is torch.nn.functional.scaled_dot_product_attention: + query = kwargs.get("query", None) + key = kwargs.get("key", None) + value = kwargs.get("value", None) + query = query if query is not None else args[0] + key = key if key is not None else args[1] + value = value if value is not None else args[2] + # If the Q sequence length does not match KV sequence length, methods like + # Perturbed Attention Guidance cannot be used (because the caller expects + # the same sequence length as Q, but if we return V here, it will not match). + # When Q.shape[2] != V.shape[2], PAG will essentially not be applied and + # the overall effect would that be of normal CFG with a scale of (guidance_scale + perturbed_guidance_scale). + if query.shape[2] == value.shape[2]: + return value + return func(*args, **kwargs) + + +class AttentionProcessorSkipHook(ModelHook): + def __init__(self, skip_processor_output_fn: Callable, skip_attention_scores: bool = False, dropout: float = 1.0): + self.skip_processor_output_fn = skip_processor_output_fn + self.skip_attention_scores = skip_attention_scores + self.dropout = dropout + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + if self.skip_attention_scores: + if not math.isclose(self.dropout, 1.0): + raise ValueError( + "Cannot set `skip_attention_scores` to True when `dropout` is not 1.0. Please set `dropout` to 1.0." + ) + with AttentionScoreSkipFunctionMode(): + output = self.fn_ref.original_forward(*args, **kwargs) + else: + if math.isclose(self.dropout, 1.0): + output = self.skip_processor_output_fn(module, *args, **kwargs) + else: + output = self.fn_ref.original_forward(*args, **kwargs) + output = torch.nn.functional.dropout(output, p=self.dropout) + return output + + +class FeedForwardSkipHook(ModelHook): + def __init__(self, dropout: float): + super().__init__() + self.dropout = dropout + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + if math.isclose(self.dropout, 1.0): + output = kwargs.get("hidden_states", None) + if output is None: + output = kwargs.get("x", None) + if output is None and len(args) > 0: + output = args[0] + else: + output = self.fn_ref.original_forward(*args, **kwargs) + output = torch.nn.functional.dropout(output, p=self.dropout) + return output + + +class TransformerBlockSkipHook(ModelHook): + def __init__(self, dropout: float): + super().__init__() + self.dropout = dropout + + def initialize_hook(self, module): + self._metadata = TransformerBlockRegistry.get(unwrap_module(module).__class__) + return module + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + if math.isclose(self.dropout, 1.0): + original_hidden_states = self._metadata._get_parameter_from_args_kwargs("hidden_states", args, kwargs) + if self._metadata.return_encoder_hidden_states_index is None: + output = original_hidden_states + else: + original_encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs( + "encoder_hidden_states", args, kwargs + ) + output = (original_hidden_states, original_encoder_hidden_states) + else: + output = self.fn_ref.original_forward(*args, **kwargs) + output = torch.nn.functional.dropout(output, p=self.dropout) + return output + + +def apply_layer_skip(module: torch.nn.Module, config: LayerSkipConfig) -> None: + r""" + Apply layer skipping to internal layers of a transformer. + + Args: + module (`torch.nn.Module`): + The transformer model to which the layer skip hook should be applied. + config (`LayerSkipConfig`): + The configuration for the layer skip hook. + + Example: + + ```python + >>> from diffusers import apply_layer_skip_hook, CogVideoXTransformer3DModel, LayerSkipConfig + + >>> transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16) + >>> config = LayerSkipConfig(layer_index=[10, 20], fqn="transformer_blocks") + >>> apply_layer_skip_hook(transformer, config) + ``` + """ + _apply_layer_skip_hook(module, config) + + +def _apply_layer_skip_hook(module: torch.nn.Module, config: LayerSkipConfig, name: str | None = None) -> None: + name = name or _LAYER_SKIP_HOOK + + if config.skip_attention and config.skip_attention_scores: + raise ValueError("Cannot set both `skip_attention` and `skip_attention_scores` to True. Please choose one.") + if not math.isclose(config.dropout, 1.0) and config.skip_attention_scores: + raise ValueError( + "Cannot set `skip_attention_scores` to True when `dropout` is not 1.0. Please set `dropout` to 1.0." + ) + + if config.fqn == "auto": + for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS: + if hasattr(module, identifier): + config.fqn = identifier + break + else: + raise ValueError( + "Could not find a suitable identifier for the transformer blocks automatically. Please provide a valid " + "`fqn` (fully qualified name) that identifies a stack of transformer blocks." + ) + + transformer_blocks = _get_submodule_from_fqn(module, config.fqn) + if transformer_blocks is None or not isinstance(transformer_blocks, torch.nn.ModuleList): + raise ValueError( + f"Could not find {config.fqn} in the provided module, or configured `fqn` (fully qualified name) does not identify " + f"a `torch.nn.ModuleList`. Please provide a valid `fqn` that identifies a stack of transformer blocks." + ) + if len(config.indices) == 0: + raise ValueError("Layer index list is empty. Please provide a non-empty list of layer indices to skip.") + + blocks_found = False + for i, block in enumerate(transformer_blocks): + if i not in config.indices: + continue + + blocks_found = True + + if config.skip_attention and config.skip_ff: + logger.debug(f"Applying TransformerBlockSkipHook to '{config.fqn}.{i}'") + registry = HookRegistry.check_if_exists_or_initialize(block) + hook = TransformerBlockSkipHook(config.dropout) + registry.register_hook(hook, name) + + elif config.skip_attention or config.skip_attention_scores: + for submodule_name, submodule in block.named_modules(): + if isinstance(submodule, _ATTENTION_CLASSES) and not submodule.is_cross_attention: + logger.debug(f"Applying AttentionProcessorSkipHook to '{config.fqn}.{i}.{submodule_name}'") + output_fn = AttentionProcessorRegistry.get(submodule.processor.__class__).skip_processor_output_fn + registry = HookRegistry.check_if_exists_or_initialize(submodule) + hook = AttentionProcessorSkipHook(output_fn, config.skip_attention_scores, config.dropout) + registry.register_hook(hook, name) + + if config.skip_ff: + for submodule_name, submodule in block.named_modules(): + if isinstance(submodule, _FEEDFORWARD_CLASSES): + logger.debug(f"Applying FeedForwardSkipHook to '{config.fqn}.{i}.{submodule_name}'") + registry = HookRegistry.check_if_exists_or_initialize(submodule) + hook = FeedForwardSkipHook(config.dropout) + registry.register_hook(hook, name) + + if not blocks_found: + raise ValueError( + f"Could not find any transformer blocks matching the provided indices {config.indices} and " + f"fully qualified name '{config.fqn}'. Please check the indices and fqn for correctness." + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/layerwise_casting.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/layerwise_casting.py new file mode 100644 index 0000000000000000000000000000000000000000..1edff7805fcc36eda54d1be3a7153b50f332f618 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/layerwise_casting.py @@ -0,0 +1,240 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from typing import Type + +import torch + +from ..utils import get_logger, is_peft_available, is_peft_version +from ._common import _GO_LC_SUPPORTED_PYTORCH_LAYERS +from .hooks import HookRegistry, ModelHook + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +# fmt: off +_LAYERWISE_CASTING_HOOK = "layerwise_casting" +_PEFT_AUTOCAST_DISABLE_HOOK = "peft_autocast_disable" +DEFAULT_SKIP_MODULES_PATTERN = ("pos_embed", "patch_embed", "norm", "^proj_in$", "^proj_out$") +# fmt: on + +_SHOULD_DISABLE_PEFT_INPUT_AUTOCAST = is_peft_available() and is_peft_version(">", "0.14.0") +if _SHOULD_DISABLE_PEFT_INPUT_AUTOCAST: + from peft.helpers import disable_input_dtype_casting + from peft.tuners.tuners_utils import BaseTunerLayer + + +class LayerwiseCastingHook(ModelHook): + r""" + A hook that casts the weights of a module to a high precision dtype for computation, and to a low precision dtype + for storage. This process may lead to quality loss in the output, but can significantly reduce the memory + footprint. + """ + + _is_stateful = False + + def __init__(self, storage_dtype: torch.dtype, compute_dtype: torch.dtype, non_blocking: bool) -> None: + self.storage_dtype = storage_dtype + self.compute_dtype = compute_dtype + self.non_blocking = non_blocking + + def initialize_hook(self, module: torch.nn.Module): + module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking) + return module + + def deinitalize_hook(self, module: torch.nn.Module): + raise NotImplementedError( + "LayerwiseCastingHook does not support deinitialization. A model once enabled with layerwise casting will " + "have casted its weights to a lower precision dtype for storage. Casting this back to the original dtype " + "will lead to precision loss, which might have an impact on the model's generation quality. The model should " + "be re-initialized and loaded in the original dtype." + ) + + def pre_forward(self, module: torch.nn.Module, *args, **kwargs): + module.to(dtype=self.compute_dtype, non_blocking=self.non_blocking) + return args, kwargs + + def post_forward(self, module: torch.nn.Module, output): + module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking) + return output + + +class PeftInputAutocastDisableHook(ModelHook): + r""" + A hook that disables the casting of inputs to the module weight dtype during the forward pass. By default, PEFT + casts the inputs to the weight dtype of the module, which can lead to precision loss. + + The reasons for needing this are: + - If we don't add PEFT layers' weight names to `skip_modules_pattern` when applying layerwise casting, the + inputs will be casted to the, possibly lower precision, storage dtype. Reference: + https://github.com/huggingface/peft/blob/0facdebf6208139cbd8f3586875acb378813dd97/src/peft/tuners/lora/layer.py#L706 + - We can, on our end, use something like accelerate's `send_to_device` but for dtypes. This way, we can ensure + that the inputs are casted to the computation dtype correctly always. However, there are two goals we are + hoping to achieve: + 1. Making forward implementations independent of device/dtype casting operations as much as possible. + 2. Performing inference without losing information from casting to different precisions. With the current + PEFT implementation (as linked in the reference above), and assuming running layerwise casting inference + with storage_dtype=torch.float8_e4m3fn and compute_dtype=torch.bfloat16, inputs are cast to + torch.float8_e4m3fn in the lora layer. We will then upcast back to torch.bfloat16 when we continue the + forward pass in PEFT linear forward or Diffusers layer forward, with a `send_to_dtype` operation from + LayerwiseCastingHook. This will be a lossy operation and result in poorer generation quality. + """ + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + with disable_input_dtype_casting(module): + return self.fn_ref.original_forward(*args, **kwargs) + + +def apply_layerwise_casting( + module: torch.nn.Module, + storage_dtype: torch.dtype, + compute_dtype: torch.dtype, + skip_modules_pattern: str | tuple[str, ...] = "auto", + skip_modules_classes: tuple[Type[torch.nn.Module], ...] | None = None, + non_blocking: bool = False, +) -> None: + r""" + Applies layerwise casting to a given module. The module expected here is a Diffusers ModelMixin but it can be any + nn.Module using diffusers layers or pytorch primitives. + + Example: + + ```python + >>> import torch + >>> from diffusers import CogVideoXTransformer3DModel + + >>> transformer = CogVideoXTransformer3DModel.from_pretrained( + ... model_id, subfolder="transformer", torch_dtype=torch.bfloat16 + ... ) + + >>> apply_layerwise_casting( + ... transformer, + ... storage_dtype=torch.float8_e4m3fn, + ... compute_dtype=torch.bfloat16, + ... skip_modules_pattern=["patch_embed", "norm", "proj_out"], + ... non_blocking=True, + ... ) + ``` + + Args: + module (`torch.nn.Module`): + The module whose leaf modules will be cast to a high precision dtype for computation, and to a low + precision dtype for storage. + storage_dtype (`torch.dtype`): + The dtype to cast the module to before/after the forward pass for storage. + compute_dtype (`torch.dtype`): + The dtype to cast the module to during the forward pass for computation. + skip_modules_pattern (`tuple[str, ...]`, defaults to `"auto"`): + A list of patterns to match the names of the modules to skip during the layerwise casting process. If set + to `"auto"`, the default patterns are used. If set to `None`, no modules are skipped. If set to `None` + alongside `skip_modules_classes` being `None`, the layerwise casting is applied directly to the module + instead of its internal submodules. + skip_modules_classes (`tuple[Type[torch.nn.Module], ...]`, defaults to `None`): + A list of module classes to skip during the layerwise casting process. + non_blocking (`bool`, defaults to `False`): + If `True`, the weight casting operations are non-blocking. + """ + if skip_modules_pattern == "auto": + skip_modules_pattern = DEFAULT_SKIP_MODULES_PATTERN + + if skip_modules_classes is None and skip_modules_pattern is None: + apply_layerwise_casting_hook(module, storage_dtype, compute_dtype, non_blocking) + return + + _apply_layerwise_casting( + module, + storage_dtype, + compute_dtype, + skip_modules_pattern, + skip_modules_classes, + non_blocking, + ) + _disable_peft_input_autocast(module) + + +def _apply_layerwise_casting( + module: torch.nn.Module, + storage_dtype: torch.dtype, + compute_dtype: torch.dtype, + skip_modules_pattern: tuple[str, ...] | None = None, + skip_modules_classes: tuple[Type[torch.nn.Module], ...] | None = None, + non_blocking: bool = False, + _prefix: str = "", +) -> None: + should_skip = (skip_modules_classes is not None and isinstance(module, skip_modules_classes)) or ( + skip_modules_pattern is not None and any(re.search(pattern, _prefix) for pattern in skip_modules_pattern) + ) + if should_skip: + logger.debug(f'Skipping layerwise casting for layer "{_prefix}"') + return + + if isinstance(module, _GO_LC_SUPPORTED_PYTORCH_LAYERS): + logger.debug(f'Applying layerwise casting to layer "{_prefix}"') + apply_layerwise_casting_hook(module, storage_dtype, compute_dtype, non_blocking) + return + + for name, submodule in module.named_children(): + layer_name = f"{_prefix}.{name}" if _prefix else name + _apply_layerwise_casting( + submodule, + storage_dtype, + compute_dtype, + skip_modules_pattern, + skip_modules_classes, + non_blocking, + _prefix=layer_name, + ) + + +def apply_layerwise_casting_hook( + module: torch.nn.Module, storage_dtype: torch.dtype, compute_dtype: torch.dtype, non_blocking: bool +) -> None: + r""" + Applies a `LayerwiseCastingHook` to a given module. + + Args: + module (`torch.nn.Module`): + The module to attach the hook to. + storage_dtype (`torch.dtype`): + The dtype to cast the module to before the forward pass. + compute_dtype (`torch.dtype`): + The dtype to cast the module to during the forward pass. + non_blocking (`bool`): + If `True`, the weight casting operations are non-blocking. + """ + registry = HookRegistry.check_if_exists_or_initialize(module) + hook = LayerwiseCastingHook(storage_dtype, compute_dtype, non_blocking) + registry.register_hook(hook, _LAYERWISE_CASTING_HOOK) + + +def _is_layerwise_casting_active(module: torch.nn.Module) -> bool: + for submodule in module.modules(): + if ( + hasattr(submodule, "_diffusers_hook") + and submodule._diffusers_hook.get_hook(_LAYERWISE_CASTING_HOOK) is not None + ): + return True + return False + + +def _disable_peft_input_autocast(module: torch.nn.Module) -> None: + if not _SHOULD_DISABLE_PEFT_INPUT_AUTOCAST: + return + for submodule in module.modules(): + if isinstance(submodule, BaseTunerLayer) and _is_layerwise_casting_active(submodule): + registry = HookRegistry.check_if_exists_or_initialize(submodule) + hook = PeftInputAutocastDisableHook() + registry.register_hook(hook, _PEFT_AUTOCAST_DISABLE_HOOK) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/mag_cache.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/mag_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..d28cd2d793b61e5a8d0496614bb4be445a3057e2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/mag_cache.py @@ -0,0 +1,468 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import List, Optional, Tuple, Union + +import torch + +from ..utils import get_logger +from ..utils.torch_utils import unwrap_module +from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS +from ._helpers import TransformerBlockRegistry +from .hooks import BaseState, HookRegistry, ModelHook, StateManager + + +logger = get_logger(__name__) # pylint: disable=invalid-name + +_MAG_CACHE_LEADER_BLOCK_HOOK = "mag_cache_leader_block_hook" +_MAG_CACHE_BLOCK_HOOK = "mag_cache_block_hook" + +# Default Mag Ratios for Flux models (Dev/Schnell) are provided for convenience. +# Users must explicitly pass these to the config if using Flux. +# Reference: https://github.com/Zehong-Ma/MagCache +FLUX_MAG_RATIOS = torch.tensor( + [1.0] + + [ + 1.21094, + 1.11719, + 1.07812, + 1.0625, + 1.03906, + 1.03125, + 1.03906, + 1.02344, + 1.03125, + 1.02344, + 0.98047, + 1.01562, + 1.00781, + 1.0, + 1.00781, + 1.0, + 1.00781, + 1.0, + 1.0, + 0.99609, + 0.99609, + 0.98047, + 0.98828, + 0.96484, + 0.95703, + 0.93359, + 0.89062, + ] +) + + +def nearest_interp(src_array: torch.Tensor, target_length: int) -> torch.Tensor: + """ + Interpolate the source array to the target length using nearest neighbor interpolation. + """ + src_length = len(src_array) + if target_length == 1: + return src_array[-1:] + + scale = (src_length - 1) / (target_length - 1) + grid = torch.arange(target_length, device=src_array.device, dtype=torch.float32) + mapped_indices = torch.round(grid * scale).long() + return src_array[mapped_indices] + + +@dataclass +class MagCacheConfig: + r""" + Configuration for [MagCache](https://github.com/Zehong-Ma/MagCache). + + Args: + threshold (`float`, defaults to `0.06`): + The threshold for the accumulated error. If the accumulated error is below this threshold, the block + computation is skipped. A higher threshold allows for more aggressive skipping (faster) but may degrade + quality. + max_skip_steps (`int`, defaults to `3`): + The maximum number of consecutive steps that can be skipped (K in the paper). + retention_ratio (`float`, defaults to `0.2`): + The fraction of initial steps during which skipping is disabled to ensure stability. For example, if + `num_inference_steps` is 28 and `retention_ratio` is 0.2, the first 6 steps will never be skipped. + num_inference_steps (`int`, defaults to `28`): + The number of inference steps used in the pipeline. This is required to interpolate `mag_ratios` correctly. + mag_ratios (`torch.Tensor`, *optional*): + The pre-computed magnitude ratios for the model. These are checkpoint-dependent. If not provided, you must + set `calibrate=True` to calculate them for your specific model. For Flux models, you can use + `diffusers.hooks.mag_cache.FLUX_MAG_RATIOS`. + calibrate (`bool`, defaults to `False`): + If True, enables calibration mode. In this mode, no blocks are skipped. Instead, the hook calculates the + magnitude ratios for the current run and logs them at the end. Use this to obtain `mag_ratios` for new + models or schedulers. + """ + + threshold: float = 0.06 + max_skip_steps: int = 3 + retention_ratio: float = 0.2 + num_inference_steps: int = 28 + mag_ratios: Optional[Union[torch.Tensor, List[float]]] = None + calibrate: bool = False + + def __post_init__(self): + # User MUST provide ratios OR enable calibration. + if self.mag_ratios is None and not self.calibrate: + raise ValueError( + " `mag_ratios` must be provided for MagCache inference because these ratios are model-dependent.\n" + "To get them for your model:\n" + "1. Initialize `MagCacheConfig(calibrate=True, ...)`\n" + "2. Run inference on your model once.\n" + "3. Copy the printed ratios array and pass it to `mag_ratios` in the config.\n" + "For Flux models, you can import `FLUX_MAG_RATIOS` from `diffusers.hooks.mag_cache`." + ) + + if not self.calibrate and self.mag_ratios is not None: + if not torch.is_tensor(self.mag_ratios): + self.mag_ratios = torch.tensor(self.mag_ratios) + + if len(self.mag_ratios) != self.num_inference_steps: + logger.debug( + f"Interpolating mag_ratios from length {len(self.mag_ratios)} to {self.num_inference_steps}" + ) + self.mag_ratios = nearest_interp(self.mag_ratios, self.num_inference_steps) + + +class MagCacheState(BaseState): + def __init__(self) -> None: + super().__init__() + # Cache for the residual (output - input) from the *previous* timestep + self.previous_residual: torch.Tensor = None + + # State inputs/outputs for the current forward pass + self.head_block_input: Union[torch.Tensor, Tuple[torch.Tensor, ...]] = None + self.should_compute: bool = True + + # MagCache accumulators + self.accumulated_ratio: float = 1.0 + self.accumulated_err: float = 0.0 + self.accumulated_steps: int = 0 + + # Current step counter (timestep index) + self.step_index: int = 0 + + # Calibration storage + self.calibration_ratios: List[float] = [] + + def reset(self): + self.previous_residual = None + self.should_compute = True + self.accumulated_ratio = 1.0 + self.accumulated_err = 0.0 + self.accumulated_steps = 0 + self.step_index = 0 + self.calibration_ratios = [] + + +class MagCacheHeadHook(ModelHook): + _is_stateful = True + + def __init__(self, state_manager: StateManager, config: MagCacheConfig): + self.state_manager = state_manager + self.config = config + self._metadata = None + + def initialize_hook(self, module): + unwrapped_module = unwrap_module(module) + self._metadata = TransformerBlockRegistry.get(unwrapped_module.__class__) + return module + + @torch.compiler.disable + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + if self.state_manager._current_context is None: + self.state_manager.set_context("inference") + + arg_name = self._metadata.hidden_states_argument_name + hidden_states = self._metadata._get_parameter_from_args_kwargs(arg_name, args, kwargs) + + state: MagCacheState = self.state_manager.get_state() + state.head_block_input = hidden_states + + should_compute = True + + if self.config.calibrate: + # Never skip during calibration + should_compute = True + else: + # MagCache Logic + current_step = state.step_index + if current_step >= len(self.config.mag_ratios): + current_scale = 1.0 + else: + current_scale = self.config.mag_ratios[current_step] + + retention_step = int(self.config.retention_ratio * self.config.num_inference_steps + 0.5) + + if current_step >= retention_step: + state.accumulated_ratio *= current_scale + state.accumulated_steps += 1 + state.accumulated_err += abs(1.0 - state.accumulated_ratio) + + if ( + state.previous_residual is not None + and state.accumulated_err <= self.config.threshold + and state.accumulated_steps <= self.config.max_skip_steps + ): + should_compute = False + else: + state.accumulated_ratio = 1.0 + state.accumulated_steps = 0 + state.accumulated_err = 0.0 + + state.should_compute = should_compute + + if not should_compute: + logger.debug(f"MagCache: Skipping step {state.step_index}") + # Apply MagCache: Output = Input + Previous Residual + + output = hidden_states + res = state.previous_residual + + if res.device != output.device: + res = res.to(output.device) + + # Attempt to apply residual handling shape mismatches (e.g., text+image vs image only) + if res.shape == output.shape: + output = output + res + elif ( + output.ndim == 3 + and res.ndim == 3 + and output.shape[0] == res.shape[0] + and output.shape[2] == res.shape[2] + ): + # Assuming concatenation where image part is at the end (standard in Flux/SD3) + diff = output.shape[1] - res.shape[1] + if diff > 0: + output = output.clone() + output[:, diff:, :] = output[:, diff:, :] + res + else: + logger.warning( + f"MagCache: Dimension mismatch. Input {output.shape}, Residual {res.shape}. " + "Cannot apply residual safely. Returning input without residual." + ) + else: + logger.warning( + f"MagCache: Dimension mismatch. Input {output.shape}, Residual {res.shape}. " + "Cannot apply residual safely. Returning input without residual." + ) + + if self._metadata.return_encoder_hidden_states_index is not None: + original_encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs( + "encoder_hidden_states", args, kwargs + ) + max_idx = max( + self._metadata.return_hidden_states_index, self._metadata.return_encoder_hidden_states_index + ) + ret_list = [None] * (max_idx + 1) + ret_list[self._metadata.return_hidden_states_index] = output + ret_list[self._metadata.return_encoder_hidden_states_index] = original_encoder_hidden_states + return tuple(ret_list) + else: + return output + + else: + # Compute original forward + output = self.fn_ref.original_forward(*args, **kwargs) + return output + + def reset_state(self, module): + self.state_manager.reset() + return module + + +class MagCacheBlockHook(ModelHook): + def __init__(self, state_manager: StateManager, is_tail: bool = False, config: MagCacheConfig = None): + super().__init__() + self.state_manager = state_manager + self.is_tail = is_tail + self.config = config + self._metadata = None + + def initialize_hook(self, module): + unwrapped_module = unwrap_module(module) + self._metadata = TransformerBlockRegistry.get(unwrapped_module.__class__) + return module + + @torch.compiler.disable + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + if self.state_manager._current_context is None: + self.state_manager.set_context("inference") + state: MagCacheState = self.state_manager.get_state() + + if not state.should_compute: + arg_name = self._metadata.hidden_states_argument_name + hidden_states = self._metadata._get_parameter_from_args_kwargs(arg_name, args, kwargs) + + if self.is_tail: + # Still need to advance step index even if we skip + self._advance_step(state) + + if self._metadata.return_encoder_hidden_states_index is not None: + encoder_hidden_states = self._metadata._get_parameter_from_args_kwargs( + "encoder_hidden_states", args, kwargs + ) + max_idx = max( + self._metadata.return_hidden_states_index, self._metadata.return_encoder_hidden_states_index + ) + ret_list = [None] * (max_idx + 1) + ret_list[self._metadata.return_hidden_states_index] = hidden_states + ret_list[self._metadata.return_encoder_hidden_states_index] = encoder_hidden_states + return tuple(ret_list) + + return hidden_states + + output = self.fn_ref.original_forward(*args, **kwargs) + + if self.is_tail: + # Calculate residual for next steps + if isinstance(output, tuple): + out_hidden = output[self._metadata.return_hidden_states_index] + else: + out_hidden = output + + in_hidden = state.head_block_input + + if in_hidden is None: + return output + + # Determine residual + if out_hidden.shape == in_hidden.shape: + residual = out_hidden - in_hidden + elif out_hidden.ndim == 3 and in_hidden.ndim == 3 and out_hidden.shape[2] == in_hidden.shape[2]: + diff = in_hidden.shape[1] - out_hidden.shape[1] + if diff == 0: + residual = out_hidden - in_hidden + else: + residual = out_hidden - in_hidden # Fallback to matching tail + else: + # Fallback for completely mismatched shapes + residual = out_hidden + + if self.config.calibrate: + self._perform_calibration_step(state, residual) + + state.previous_residual = residual + self._advance_step(state) + + return output + + def _perform_calibration_step(self, state: MagCacheState, current_residual: torch.Tensor): + if state.previous_residual is None: + # First step has no previous residual to compare against. + # log 1.0 as a neutral starting point. + ratio = 1.0 + else: + # MagCache Calibration Formula: mean(norm(curr) / norm(prev)) + # norm(dim=-1) gives magnitude of each token vector + curr_norm = torch.linalg.norm(current_residual.float(), dim=-1) + prev_norm = torch.linalg.norm(state.previous_residual.float(), dim=-1) + + # Avoid division by zero + ratio = (curr_norm / (prev_norm + 1e-8)).mean().item() + + state.calibration_ratios.append(ratio) + + def _advance_step(self, state: MagCacheState): + state.step_index += 1 + if state.step_index >= self.config.num_inference_steps: + # End of inference loop + if self.config.calibrate: + print("\n[MagCache] Calibration Complete. Copy these values to MagCacheConfig(mag_ratios=...):") + print(f"{state.calibration_ratios}\n") + logger.info(f"MagCache Calibration Results: {state.calibration_ratios}") + + # Reset state + state.step_index = 0 + state.accumulated_ratio = 1.0 + state.accumulated_steps = 0 + state.accumulated_err = 0.0 + state.previous_residual = None + state.calibration_ratios = [] + + +def apply_mag_cache(module: torch.nn.Module, config: MagCacheConfig) -> None: + """ + Applies MagCache to a given module (typically a Transformer). + + Args: + module (`torch.nn.Module`): + The module to apply MagCache to. + config (`MagCacheConfig`): + The configuration for MagCache. + """ + # Initialize registry on the root module so the Pipeline can set context. + HookRegistry.check_if_exists_or_initialize(module) + + state_manager = StateManager(MagCacheState, (), {}) + remaining_blocks = [] + + for name, submodule in module.named_children(): + if name not in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS or not isinstance(submodule, torch.nn.ModuleList): + continue + for index, block in enumerate(submodule): + remaining_blocks.append((f"{name}.{index}", block)) + + if not remaining_blocks: + logger.warning("MagCache: No transformer blocks found to apply hooks.") + return + + # Handle single-block models + if len(remaining_blocks) == 1: + name, block = remaining_blocks[0] + logger.info(f"MagCache: Applying Head+Tail Hooks to single block '{name}'") + _apply_mag_cache_block_hook(block, state_manager, config, is_tail=True) + _apply_mag_cache_head_hook(block, state_manager, config) + return + + head_block_name, head_block = remaining_blocks.pop(0) + tail_block_name, tail_block = remaining_blocks.pop(-1) + + logger.info(f"MagCache: Applying Head Hook to {head_block_name}") + _apply_mag_cache_head_hook(head_block, state_manager, config) + + for name, block in remaining_blocks: + _apply_mag_cache_block_hook(block, state_manager, config) + + logger.info(f"MagCache: Applying Tail Hook to {tail_block_name}") + _apply_mag_cache_block_hook(tail_block, state_manager, config, is_tail=True) + + +def _apply_mag_cache_head_hook(block: torch.nn.Module, state_manager: StateManager, config: MagCacheConfig) -> None: + registry = HookRegistry.check_if_exists_or_initialize(block) + + # Automatically remove existing hook to allow re-application (e.g. switching modes) + if registry.get_hook(_MAG_CACHE_LEADER_BLOCK_HOOK) is not None: + registry.remove_hook(_MAG_CACHE_LEADER_BLOCK_HOOK) + + hook = MagCacheHeadHook(state_manager, config) + registry.register_hook(hook, _MAG_CACHE_LEADER_BLOCK_HOOK) + + +def _apply_mag_cache_block_hook( + block: torch.nn.Module, + state_manager: StateManager, + config: MagCacheConfig, + is_tail: bool = False, +) -> None: + registry = HookRegistry.check_if_exists_or_initialize(block) + + # Automatically remove existing hook to allow re-application + if registry.get_hook(_MAG_CACHE_BLOCK_HOOK) is not None: + registry.remove_hook(_MAG_CACHE_BLOCK_HOOK) + + hook = MagCacheBlockHook(state_manager, is_tail, config) + registry.register_hook(hook, _MAG_CACHE_BLOCK_HOOK) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/pyramid_attention_broadcast.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/pyramid_attention_broadcast.py new file mode 100644 index 0000000000000000000000000000000000000000..ed5bd24dea01790f0e0926e1942ee90cd3327481 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/pyramid_attention_broadcast.py @@ -0,0 +1,314 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from dataclasses import dataclass +from typing import Any, Callable + +import torch + +from ..models.attention import AttentionModuleMixin +from ..models.attention_processor import Attention, MochiAttention +from ..utils import logging +from ._common import ( + _ATTENTION_CLASSES, + _CROSS_TRANSFORMER_BLOCK_IDENTIFIERS, + _SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS, + _TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS, +) +from .hooks import HookRegistry, ModelHook + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +_PYRAMID_ATTENTION_BROADCAST_HOOK = "pyramid_attention_broadcast" + + +@dataclass +class PyramidAttentionBroadcastConfig: + r""" + Configuration for Pyramid Attention Broadcast. + + Args: + spatial_attention_block_skip_range (`int`, *optional*, defaults to `None`): + The number of times a specific spatial attention broadcast is skipped before computing the attention states + to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., + old attention states will be reused) before computing the new attention states again. + temporal_attention_block_skip_range (`int`, *optional*, defaults to `None`): + The number of times a specific temporal attention broadcast is skipped before computing the attention + states to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times + (i.e., old attention states will be reused) before computing the new attention states again. + cross_attention_block_skip_range (`int`, *optional*, defaults to `None`): + The number of times a specific cross-attention broadcast is skipped before computing the attention states + to re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., + old attention states will be reused) before computing the new attention states again. + spatial_attention_timestep_skip_range (`tuple[int, int]`, defaults to `(100, 800)`): + The range of timesteps to skip in the spatial attention layer. The attention computations will be + conditionally skipped if the current timestep is within the specified range. + temporal_attention_timestep_skip_range (`tuple[int, int]`, defaults to `(100, 800)`): + The range of timesteps to skip in the temporal attention layer. The attention computations will be + conditionally skipped if the current timestep is within the specified range. + cross_attention_timestep_skip_range (`tuple[int, int]`, defaults to `(100, 800)`): + The range of timesteps to skip in the cross-attention layer. The attention computations will be + conditionally skipped if the current timestep is within the specified range. + spatial_attention_block_identifiers (`tuple[str, ...]`): + The identifiers to match against the layer names to determine if the layer is a spatial attention layer. + temporal_attention_block_identifiers (`tuple[str, ...]`): + The identifiers to match against the layer names to determine if the layer is a temporal attention layer. + cross_attention_block_identifiers (`tuple[str, ...]`): + The identifiers to match against the layer names to determine if the layer is a cross-attention layer. + """ + + spatial_attention_block_skip_range: int | None = None + temporal_attention_block_skip_range: int | None = None + cross_attention_block_skip_range: int | None = None + + spatial_attention_timestep_skip_range: tuple[int, int] = (100, 800) + temporal_attention_timestep_skip_range: tuple[int, int] = (100, 800) + cross_attention_timestep_skip_range: tuple[int, int] = (100, 800) + + spatial_attention_block_identifiers: tuple[str, ...] = _SPATIAL_TRANSFORMER_BLOCK_IDENTIFIERS + temporal_attention_block_identifiers: tuple[str, ...] = _TEMPORAL_TRANSFORMER_BLOCK_IDENTIFIERS + cross_attention_block_identifiers: tuple[str, ...] = _CROSS_TRANSFORMER_BLOCK_IDENTIFIERS + + current_timestep_callback: Callable[[], int] = None + + # TODO(aryan): add PAB for MLP layers (very limited speedup from testing with original codebase + # so not added for now) + + def __repr__(self) -> str: + return ( + f"PyramidAttentionBroadcastConfig(\n" + f" spatial_attention_block_skip_range={self.spatial_attention_block_skip_range},\n" + f" temporal_attention_block_skip_range={self.temporal_attention_block_skip_range},\n" + f" cross_attention_block_skip_range={self.cross_attention_block_skip_range},\n" + f" spatial_attention_timestep_skip_range={self.spatial_attention_timestep_skip_range},\n" + f" temporal_attention_timestep_skip_range={self.temporal_attention_timestep_skip_range},\n" + f" cross_attention_timestep_skip_range={self.cross_attention_timestep_skip_range},\n" + f" spatial_attention_block_identifiers={self.spatial_attention_block_identifiers},\n" + f" temporal_attention_block_identifiers={self.temporal_attention_block_identifiers},\n" + f" cross_attention_block_identifiers={self.cross_attention_block_identifiers},\n" + f" current_timestep_callback={self.current_timestep_callback}\n" + ")" + ) + + +class PyramidAttentionBroadcastState: + r""" + State for Pyramid Attention Broadcast. + + Attributes: + iteration (`int`): + The current iteration of the Pyramid Attention Broadcast. It is necessary to ensure that `reset_state` is + called before starting a new inference forward pass for PAB to work correctly. + cache (`Any`): + The cached output from the previous forward pass. This is used to re-use the attention states when the + attention computation is skipped. It is either a tensor or a tuple of tensors, depending on the module. + """ + + def __init__(self) -> None: + self.iteration = 0 + self.cache = None + + def reset(self): + self.iteration = 0 + self.cache = None + + def __repr__(self): + cache_repr = "" + if self.cache is None: + cache_repr = "None" + else: + cache_repr = f"Tensor(shape={self.cache.shape}, dtype={self.cache.dtype})" + return f"PyramidAttentionBroadcastState(iteration={self.iteration}, cache={cache_repr})" + + +class PyramidAttentionBroadcastHook(ModelHook): + r"""A hook that applies Pyramid Attention Broadcast to a given module.""" + + _is_stateful = True + + def __init__( + self, timestep_skip_range: tuple[int, int], block_skip_range: int, current_timestep_callback: Callable[[], int] + ) -> None: + super().__init__() + + self.timestep_skip_range = timestep_skip_range + self.block_skip_range = block_skip_range + self.current_timestep_callback = current_timestep_callback + + def initialize_hook(self, module): + self.state = PyramidAttentionBroadcastState() + return module + + def new_forward(self, module: torch.nn.Module, *args, **kwargs) -> Any: + is_within_timestep_range = ( + self.timestep_skip_range[0] < self.current_timestep_callback() < self.timestep_skip_range[1] + ) + should_compute_attention = ( + self.state.cache is None + or self.state.iteration == 0 + or not is_within_timestep_range + or self.state.iteration % self.block_skip_range == 0 + ) + + if should_compute_attention: + output = self.fn_ref.original_forward(*args, **kwargs) + else: + output = self.state.cache + + self.state.cache = output + self.state.iteration += 1 + return output + + def reset_state(self, module: torch.nn.Module) -> None: + self.state.reset() + return module + + +def apply_pyramid_attention_broadcast(module: torch.nn.Module, config: PyramidAttentionBroadcastConfig): + r""" + Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given pipeline. + + PAB is an attention approximation method that leverages the similarity in attention states between timesteps to + reduce the computational cost of attention computation. The key takeaway from the paper is that the attention + similarity in the cross-attention layers between timesteps is high, followed by less similarity in the temporal and + spatial layers. This allows for the skipping of attention computation in the cross-attention layers more frequently + than in the temporal and spatial layers. Applying PAB will, therefore, speedup the inference process. + + Args: + module (`torch.nn.Module`): + The module to apply Pyramid Attention Broadcast to. + config (`PyramidAttentionBroadcastConfig | None`, `optional`, defaults to `None`): + The configuration to use for Pyramid Attention Broadcast. + + Example: + + ```python + >>> import torch + >>> from diffusers import CogVideoXPipeline, PyramidAttentionBroadcastConfig, apply_pyramid_attention_broadcast + >>> from diffusers.utils import export_to_video + + >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16) + >>> pipe.to("cuda") + + >>> config = PyramidAttentionBroadcastConfig( + ... spatial_attention_block_skip_range=2, + ... spatial_attention_timestep_skip_range=(100, 800), + ... current_timestep_callback=lambda: pipe.current_timestep, + ... ) + >>> apply_pyramid_attention_broadcast(pipe.transformer, config) + ``` + """ + if config.current_timestep_callback is None: + raise ValueError( + "The `current_timestep_callback` function must be provided in the configuration to apply Pyramid Attention Broadcast." + ) + + if ( + config.spatial_attention_block_skip_range is None + and config.temporal_attention_block_skip_range is None + and config.cross_attention_block_skip_range is None + ): + logger.warning( + "Pyramid Attention Broadcast requires one or more of `spatial_attention_block_skip_range`, `temporal_attention_block_skip_range` " + "or `cross_attention_block_skip_range` parameters to be set to an integer, not `None`. Defaulting to using `spatial_attention_block_skip_range=2`. " + "To avoid this warning, please set one of the above parameters." + ) + config.spatial_attention_block_skip_range = 2 + + for name, submodule in module.named_modules(): + if not isinstance(submodule, (*_ATTENTION_CLASSES, AttentionModuleMixin)): + # PAB has been implemented specific to Diffusers' Attention classes. However, this does not mean that PAB + # cannot be applied to this layer. For custom layers, users can extend this functionality and implement + # their own PAB logic similar to `_apply_pyramid_attention_broadcast_on_attention_class`. + continue + _apply_pyramid_attention_broadcast_on_attention_class(name, submodule, config) + + +def _apply_pyramid_attention_broadcast_on_attention_class( + name: str, module: Attention, config: PyramidAttentionBroadcastConfig +) -> bool: + is_spatial_self_attention = ( + any(re.search(identifier, name) is not None for identifier in config.spatial_attention_block_identifiers) + and config.spatial_attention_block_skip_range is not None + and not getattr(module, "is_cross_attention", False) + ) + is_temporal_self_attention = ( + any(re.search(identifier, name) is not None for identifier in config.temporal_attention_block_identifiers) + and config.temporal_attention_block_skip_range is not None + and not getattr(module, "is_cross_attention", False) + ) + is_cross_attention = ( + any(re.search(identifier, name) is not None for identifier in config.cross_attention_block_identifiers) + and config.cross_attention_block_skip_range is not None + and getattr(module, "is_cross_attention", False) + ) + + block_skip_range, timestep_skip_range, block_type = None, None, None + if is_spatial_self_attention: + block_skip_range = config.spatial_attention_block_skip_range + timestep_skip_range = config.spatial_attention_timestep_skip_range + block_type = "spatial" + elif is_temporal_self_attention: + block_skip_range = config.temporal_attention_block_skip_range + timestep_skip_range = config.temporal_attention_timestep_skip_range + block_type = "temporal" + elif is_cross_attention: + block_skip_range = config.cross_attention_block_skip_range + timestep_skip_range = config.cross_attention_timestep_skip_range + block_type = "cross" + + if block_skip_range is None or timestep_skip_range is None: + logger.info( + f'Unable to apply Pyramid Attention Broadcast to the selected layer: "{name}" because it does ' + f"not match any of the required criteria for spatial, temporal or cross attention layers. Note, " + f"however, that this layer may still be valid for applying PAB. Please specify the correct " + f"block identifiers in the configuration." + ) + return False + + logger.debug(f"Enabling Pyramid Attention Broadcast ({block_type}) in layer: {name}") + _apply_pyramid_attention_broadcast_hook( + module, timestep_skip_range, block_skip_range, config.current_timestep_callback + ) + return True + + +def _apply_pyramid_attention_broadcast_hook( + module: Attention | MochiAttention, + timestep_skip_range: tuple[int, int], + block_skip_range: int, + current_timestep_callback: Callable[[], int], +): + r""" + Apply [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) to a given torch.nn.Module. + + Args: + module (`torch.nn.Module`): + The module to apply Pyramid Attention Broadcast to. + timestep_skip_range (`tuple[int, int]`): + The range of timesteps to skip in the attention layer. The attention computations will be conditionally + skipped if the current timestep is within the specified range. + block_skip_range (`int`): + The number of times a specific attention broadcast is skipped before computing the attention states to + re-use. If this is set to the value `N`, the attention computation will be skipped `N - 1` times (i.e., old + attention states will be reused) before computing the new attention states again. + current_timestep_callback (`Callable[[], int]`): + A callback function that returns the current inference timestep. + """ + registry = HookRegistry.check_if_exists_or_initialize(module) + hook = PyramidAttentionBroadcastHook(timestep_skip_range, block_skip_range, current_timestep_callback) + registry.register_hook(hook, _PYRAMID_ATTENTION_BROADCAST_HOOK) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/smoothed_energy_guidance_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/smoothed_energy_guidance_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f413b6376d7c400d7c64e801bac9c7ef6b38b23c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/smoothed_energy_guidance_utils.py @@ -0,0 +1,166 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import asdict, dataclass + +import torch +import torch.nn.functional as F + +from ..utils import get_logger +from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, _ATTENTION_CLASSES, _get_submodule_from_fqn +from .hooks import HookRegistry, ModelHook + + +logger = get_logger(__name__) # pylint: disable=invalid-name + +_SMOOTHED_ENERGY_GUIDANCE_HOOK = "smoothed_energy_guidance_hook" + + +@dataclass +class SmoothedEnergyGuidanceConfig: + r""" + Configuration for skipping internal transformer blocks when executing a transformer model. + + Args: + indices (`list[int]`): + The indices of the layer to skip. This is typically the first layer in the transformer block. + fqn (`str`, defaults to `"auto"`): + The fully qualified name identifying the stack of transformer blocks. Typically, this is + `transformer_blocks`, `single_transformer_blocks`, `blocks`, `layers`, or `temporal_transformer_blocks`. + For automatic detection, set this to `"auto"`. "auto" only works on DiT models. For UNet models, you must + provide the correct fqn. + _query_proj_identifiers (`list[str]`, defaults to `None`): + The identifiers for the query projection layers. Typically, these are `to_q`, `query`, or `q_proj`. If + `None`, `to_q` is used by default. + """ + + indices: list[int] + fqn: str = "auto" + _query_proj_identifiers: list[str] = None + + def to_dict(self): + return asdict(self) + + @staticmethod + def from_dict(data: dict) -> "SmoothedEnergyGuidanceConfig": + return SmoothedEnergyGuidanceConfig(**data) + + +class SmoothedEnergyGuidanceHook(ModelHook): + def __init__(self, blur_sigma: float = 1.0, blur_threshold_inf: float = 9999.9) -> None: + super().__init__() + self.blur_sigma = blur_sigma + self.blur_threshold_inf = blur_threshold_inf + + def post_forward(self, module: torch.nn.Module, output: torch.Tensor) -> torch.Tensor: + # Copied from https://github.com/SusungHong/SEG-SDXL/blob/cf8256d640d5373541cfea3b3b6caf93272cf986/pipeline_seg.py#L172C31-L172C102 + kernel_size = math.ceil(6 * self.blur_sigma) + 1 - math.ceil(6 * self.blur_sigma) % 2 + smoothed_output = _gaussian_blur_2d(output, kernel_size, self.blur_sigma, self.blur_threshold_inf) + return smoothed_output + + +def _apply_smoothed_energy_guidance_hook( + module: torch.nn.Module, config: SmoothedEnergyGuidanceConfig, blur_sigma: float, name: str | None = None +) -> None: + name = name or _SMOOTHED_ENERGY_GUIDANCE_HOOK + + if config.fqn == "auto": + for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS: + if hasattr(module, identifier): + config.fqn = identifier + break + else: + raise ValueError( + "Could not find a suitable identifier for the transformer blocks automatically. Please provide a valid " + "`fqn` (fully qualified name) that identifies a stack of transformer blocks." + ) + + if config._query_proj_identifiers is None: + config._query_proj_identifiers = ["to_q"] + + transformer_blocks = _get_submodule_from_fqn(module, config.fqn) + blocks_found = False + for i, block in enumerate(transformer_blocks): + if i not in config.indices: + continue + + blocks_found = True + + for submodule_name, submodule in block.named_modules(): + if not isinstance(submodule, _ATTENTION_CLASSES) or submodule.is_cross_attention: + continue + for identifier in config._query_proj_identifiers: + query_proj = getattr(submodule, identifier, None) + if query_proj is None or not isinstance(query_proj, torch.nn.Linear): + continue + logger.debug( + f"Registering smoothed energy guidance hook on {config.fqn}.{i}.{submodule_name}.{identifier}" + ) + registry = HookRegistry.check_if_exists_or_initialize(query_proj) + hook = SmoothedEnergyGuidanceHook(blur_sigma) + registry.register_hook(hook, name) + + if not blocks_found: + raise ValueError( + f"Could not find any transformer blocks matching the provided indices {config.indices} and " + f"fully qualified name '{config.fqn}'. Please check the indices and fqn for correctness." + ) + + +# Modified from https://github.com/SusungHong/SEG-SDXL/blob/cf8256d640d5373541cfea3b3b6caf93272cf986/pipeline_seg.py#L71 +def _gaussian_blur_2d(query: torch.Tensor, kernel_size: int, sigma: float, sigma_threshold_inf: float) -> torch.Tensor: + """ + This implementation assumes that the input query is for visual (image/videos) tokens to apply the 2D gaussian blur. + However, some models use joint text-visual token attention for which this may not be suitable. Additionally, this + implementation also assumes that the visual tokens come from a square image/video. In practice, despite these + assumptions, applying the 2D square gaussian blur on the query projections generates reasonable results for + Smoothed Energy Guidance. + + SEG is only supported as an experimental prototype feature for now, so the implementation may be modified in the + future without warning or guarantee of reproducibility. + """ + assert query.ndim == 3 + + is_inf = sigma > sigma_threshold_inf + batch_size, seq_len, embed_dim = query.shape + + seq_len_sqrt = int(math.sqrt(seq_len)) + num_square_tokens = seq_len_sqrt * seq_len_sqrt + query_slice = query[:, :num_square_tokens, :] + query_slice = query_slice.permute(0, 2, 1) + query_slice = query_slice.reshape(batch_size, embed_dim, seq_len_sqrt, seq_len_sqrt) + + if is_inf: + kernel_size = min(kernel_size, seq_len_sqrt - (seq_len_sqrt % 2 - 1)) + kernel_size_half = (kernel_size - 1) / 2 + + x = torch.linspace(-kernel_size_half, kernel_size_half, steps=kernel_size) + pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + kernel1d = pdf / pdf.sum() + kernel1d = kernel1d.to(query) + kernel2d = torch.matmul(kernel1d[:, None], kernel1d[None, :]) + kernel2d = kernel2d.expand(embed_dim, 1, kernel2d.shape[0], kernel2d.shape[1]) + + padding = [kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2] + query_slice = F.pad(query_slice, padding, mode="reflect") + query_slice = F.conv2d(query_slice, kernel2d, groups=embed_dim) + else: + query_slice[:] = query_slice.mean(dim=(-2, -1), keepdim=True) + + query_slice = query_slice.reshape(batch_size, embed_dim, num_square_tokens) + query_slice = query_slice.permute(0, 2, 1) + query[:, :num_square_tokens, :] = query_slice.clone() + + return query diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/taylorseer_cache.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/taylorseer_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..303155105e71e397d6a2990fa2264aa871ef980f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/taylorseer_cache.py @@ -0,0 +1,345 @@ +import math +import re +from dataclasses import dataclass + +import torch +import torch.nn as nn + +from ..utils import logging +from .hooks import HookRegistry, ModelHook, StateManager + + +logger = logging.get_logger(__name__) +_TAYLORSEER_CACHE_HOOK = "taylorseer_cache" +_SPATIAL_ATTENTION_BLOCK_IDENTIFIERS = ( + "^blocks.*attn", + "^transformer_blocks.*attn", + "^single_transformer_blocks.*attn", +) +_TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS = ("^temporal_transformer_blocks.*attn",) +_TRANSFORMER_BLOCK_IDENTIFIERS = _SPATIAL_ATTENTION_BLOCK_IDENTIFIERS + _TEMPORAL_ATTENTION_BLOCK_IDENTIFIERS +_BLOCK_IDENTIFIERS = ("^[^.]*block[^.]*\\.[^.]+$",) +_PROJ_OUT_IDENTIFIERS = ("^proj_out$",) + + +@dataclass +class TaylorSeerCacheConfig: + """ + Configuration for TaylorSeer cache. See: https://huggingface.co/papers/2503.06923 + + Attributes: + cache_interval (`int`, defaults to `5`): + The interval between full computation steps. After a full computation, the cached (predicted) outputs are + reused for this many subsequent denoising steps before refreshing with a new full forward pass. + + disable_cache_before_step (`int`, defaults to `3`): + The denoising step index before which caching is disabled, meaning full computation is performed for the + initial steps (0 to disable_cache_before_step - 1) to gather data for Taylor series approximations. During + these steps, Taylor factors are updated, but caching/predictions are not applied. Caching begins at this + step. + + disable_cache_after_step (`int`, *optional*, defaults to `None`): + The denoising step index after which caching is disabled. If set, for steps >= this value, all modules run + full computations without predictions or state updates, ensuring accuracy in later stages if needed. + + max_order (`int`, defaults to `1`): + The highest order in the Taylor series expansion for approximating module outputs. Higher orders provide + better approximations but increase computation and memory usage. + + taylor_factors_dtype (`torch.dtype`, defaults to `torch.bfloat16`): + Data type used for storing and computing Taylor series factors. Lower precision reduces memory but may + affect stability; higher precision improves accuracy at the cost of more memory. + + skip_predict_identifiers (`list[str]`, *optional*, defaults to `None`): + Regex patterns (using `re.fullmatch`) for module names to place as "skip" in "cache" mode. In this mode, + the module computes fully during initial or refresh steps but returns a zero tensor (matching recorded + shape) during prediction steps to skip computation cheaply. + + cache_identifiers (`list[str]`, *optional*, defaults to `None`): + Regex patterns (using `re.fullmatch`) for module names to place in Taylor-series caching mode, where + outputs are approximated and cached for reuse. + + use_lite_mode (`bool`, *optional*, defaults to `False`): + Enables a lightweight TaylorSeer variant that minimizes memory usage by applying predefined patterns for + skipping and caching (e.g., skipping blocks and caching projections). This overrides any custom + `inactive_identifiers` or `active_identifiers`. + + Notes: + - Patterns are matched using `re.fullmatch` on the module name. + - If `skip_predict_identifiers` or `cache_identifiers` are provided, only matching modules are hooked. + - If neither is provided, all attention-like modules are hooked by default. + + Example of inactive and active usage: + + ```py + def forward(x): + x = self.module1(x) # inactive module: returns zeros tensor based on shape recorded during full compute + x = self.module2(x) # active module: caches output here, avoiding recomputation of prior steps + return x + ``` + """ + + cache_interval: int = 5 + disable_cache_before_step: int = 3 + disable_cache_after_step: int | None = None + max_order: int = 1 + taylor_factors_dtype: torch.dtype | None = torch.bfloat16 + skip_predict_identifiers: list[str] | None = None + cache_identifiers: list[str] | None = None + use_lite_mode: bool = False + + def __repr__(self) -> str: + return ( + "TaylorSeerCacheConfig(" + f"cache_interval={self.cache_interval}, " + f"disable_cache_before_step={self.disable_cache_before_step}, " + f"disable_cache_after_step={self.disable_cache_after_step}, " + f"max_order={self.max_order}, " + f"taylor_factors_dtype={self.taylor_factors_dtype}, " + f"skip_predict_identifiers={self.skip_predict_identifiers}, " + f"cache_identifiers={self.cache_identifiers}, " + f"use_lite_mode={self.use_lite_mode})" + ) + + +class TaylorSeerState: + def __init__( + self, + taylor_factors_dtype: torch.dtype | None = torch.bfloat16, + max_order: int = 1, + is_inactive: bool = False, + ): + self.taylor_factors_dtype = taylor_factors_dtype + self.max_order = max_order + self.is_inactive = is_inactive + + self.module_dtypes: tuple[torch.dtype, ...] = () + self.last_update_step: int | None = None + self.taylor_factors: dict[int, dict[int, torch.Tensor]] = {} + self.inactive_shapes: tuple[tuple[int, ...], ...] | None = None + self.device: torch.device | None = None + self.current_step: int = -1 + + def reset(self) -> None: + self.current_step = -1 + self.last_update_step = None + self.taylor_factors = {} + self.inactive_shapes = None + self.device = None + + def update( + self, + outputs: tuple[torch.Tensor, ...], + ) -> None: + self.module_dtypes = tuple(output.dtype for output in outputs) + self.device = outputs[0].device + + if self.is_inactive: + self.inactive_shapes = tuple(output.shape for output in outputs) + else: + for i, features in enumerate(outputs): + new_factors: dict[int, torch.Tensor] = {0: features} + is_first_update = self.last_update_step is None + if not is_first_update: + delta_step = self.current_step - self.last_update_step + if delta_step == 0: + raise ValueError("Delta step cannot be zero for TaylorSeer update.") + + # Recursive divided differences up to max_order + prev_factors = self.taylor_factors.get(i, {}) + for j in range(self.max_order): + prev = prev_factors.get(j) + if prev is None: + break + new_factors[j + 1] = (new_factors[j] - prev.to(features.dtype)) / delta_step + self.taylor_factors[i] = { + order: factor.to(self.taylor_factors_dtype) for order, factor in new_factors.items() + } + + self.last_update_step = self.current_step + + @torch.compiler.disable + def predict(self) -> list[torch.Tensor]: + if self.last_update_step is None: + raise ValueError("Cannot predict without prior initialization/update.") + + step_offset = self.current_step - self.last_update_step + + outputs = [] + if self.is_inactive: + if self.inactive_shapes is None: + raise ValueError("Inactive shapes not set during prediction.") + for i in range(len(self.module_dtypes)): + outputs.append( + torch.zeros( + self.inactive_shapes[i], + dtype=self.module_dtypes[i], + device=self.device, + ) + ) + else: + if not self.taylor_factors: + raise ValueError("Taylor factors empty during prediction.") + num_outputs = len(self.taylor_factors) + num_orders = len(self.taylor_factors[0]) + for i in range(num_outputs): + output_dtype = self.module_dtypes[i] + taylor_factors = self.taylor_factors[i] + output = torch.zeros_like(taylor_factors[0], dtype=output_dtype) + for order in range(num_orders): + coeff = (step_offset**order) / math.factorial(order) + factor = taylor_factors[order] + output = output + factor.to(output_dtype) * coeff + outputs.append(output) + return outputs + + +class TaylorSeerCacheHook(ModelHook): + _is_stateful = True + + def __init__( + self, + cache_interval: int, + disable_cache_before_step: int, + taylor_factors_dtype: torch.dtype, + state_manager: StateManager, + disable_cache_after_step: int | None = None, + ): + super().__init__() + self.cache_interval = cache_interval + self.disable_cache_before_step = disable_cache_before_step + self.disable_cache_after_step = disable_cache_after_step + self.taylor_factors_dtype = taylor_factors_dtype + self.state_manager = state_manager + + def initialize_hook(self, module: torch.nn.Module): + return module + + def reset_state(self, module: torch.nn.Module) -> None: + """ + Reset state between sampling runs. + """ + self.state_manager.reset() + + @torch.compiler.disable + def _measure_should_compute(self) -> bool: + state: TaylorSeerState = self.state_manager.get_state() + state.current_step += 1 + current_step = state.current_step + is_warmup_phase = current_step < self.disable_cache_before_step + is_compute_interval = (current_step - self.disable_cache_before_step - 1) % self.cache_interval == 0 + is_cooldown_phase = self.disable_cache_after_step is not None and current_step >= self.disable_cache_after_step + should_compute = is_warmup_phase or is_compute_interval or is_cooldown_phase + return should_compute, state + + def new_forward(self, module: torch.nn.Module, *args, **kwargs): + should_compute, state = self._measure_should_compute() + if should_compute: + outputs = self.fn_ref.original_forward(*args, **kwargs) + wrapped_outputs = (outputs,) if isinstance(outputs, torch.Tensor) else outputs + state.update(wrapped_outputs) + return outputs + + outputs_list = state.predict() + return outputs_list[0] if len(outputs_list) == 1 else tuple(outputs_list) + + +def _resolve_patterns(config: TaylorSeerCacheConfig) -> tuple[list[str], list[str]]: + """ + Resolve effective inactive and active pattern lists from config + templates. + """ + + inactive_patterns = config.skip_predict_identifiers if config.skip_predict_identifiers is not None else None + active_patterns = config.cache_identifiers if config.cache_identifiers is not None else None + + return inactive_patterns or [], active_patterns or [] + + +def apply_taylorseer_cache(module: torch.nn.Module, config: TaylorSeerCacheConfig): + """ + Applies the TaylorSeer cache to a given pipeline (typically the transformer / UNet). + + This function hooks selected modules in the model to enable caching or skipping based on the provided + configuration, reducing redundant computations in diffusion denoising loops. + + Args: + module (torch.nn.Module): The model subtree to apply the hooks to. + config (TaylorSeerCacheConfig): Configuration for the cache. + + Example: + ```python + >>> import torch + >>> from diffusers import FluxPipeline, TaylorSeerCacheConfig + + >>> pipe = FluxPipeline.from_pretrained( + ... "black-forest-labs/FLUX.1-dev", + ... torch_dtype=torch.bfloat16, + ... ) + >>> pipe.to("cuda") + + >>> config = TaylorSeerCacheConfig( + ... cache_interval=5, + ... max_order=1, + ... disable_cache_before_step=3, + ... taylor_factors_dtype=torch.float32, + ... ) + >>> pipe.transformer.enable_cache(config) + ``` + """ + inactive_patterns, active_patterns = _resolve_patterns(config) + + active_patterns = active_patterns or _TRANSFORMER_BLOCK_IDENTIFIERS + + if config.use_lite_mode: + logger.info("Using TaylorSeer Lite variant for cache.") + active_patterns = _PROJ_OUT_IDENTIFIERS + inactive_patterns = _BLOCK_IDENTIFIERS + if config.skip_predict_identifiers or config.cache_identifiers: + logger.warning("Lite mode overrides user patterns.") + + for name, submodule in module.named_modules(): + matches_inactive = any(re.fullmatch(pattern, name) for pattern in inactive_patterns) + matches_active = any(re.fullmatch(pattern, name) for pattern in active_patterns) + if not (matches_inactive or matches_active): + continue + _apply_taylorseer_cache_hook( + module=submodule, + config=config, + is_inactive=matches_inactive, + ) + + +def _apply_taylorseer_cache_hook( + module: nn.Module, + config: TaylorSeerCacheConfig, + is_inactive: bool, +): + """ + Registers the TaylorSeer hook on the specified nn.Module. + + Args: + name: Name of the module. + module: The nn.Module to be hooked. + config: Cache configuration. + is_inactive: Whether this module should operate in "inactive" mode. + """ + state_manager = StateManager( + TaylorSeerState, + init_kwargs={ + "taylor_factors_dtype": config.taylor_factors_dtype, + "max_order": config.max_order, + "is_inactive": is_inactive, + }, + ) + + registry = HookRegistry.check_if_exists_or_initialize(module) + + hook = TaylorSeerCacheHook( + cache_interval=config.cache_interval, + disable_cache_before_step=config.disable_cache_before_step, + taylor_factors_dtype=config.taylor_factors_dtype, + disable_cache_after_step=config.disable_cache_after_step, + state_manager=state_manager, + ) + + registry.register_hook(hook, _TAYLORSEER_CACHE_HOOK) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..da9e398fdf29def2c4d01e5e8de443e808cc20cf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/hooks/utils.py @@ -0,0 +1,43 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from ._common import _ALL_TRANSFORMER_BLOCK_IDENTIFIERS, _ATTENTION_CLASSES, _FEEDFORWARD_CLASSES + + +def _get_identifiable_transformer_blocks_in_module(module: torch.nn.Module): + module_list_with_transformer_blocks = [] + for name, submodule in module.named_modules(): + name_endswith_identifier = any(name.endswith(identifier) for identifier in _ALL_TRANSFORMER_BLOCK_IDENTIFIERS) + is_ModuleList = isinstance(submodule, torch.nn.ModuleList) + if name_endswith_identifier and is_ModuleList: + module_list_with_transformer_blocks.append((name, submodule)) + return module_list_with_transformer_blocks + + +def _get_identifiable_attention_layers_in_module(module: torch.nn.Module): + attention_layers = [] + for name, submodule in module.named_modules(): + if isinstance(submodule, _ATTENTION_CLASSES): + attention_layers.append((name, submodule)) + return attention_layers + + +def _get_identifiable_feedforward_layers_in_module(module: torch.nn.Module): + feedforward_layers = [] + for name, submodule in module.named_modules(): + if isinstance(submodule, _FEEDFORWARD_CLASSES): + feedforward_layers.append((name, submodule)) + return feedforward_layers diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ed0d2a07336f7186ed1375e013fbc915aa69bd58 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__init__.py @@ -0,0 +1,147 @@ +from typing import TYPE_CHECKING + +from ..utils import DIFFUSERS_SLOW_IMPORT, _LazyModule, deprecate +from ..utils.import_utils import is_peft_available, is_torch_available, is_transformers_available + + +def text_encoder_lora_state_dict(text_encoder): + deprecate( + "text_encoder_load_state_dict in `models`", + "0.27.0", + "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.", + ) + state_dict = {} + + for name, module in text_encoder_attn_modules(text_encoder): + for k, v in module.q_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.q_proj.lora_linear_layer.{k}"] = v + + for k, v in module.k_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.k_proj.lora_linear_layer.{k}"] = v + + for k, v in module.v_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.v_proj.lora_linear_layer.{k}"] = v + + for k, v in module.out_proj.lora_linear_layer.state_dict().items(): + state_dict[f"{name}.out_proj.lora_linear_layer.{k}"] = v + + return state_dict + + +if is_transformers_available(): + + def text_encoder_attn_modules(text_encoder): + deprecate( + "text_encoder_attn_modules in `models`", + "0.27.0", + "`text_encoder_lora_state_dict` is deprecated and will be removed in 0.27.0. Make sure to retrieve the weights using `get_peft_model`. See https://huggingface.co/docs/peft/v0.6.2/en/quicktour#peftmodel for more information.", + ) + from transformers import CLIPTextModel, CLIPTextModelWithProjection + + attn_modules = [] + + if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): + for i, layer in enumerate(text_encoder.text_model.encoder.layers): + name = f"text_model.encoder.layers.{i}.self_attn" + mod = layer.self_attn + attn_modules.append((name, mod)) + else: + raise ValueError(f"do not know how to get attention modules for: {text_encoder.__class__.__name__}") + + return attn_modules + + +_import_structure = {} + +if is_torch_available(): + _import_structure["single_file_model"] = ["FromOriginalModelMixin"] + _import_structure["transformer_flux"] = ["FluxTransformer2DLoadersMixin"] + _import_structure["transformer_sd3"] = ["SD3Transformer2DLoadersMixin"] + _import_structure["unet"] = ["UNet2DConditionLoadersMixin"] + _import_structure["utils"] = ["AttnProcsLayers"] + if is_transformers_available(): + _import_structure["single_file"] = ["FromSingleFileMixin"] + _import_structure["lora_pipeline"] = [ + "AmusedLoraLoaderMixin", + "StableDiffusionLoraLoaderMixin", + "SD3LoraLoaderMixin", + "AuraFlowLoraLoaderMixin", + "StableDiffusionXLLoraLoaderMixin", + "LTX2LoraLoaderMixin", + "LTXVideoLoraLoaderMixin", + "LoraLoaderMixin", + "FluxLoraLoaderMixin", + "CogVideoXLoraLoaderMixin", + "CogView4LoraLoaderMixin", + "Mochi1LoraLoaderMixin", + "HunyuanVideoLoraLoaderMixin", + "SanaLoraLoaderMixin", + "Lumina2LoraLoaderMixin", + "WanLoraLoaderMixin", + "HeliosLoraLoaderMixin", + "KandinskyLoraLoaderMixin", + "HiDreamImageLoraLoaderMixin", + "SkyReelsV2LoraLoaderMixin", + "QwenImageLoraLoaderMixin", + "ZImageLoraLoaderMixin", + "Flux2LoraLoaderMixin", + ] + _import_structure["textual_inversion"] = ["TextualInversionLoaderMixin"] + _import_structure["ip_adapter"] = [ + "IPAdapterMixin", + "FluxIPAdapterMixin", + "SD3IPAdapterMixin", + "ModularIPAdapterMixin", + ] + +_import_structure["peft"] = ["PeftAdapterMixin"] + + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + if is_torch_available(): + from .single_file_model import FromOriginalModelMixin + from .transformer_flux import FluxTransformer2DLoadersMixin + from .transformer_sd3 import SD3Transformer2DLoadersMixin + from .unet import UNet2DConditionLoadersMixin + from .utils import AttnProcsLayers + + if is_transformers_available(): + from .ip_adapter import ( + FluxIPAdapterMixin, + IPAdapterMixin, + ModularIPAdapterMixin, + SD3IPAdapterMixin, + ) + from .lora_pipeline import ( + AmusedLoraLoaderMixin, + AuraFlowLoraLoaderMixin, + CogVideoXLoraLoaderMixin, + CogView4LoraLoaderMixin, + Flux2LoraLoaderMixin, + FluxLoraLoaderMixin, + HeliosLoraLoaderMixin, + HiDreamImageLoraLoaderMixin, + HunyuanVideoLoraLoaderMixin, + KandinskyLoraLoaderMixin, + LoraLoaderMixin, + LTX2LoraLoaderMixin, + LTXVideoLoraLoaderMixin, + Lumina2LoraLoaderMixin, + Mochi1LoraLoaderMixin, + QwenImageLoraLoaderMixin, + SanaLoraLoaderMixin, + SD3LoraLoaderMixin, + SkyReelsV2LoraLoaderMixin, + StableDiffusionLoraLoaderMixin, + StableDiffusionXLLoraLoaderMixin, + WanLoraLoaderMixin, + ZImageLoraLoaderMixin, + ) + from .single_file import FromSingleFileMixin + from .textual_inversion import TextualInversionLoaderMixin + + from .peft import PeftAdapterMixin +else: + import sys + + sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f410bfa96bac73a209759218d7e92a138aa4aa4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/ip_adapter.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/ip_adapter.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..078553365a967259e6a45ec3f8cb3d3bd589cf0e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/ip_adapter.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/lora_base.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/lora_base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49a6dd49d58e4bd4b0e74b71f70d770d618a0cdb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/lora_base.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/peft.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/peft.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f16c0364bc26418540e4b44f7daba5938216bba3 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/peft.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/single_file.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/single_file.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a393cd56f6b1808412c74b4240bb3703e0593b2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/single_file.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/single_file_model.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/single_file_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ca1c7868551d97030862a6a78796397db0ddd1e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/single_file_model.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/textual_inversion.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/textual_inversion.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b80063879d1d9e3f77a9fddaa67b8aa2f1dc4cb9 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/textual_inversion.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/transformer_flux.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/transformer_flux.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f1e56702d3de65ef502a7ea34f0bc5eae1b9ac4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/transformer_flux.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/transformer_sd3.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/transformer_sd3.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9875bf705f87eabef4cb23eb6e9b3a4582bcd663 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/transformer_sd3.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/unet.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/unet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93a443b54d2d6393335c49105eed749bea858dee Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/unet.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/unet_loader_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/unet_loader_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85bdb7dd49063a0b1f9bb62122ca1f470a265c25 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/unet_loader_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..508a331a140d838c5d4f402fda7d4e3948d57443 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/ip_adapter.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/ip_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..13bb44e4a2a6af41c74e1a908f6603e946a065bd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/ip_adapter.py @@ -0,0 +1,1121 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pathlib import Path +from typing import List, Union + +import torch +import torch.nn.functional as F +from huggingface_hub.utils import validate_hf_hub_args +from safetensors import safe_open + +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_state_dict +from ..utils import ( + USE_PEFT_BACKEND, + _get_detailed_type, + _get_model_file, + _is_valid_type, + is_accelerate_available, + is_torch_version, + is_transformers_available, + logging, +) +from .unet_loader_utils import _maybe_expand_lora_scales + + +if is_transformers_available(): + from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, SiglipImageProcessor, SiglipVisionModel + +from ..models.attention_processor import ( + AttnProcessor, + AttnProcessor2_0, + IPAdapterAttnProcessor, + IPAdapterAttnProcessor2_0, + IPAdapterXFormersAttnProcessor, + JointAttnProcessor2_0, + SD3IPAdapterJointAttnProcessor2_0, +) + + +logger = logging.get_logger(__name__) + + +class IPAdapterMixin: + """Mixin for handling IP Adapters.""" + + @validate_hf_hub_args + def load_ip_adapter( + self, + pretrained_model_name_or_path_or_dict: str | list[str] | dict[str, torch.Tensor], + subfolder: str | list[str], + weight_name: str | list[str], + image_encoder_folder: str | None = "image_encoder", + **kwargs, + ): + """ + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `list[str]` or `os.PathLike` or `list[os.PathLike]` or `dict` or `list[dict]`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + subfolder (`str` or `list[str]`): + The subfolder location of a model file within a larger model repository on the Hub or locally. If a + list is passed, it should have the same length as `weight_name`. + weight_name (`str` or `list[str]`): + The name of the weight file to load. If a list is passed, it should have the same length as + `subfolder`. + image_encoder_folder (`str`, *optional*, defaults to `image_encoder`): + The subfolder location of the image encoder within a larger model repository on the Hub or locally. + Pass `None` to not load the image encoder. If the image encoder is located in a folder inside + `subfolder`, you only need to pass the name of the folder that contains image encoder weights, e.g. + `image_encoder_folder="image_encoder"`. If the image encoder is located in a folder other than + `subfolder`, you should pass the path to the folder that contains image encoder weights, for example, + `image_encoder_folder="different_subfolder/image_encoder"`. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + """ + + # handle the list inputs for multiple IP Adapters + if not isinstance(weight_name, list): + weight_name = [weight_name] + + if not isinstance(pretrained_model_name_or_path_or_dict, list): + pretrained_model_name_or_path_or_dict = [pretrained_model_name_or_path_or_dict] + if len(pretrained_model_name_or_path_or_dict) == 1: + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict * len(weight_name) + + if not isinstance(subfolder, list): + subfolder = [subfolder] + if len(subfolder) == 1: + subfolder = subfolder * len(weight_name) + + if len(weight_name) != len(pretrained_model_name_or_path_or_dict): + raise ValueError("`weight_name` and `pretrained_model_name_or_path_or_dict` must have the same length.") + + if len(weight_name) != len(subfolder): + raise ValueError("`weight_name` and `subfolder` must have the same length.") + + # Load the main state dict first. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + state_dicts = [] + for pretrained_model_name_or_path_or_dict, weight_name, subfolder in zip( + pretrained_model_name_or_path_or_dict, weight_name, subfolder + ): + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + if weight_name.endswith(".safetensors"): + state_dict = {"image_proj": {}, "ip_adapter": {}} + with safe_open(model_file, framework="pt", device="cpu") as f: + for key in f.keys(): + if key.startswith("image_proj."): + state_dict["image_proj"][key.replace("image_proj.", "")] = f.get_tensor(key) + elif key.startswith("ip_adapter."): + state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = f.get_tensor(key) + else: + state_dict = load_state_dict(model_file) + else: + state_dict = pretrained_model_name_or_path_or_dict + + keys = list(state_dict.keys()) + if "image_proj" not in keys and "ip_adapter" not in keys: + raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.") + + state_dicts.append(state_dict) + + # load CLIP image encoder here if it has not been registered to the pipeline yet + if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is None: + if image_encoder_folder is not None: + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + logger.info(f"loading image_encoder from {pretrained_model_name_or_path_or_dict}") + if image_encoder_folder.count("/") == 0: + image_encoder_subfolder = Path(subfolder, image_encoder_folder).as_posix() + else: + image_encoder_subfolder = Path(image_encoder_folder).as_posix() + + image_encoder = CLIPVisionModelWithProjection.from_pretrained( + pretrained_model_name_or_path_or_dict, + subfolder=image_encoder_subfolder, + low_cpu_mem_usage=low_cpu_mem_usage, + cache_dir=cache_dir, + local_files_only=local_files_only, + torch_dtype=self.dtype, + ).to(self.device) + self.register_modules(image_encoder=image_encoder) + else: + raise ValueError( + "`image_encoder` cannot be loaded because `pretrained_model_name_or_path_or_dict` is a state dict." + ) + else: + logger.warning( + "image_encoder is not loaded since `image_encoder_folder=None` passed. You will not be able to use `ip_adapter_image` when calling the pipeline with IP-Adapter." + "Use `ip_adapter_image_embeds` to pass pre-generated image embedding instead." + ) + + # create feature extractor if it has not been registered to the pipeline yet + if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is None: + # FaceID IP adapters don't need the image encoder so it's not present, in this case we default to 224 + default_clip_size = 224 + clip_image_size = ( + self.image_encoder.config.image_size if self.image_encoder is not None else default_clip_size + ) + feature_extractor = CLIPImageProcessor(size=clip_image_size, crop_size=clip_image_size) + self.register_modules(feature_extractor=feature_extractor) + + # load ip-adapter into unet + unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet + unet._load_ip_adapter_weights(state_dicts, low_cpu_mem_usage=low_cpu_mem_usage) + + extra_loras = unet._load_ip_adapter_loras(state_dicts) + if extra_loras != {}: + if not USE_PEFT_BACKEND: + logger.warning("PEFT backend is required to load these weights.") + else: + # apply the IP Adapter Face ID LoRA weights + peft_config = getattr(unet, "peft_config", {}) + for k, lora in extra_loras.items(): + if f"faceid_{k}" not in peft_config: + self.load_lora_weights(lora, adapter_name=f"faceid_{k}") + self.set_adapters([f"faceid_{k}"], adapter_weights=[1.0]) + + def set_ip_adapter_scale(self, scale): + """ + Set IP-Adapter scales per-transformer block. Input `scale` could be a single config or a list of configs for + granular control over each IP-Adapter behavior. A config can be a float or a dictionary. + + Example: + + ```py + # To use original IP-Adapter + scale = 1.0 + pipeline.set_ip_adapter_scale(scale) + + # To use style block only + scale = { + "up": {"block_0": [0.0, 1.0, 0.0]}, + } + pipeline.set_ip_adapter_scale(scale) + + # To use style+layout blocks + scale = { + "down": {"block_2": [0.0, 1.0]}, + "up": {"block_0": [0.0, 1.0, 0.0]}, + } + pipeline.set_ip_adapter_scale(scale) + + # To use style and layout from 2 reference images + scales = [{"down": {"block_2": [0.0, 1.0]}}, {"up": {"block_0": [0.0, 1.0, 0.0]}}] + pipeline.set_ip_adapter_scale(scales) + ``` + """ + unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet + if not isinstance(scale, list): + scale = [scale] + scale_configs = _maybe_expand_lora_scales(unet, scale, default_scale=0.0) + + for attn_name, attn_processor in unet.attn_processors.items(): + if isinstance( + attn_processor, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor) + ): + if len(scale_configs) != len(attn_processor.scale): + raise ValueError( + f"Cannot assign {len(scale_configs)} scale_configs to {len(attn_processor.scale)} IP-Adapter." + ) + elif len(scale_configs) == 1: + scale_configs = scale_configs * len(attn_processor.scale) + for i, scale_config in enumerate(scale_configs): + if isinstance(scale_config, dict): + for k, s in scale_config.items(): + if attn_name.startswith(k): + attn_processor.scale[i] = s + else: + attn_processor.scale[i] = scale_config + + def unload_ip_adapter(self): + """ + Unloads the IP Adapter weights + + Examples: + + ```python + >>> # Assuming `pipeline` is already loaded with the IP Adapter weights. + >>> pipeline.unload_ip_adapter() + >>> ... + ``` + """ + # remove CLIP image encoder + if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is not None: + self.image_encoder = None + self.register_to_config(image_encoder=[None, None]) + + # remove feature extractor only when safety_checker is None as safety_checker uses + # the feature_extractor later + if not hasattr(self, "safety_checker"): + if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is not None: + self.feature_extractor = None + self.register_to_config(feature_extractor=[None, None]) + + # remove hidden encoder + self.unet.encoder_hid_proj = None + self.unet.config.encoder_hid_dim_type = None + + # Kolors: restore `encoder_hid_proj` with `text_encoder_hid_proj` + if hasattr(self.unet, "text_encoder_hid_proj") and self.unet.text_encoder_hid_proj is not None: + self.unet.encoder_hid_proj = self.unet.text_encoder_hid_proj + self.unet.text_encoder_hid_proj = None + self.unet.config.encoder_hid_dim_type = "text_proj" + + # restore original Unet attention processors layers + attn_procs = {} + for name, value in self.unet.attn_processors.items(): + attn_processor_class = ( + AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") else AttnProcessor() + ) + attn_procs[name] = ( + attn_processor_class + if isinstance( + value, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor) + ) + else value.__class__() + ) + self.unet.set_attn_processor(attn_procs) + + +class ModularIPAdapterMixin: + """Mixin for handling IP Adapters.""" + + @validate_hf_hub_args + def load_ip_adapter( + self, + pretrained_model_name_or_path_or_dict: str | list[str] | dict[str, torch.Tensor], + subfolder: str | list[str], + weight_name: str | list[str], + **kwargs, + ): + """ + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `list[str]` or `os.PathLike` or `list[os.PathLike]` or `dict` or `list[dict]`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + subfolder (`str` or `list[str]`): + The subfolder location of a model file within a larger model repository on the Hub or locally. If a + list is passed, it should have the same length as `weight_name`. + weight_name (`str` or `list[str]`): + The name of the weight file to load. If a list is passed, it should have the same length as + `subfolder`. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + """ + + # handle the list inputs for multiple IP Adapters + if not isinstance(weight_name, list): + weight_name = [weight_name] + + if not isinstance(pretrained_model_name_or_path_or_dict, list): + pretrained_model_name_or_path_or_dict = [pretrained_model_name_or_path_or_dict] + if len(pretrained_model_name_or_path_or_dict) == 1: + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict * len(weight_name) + + if not isinstance(subfolder, list): + subfolder = [subfolder] + if len(subfolder) == 1: + subfolder = subfolder * len(weight_name) + + if len(weight_name) != len(pretrained_model_name_or_path_or_dict): + raise ValueError("`weight_name` and `pretrained_model_name_or_path_or_dict` must have the same length.") + + if len(weight_name) != len(subfolder): + raise ValueError("`weight_name` and `subfolder` must have the same length.") + + # Load the main state dict first. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + user_agent = { + "file_type": "attn_procs_weights", + "framework": "pytorch", + } + state_dicts = [] + for pretrained_model_name_or_path_or_dict, weight_name, subfolder in zip( + pretrained_model_name_or_path_or_dict, weight_name, subfolder + ): + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + if weight_name.endswith(".safetensors"): + state_dict = {"image_proj": {}, "ip_adapter": {}} + with safe_open(model_file, framework="pt", device="cpu") as f: + for key in f.keys(): + if key.startswith("image_proj."): + state_dict["image_proj"][key.replace("image_proj.", "")] = f.get_tensor(key) + elif key.startswith("ip_adapter."): + state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = f.get_tensor(key) + else: + state_dict = load_state_dict(model_file) + else: + state_dict = pretrained_model_name_or_path_or_dict + + keys = list(state_dict.keys()) + if "image_proj" not in keys and "ip_adapter" not in keys: + raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.") + + state_dicts.append(state_dict) + + unet_name = getattr(self, "unet_name", "unet") + unet = getattr(self, unet_name) + unet._load_ip_adapter_weights(state_dicts, low_cpu_mem_usage=low_cpu_mem_usage) + + extra_loras = unet._load_ip_adapter_loras(state_dicts) + if extra_loras != {}: + if not USE_PEFT_BACKEND: + logger.warning("PEFT backend is required to load these weights.") + else: + # apply the IP Adapter Face ID LoRA weights + peft_config = getattr(unet, "peft_config", {}) + for k, lora in extra_loras.items(): + if f"faceid_{k}" not in peft_config: + self.load_lora_weights(lora, adapter_name=f"faceid_{k}") + self.set_adapters([f"faceid_{k}"], adapter_weights=[1.0]) + + def set_ip_adapter_scale(self, scale): + """ + Set IP-Adapter scales per-transformer block. Input `scale` could be a single config or a list of configs for + granular control over each IP-Adapter behavior. A config can be a float or a dictionary. + + Example: + + ```py + # To use original IP-Adapter + scale = 1.0 + pipeline.set_ip_adapter_scale(scale) + + # To use style block only + scale = { + "up": {"block_0": [0.0, 1.0, 0.0]}, + } + pipeline.set_ip_adapter_scale(scale) + + # To use style+layout blocks + scale = { + "down": {"block_2": [0.0, 1.0]}, + "up": {"block_0": [0.0, 1.0, 0.0]}, + } + pipeline.set_ip_adapter_scale(scale) + + # To use style and layout from 2 reference images + scales = [{"down": {"block_2": [0.0, 1.0]}}, {"up": {"block_0": [0.0, 1.0, 0.0]}}] + pipeline.set_ip_adapter_scale(scales) + ``` + """ + unet_name = getattr(self, "unet_name", "unet") + unet = getattr(self, unet_name) + if not isinstance(scale, list): + scale = [scale] + scale_configs = _maybe_expand_lora_scales(unet, scale, default_scale=0.0) + + for attn_name, attn_processor in unet.attn_processors.items(): + if isinstance( + attn_processor, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor) + ): + if len(scale_configs) != len(attn_processor.scale): + raise ValueError( + f"Cannot assign {len(scale_configs)} scale_configs to {len(attn_processor.scale)} IP-Adapter." + ) + elif len(scale_configs) == 1: + scale_configs = scale_configs * len(attn_processor.scale) + for i, scale_config in enumerate(scale_configs): + if isinstance(scale_config, dict): + for k, s in scale_config.items(): + if attn_name.startswith(k): + attn_processor.scale[i] = s + else: + attn_processor.scale[i] = scale_config + + def unload_ip_adapter(self): + """ + Unloads the IP Adapter weights + + Examples: + + ```python + >>> # Assuming `pipeline` is already loaded with the IP Adapter weights. + >>> pipeline.unload_ip_adapter() + >>> ... + ``` + """ + + # remove hidden encoder + if self.unet is None: + return + + self.unet.encoder_hid_proj = None + self.unet.config.encoder_hid_dim_type = None + + # Kolors: restore `encoder_hid_proj` with `text_encoder_hid_proj` + if hasattr(self.unet, "text_encoder_hid_proj") and self.unet.text_encoder_hid_proj is not None: + self.unet.encoder_hid_proj = self.unet.text_encoder_hid_proj + self.unet.text_encoder_hid_proj = None + self.unet.config.encoder_hid_dim_type = "text_proj" + + # restore original Unet attention processors layers + attn_procs = {} + for name, value in self.unet.attn_processors.items(): + attn_processor_class = ( + AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") else AttnProcessor() + ) + attn_procs[name] = ( + attn_processor_class + if isinstance( + value, (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor) + ) + else value.__class__() + ) + self.unet.set_attn_processor(attn_procs) + + +class FluxIPAdapterMixin: + """Mixin for handling Flux IP Adapters.""" + + @validate_hf_hub_args + def load_ip_adapter( + self, + pretrained_model_name_or_path_or_dict: str | list[str] | dict[str, torch.Tensor], + weight_name: str | list[str], + subfolder: str | list[str] | None = "", + image_encoder_pretrained_model_name_or_path: str | None = "image_encoder", + image_encoder_subfolder: str | None = "", + image_encoder_dtype: torch.dtype = torch.float16, + **kwargs, + ): + """ + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `list[str]` or `os.PathLike` or `list[os.PathLike]` or `dict` or `list[dict]`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + subfolder (`str` or `list[str]`): + The subfolder location of a model file within a larger model repository on the Hub or locally. If a + list is passed, it should have the same length as `weight_name`. + weight_name (`str` or `list[str]`): + The name of the weight file to load. If a list is passed, it should have the same length as + `weight_name`. + image_encoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `./image_encoder`): + Can be either: + + - A string, the *model id* (for example `openai/clip-vit-large-patch14`) of a pretrained model + hosted on the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + """ + + # handle the list inputs for multiple IP Adapters + if not isinstance(weight_name, list): + weight_name = [weight_name] + + if not isinstance(pretrained_model_name_or_path_or_dict, list): + pretrained_model_name_or_path_or_dict = [pretrained_model_name_or_path_or_dict] + if len(pretrained_model_name_or_path_or_dict) == 1: + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict * len(weight_name) + + if not isinstance(subfolder, list): + subfolder = [subfolder] + if len(subfolder) == 1: + subfolder = subfolder * len(weight_name) + + if len(weight_name) != len(pretrained_model_name_or_path_or_dict): + raise ValueError("`weight_name` and `pretrained_model_name_or_path_or_dict` must have the same length.") + + if len(weight_name) != len(subfolder): + raise ValueError("`weight_name` and `subfolder` must have the same length.") + + # Load the main state dict first. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + state_dicts = [] + for pretrained_model_name_or_path_or_dict, weight_name, subfolder in zip( + pretrained_model_name_or_path_or_dict, weight_name, subfolder + ): + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + if weight_name.endswith(".safetensors"): + state_dict = {"image_proj": {}, "ip_adapter": {}} + with safe_open(model_file, framework="pt", device="cpu") as f: + image_proj_keys = ["ip_adapter_proj_model.", "image_proj."] + ip_adapter_keys = ["double_blocks.", "ip_adapter."] + for key in f.keys(): + if any(key.startswith(prefix) for prefix in image_proj_keys): + diffusers_name = ".".join(key.split(".")[1:]) + state_dict["image_proj"][diffusers_name] = f.get_tensor(key) + elif any(key.startswith(prefix) for prefix in ip_adapter_keys): + diffusers_name = ( + ".".join(key.split(".")[1:]) + .replace("ip_adapter_double_stream_k_proj", "to_k_ip") + .replace("ip_adapter_double_stream_v_proj", "to_v_ip") + .replace("processor.", "") + ) + state_dict["ip_adapter"][diffusers_name] = f.get_tensor(key) + else: + state_dict = load_state_dict(model_file) + else: + state_dict = pretrained_model_name_or_path_or_dict + + keys = list(state_dict.keys()) + if keys != ["image_proj", "ip_adapter"]: + raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.") + + state_dicts.append(state_dict) + + # load CLIP image encoder here if it has not been registered to the pipeline yet + if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is None: + if image_encoder_pretrained_model_name_or_path is not None: + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + logger.info(f"loading image_encoder from {image_encoder_pretrained_model_name_or_path}") + image_encoder = ( + CLIPVisionModelWithProjection.from_pretrained( + image_encoder_pretrained_model_name_or_path, + subfolder=image_encoder_subfolder, + low_cpu_mem_usage=low_cpu_mem_usage, + cache_dir=cache_dir, + local_files_only=local_files_only, + torch_dtype=image_encoder_dtype, + ) + .to(self.device) + .eval() + ) + self.register_modules(image_encoder=image_encoder) + else: + raise ValueError( + "`image_encoder` cannot be loaded because `pretrained_model_name_or_path_or_dict` is a state dict." + ) + else: + logger.warning( + "image_encoder is not loaded since `image_encoder_folder=None` passed. You will not be able to use `ip_adapter_image` when calling the pipeline with IP-Adapter." + "Use `ip_adapter_image_embeds` to pass pre-generated image embedding instead." + ) + + # create feature extractor if it has not been registered to the pipeline yet + if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is None: + # FaceID IP adapters don't need the image encoder so it's not present, in this case we default to 224 + default_clip_size = 224 + clip_image_size = ( + self.image_encoder.config.image_size if self.image_encoder is not None else default_clip_size + ) + feature_extractor = CLIPImageProcessor(size=clip_image_size, crop_size=clip_image_size) + self.register_modules(feature_extractor=feature_extractor) + + # load ip-adapter into transformer + self.transformer._load_ip_adapter_weights(state_dicts, low_cpu_mem_usage=low_cpu_mem_usage) + + def set_ip_adapter_scale(self, scale: float | list[float] | list[list[float]]): + """ + Set IP-Adapter scales per-transformer block. Input `scale` could be a single config or a list of configs for + granular control over each IP-Adapter behavior. A config can be a float or a list. + + `float` is converted to list and repeated for the number of blocks and the number of IP adapters. `list[float]` + length match the number of blocks, it is repeated for each IP adapter. `list[list[float]]` must match the + number of IP adapters and each must match the number of blocks. + + Example: + + ```py + # To use original IP-Adapter + scale = 1.0 + pipeline.set_ip_adapter_scale(scale) + + + def LinearStrengthModel(start, finish, size): + return [(start + (finish - start) * (i / (size - 1))) for i in range(size)] + + + ip_strengths = LinearStrengthModel(0.3, 0.92, 19) + pipeline.set_ip_adapter_scale(ip_strengths) + ``` + """ + + scale_type = Union[int, float] + num_ip_adapters = self.transformer.encoder_hid_proj.num_ip_adapters + num_layers = self.transformer.config.num_layers + + # Single value for all layers of all IP-Adapters + if isinstance(scale, scale_type): + scale = [scale for _ in range(num_ip_adapters)] + # List of per-layer scales for a single IP-Adapter + elif _is_valid_type(scale, List[scale_type]) and num_ip_adapters == 1: + scale = [scale] + # Invalid scale type + elif not _is_valid_type(scale, List[Union[scale_type, List[scale_type]]]): + raise TypeError(f"Unexpected type {_get_detailed_type(scale)} for scale.") + + if len(scale) != num_ip_adapters: + raise ValueError(f"Cannot assign {len(scale)} scales to {num_ip_adapters} IP-Adapters.") + + if any(len(s) != num_layers for s in scale if isinstance(s, list)): + invalid_scale_sizes = {len(s) for s in scale if isinstance(s, list)} - {num_layers} + raise ValueError( + f"Expected list of {num_layers} scales, got {', '.join(str(x) for x in invalid_scale_sizes)}." + ) + + # Scalars are transformed to lists with length num_layers + scale_configs = [[s] * num_layers if isinstance(s, scale_type) else s for s in scale] + + # Set scales. zip over scale_configs prevents going into single transformer layers + for attn_processor, *scale in zip(self.transformer.attn_processors.values(), *scale_configs): + attn_processor.scale = scale + + def unload_ip_adapter(self): + """ + Unloads the IP Adapter weights + + Examples: + + ```python + >>> # Assuming `pipeline` is already loaded with the IP Adapter weights. + >>> pipeline.unload_ip_adapter() + >>> ... + ``` + """ + # TODO: once the 1.0.0 deprecations are in, we can move the imports to top-level + from ..models.transformers.transformer_flux import FluxAttnProcessor, FluxIPAdapterAttnProcessor + + # remove CLIP image encoder + if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is not None: + self.image_encoder = None + self.register_to_config(image_encoder=[None, None]) + + # remove feature extractor only when safety_checker is None as safety_checker uses + # the feature_extractor later + if not hasattr(self, "safety_checker"): + if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is not None: + self.feature_extractor = None + self.register_to_config(feature_extractor=[None, None]) + + # remove hidden encoder + self.transformer.encoder_hid_proj = None + self.transformer.config.encoder_hid_dim_type = None + + # restore original Transformer attention processors layers + attn_procs = {} + for name, value in self.transformer.attn_processors.items(): + attn_processor_class = FluxAttnProcessor() + attn_procs[name] = ( + attn_processor_class if isinstance(value, FluxIPAdapterAttnProcessor) else value.__class__() + ) + self.transformer.set_attn_processor(attn_procs) + + +class SD3IPAdapterMixin: + """Mixin for handling StableDiffusion 3 IP Adapters.""" + + @property + def is_ip_adapter_active(self) -> bool: + """Checks if IP-Adapter is loaded and scale > 0. + + IP-Adapter scale controls the influence of the image prompt versus text prompt. When this value is set to 0, + the image context is irrelevant. + + Returns: + `bool`: True when IP-Adapter is loaded and any layer has scale > 0. + """ + scales = [ + attn_proc.scale + for attn_proc in self.transformer.attn_processors.values() + if isinstance(attn_proc, SD3IPAdapterJointAttnProcessor2_0) + ] + + return len(scales) > 0 and any(scale > 0 for scale in scales) + + @validate_hf_hub_args + def load_ip_adapter( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + weight_name: str = "ip-adapter.safetensors", + subfolder: str | None = None, + image_encoder_folder: str | None = "image_encoder", + **kwargs, + ) -> None: + """ + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): + Can be either: + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + weight_name (`str`, defaults to "ip-adapter.safetensors"): + The name of the weight file to load. If a list is passed, it should have the same length as + `subfolder`. + subfolder (`str`, *optional*): + The subfolder location of a model file within a larger model repository on the Hub or locally. If a + list is passed, it should have the same length as `weight_name`. + image_encoder_folder (`str`, *optional*, defaults to `image_encoder`): + The subfolder location of the image encoder within a larger model repository on the Hub or locally. + Pass `None` to not load the image encoder. If the image encoder is located in a folder inside + `subfolder`, you only need to pass the name of the folder that contains image encoder weights, e.g. + `image_encoder_folder="image_encoder"`. If the image encoder is located in a folder other than + `subfolder`, you should pass the path to the folder that contains image encoder weights, for example, + `image_encoder_folder="different_subfolder/image_encoder"`. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + """ + # Load the main state dict first + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + if weight_name.endswith(".safetensors"): + state_dict = {"image_proj": {}, "ip_adapter": {}} + with safe_open(model_file, framework="pt", device="cpu") as f: + for key in f.keys(): + if key.startswith("image_proj."): + state_dict["image_proj"][key.replace("image_proj.", "")] = f.get_tensor(key) + elif key.startswith("ip_adapter."): + state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = f.get_tensor(key) + else: + state_dict = load_state_dict(model_file) + else: + state_dict = pretrained_model_name_or_path_or_dict + + keys = list(state_dict.keys()) + if "image_proj" not in keys and "ip_adapter" not in keys: + raise ValueError("Required keys are (`image_proj` and `ip_adapter`) missing from the state dict.") + + # Load image_encoder and feature_extractor here if they haven't been registered to the pipeline yet + if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is None: + if image_encoder_folder is not None: + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + logger.info(f"loading image_encoder from {pretrained_model_name_or_path_or_dict}") + if image_encoder_folder.count("/") == 0: + image_encoder_subfolder = Path(subfolder, image_encoder_folder).as_posix() + else: + image_encoder_subfolder = Path(image_encoder_folder).as_posix() + + # Commons args for loading image encoder and image processor + kwargs = { + "low_cpu_mem_usage": low_cpu_mem_usage, + "cache_dir": cache_dir, + "local_files_only": local_files_only, + } + + self.register_modules( + feature_extractor=SiglipImageProcessor.from_pretrained(image_encoder_subfolder, **kwargs), + image_encoder=SiglipVisionModel.from_pretrained( + image_encoder_subfolder, torch_dtype=self.dtype, **kwargs + ).to(self.device), + ) + else: + raise ValueError( + "`image_encoder` cannot be loaded because `pretrained_model_name_or_path_or_dict` is a state dict." + ) + else: + logger.warning( + "image_encoder is not loaded since `image_encoder_folder=None` passed. You will not be able to use `ip_adapter_image` when calling the pipeline with IP-Adapter." + "Use `ip_adapter_image_embeds` to pass pre-generated image embedding instead." + ) + + # Load IP-Adapter into transformer + self.transformer._load_ip_adapter_weights(state_dict, low_cpu_mem_usage=low_cpu_mem_usage) + + def set_ip_adapter_scale(self, scale: float) -> None: + """ + Set IP-Adapter scale, which controls image prompt conditioning. A value of 1.0 means the model is only + conditioned on the image prompt, and 0.0 only conditioned by the text prompt. Lowering this value encourages + the model to produce more diverse images, but they may not be as aligned with the image prompt. + + Example: + + ```python + >>> # Assuming `pipeline` is already loaded with the IP Adapter weights. + >>> pipeline.set_ip_adapter_scale(0.6) + >>> ... + ``` + + Args: + scale (float): + IP-Adapter scale to be set. + + """ + for attn_processor in self.transformer.attn_processors.values(): + if isinstance(attn_processor, SD3IPAdapterJointAttnProcessor2_0): + attn_processor.scale = scale + + def unload_ip_adapter(self) -> None: + """ + Unloads the IP Adapter weights. + + Example: + + ```python + >>> # Assuming `pipeline` is already loaded with the IP Adapter weights. + >>> pipeline.unload_ip_adapter() + >>> ... + ``` + """ + # Remove image encoder + if hasattr(self, "image_encoder") and getattr(self, "image_encoder", None) is not None: + self.image_encoder = None + self.register_to_config(image_encoder=None) + + # Remove feature extractor + if hasattr(self, "feature_extractor") and getattr(self, "feature_extractor", None) is not None: + self.feature_extractor = None + self.register_to_config(feature_extractor=None) + + # Remove image projection + self.transformer.image_proj = None + + # Restore original attention processors layers + attn_procs = { + name: ( + JointAttnProcessor2_0() if isinstance(value, SD3IPAdapterJointAttnProcessor2_0) else value.__class__() + ) + for name, value in self.transformer.attn_processors.items() + } + self.transformer.set_attn_processor(attn_procs) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_base.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_base.py new file mode 100644 index 0000000000000000000000000000000000000000..5b5579664b559ad478adab1e9cf9dee8c2336dc4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_base.py @@ -0,0 +1,1097 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import copy +import inspect +import json +import os +from pathlib import Path +from typing import Callable + +import safetensors +import torch +import torch.nn as nn +from huggingface_hub import model_info +from huggingface_hub.constants import HF_HUB_OFFLINE + +from ..models.modeling_utils import ModelMixin, load_state_dict +from ..utils import ( + USE_PEFT_BACKEND, + _get_model_file, + convert_state_dict_to_diffusers, + convert_state_dict_to_peft, + delete_adapter_layers, + deprecate, + get_adapter_name, + is_accelerate_available, + is_peft_available, + is_peft_version, + is_transformers_available, + is_transformers_version, + logging, + recurse_remove_peft_layers, + scale_lora_layers, + set_adapter_layers, + set_weights_and_activate_adapters, +) +from ..utils.peft_utils import _create_lora_config +from ..utils.state_dict_utils import _load_sft_state_dict_metadata + + +if is_transformers_available(): + from transformers import PreTrainedModel + +if is_peft_available(): + from peft.tuners.tuners_utils import BaseTunerLayer + +if is_accelerate_available(): + from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module + +logger = logging.get_logger(__name__) + +LORA_WEIGHT_NAME = "pytorch_lora_weights.bin" +LORA_WEIGHT_NAME_SAFE = "pytorch_lora_weights.safetensors" +LORA_ADAPTER_METADATA_KEY = "lora_adapter_metadata" + + +def fuse_text_encoder_lora(text_encoder, lora_scale=1.0, safe_fusing=False, adapter_names=None): + """ + Fuses LoRAs for the text encoder. + + Args: + text_encoder (`torch.nn.Module`): + The text encoder module to set the adapter layers for. If `None`, it will try to get the `text_encoder` + attribute. + lora_scale (`float`, defaults to 1.0): + Controls how much to influence the outputs with the LoRA parameters. + safe_fusing (`bool`, defaults to `False`): + Whether to check fused weights for NaN values before fusing and if values are NaN not fusing them. + adapter_names (`list[str]` or `str`): + The names of the adapters to use. + """ + merge_kwargs = {"safe_merge": safe_fusing} + + for module in text_encoder.modules(): + if isinstance(module, BaseTunerLayer): + if lora_scale != 1.0: + module.scale_layer(lora_scale) + + # For BC with previous PEFT versions, we need to check the signature + # of the `merge` method to see if it supports the `adapter_names` argument. + supported_merge_kwargs = list(inspect.signature(module.merge).parameters) + if "adapter_names" in supported_merge_kwargs: + merge_kwargs["adapter_names"] = adapter_names + elif "adapter_names" not in supported_merge_kwargs and adapter_names is not None: + raise ValueError( + "The `adapter_names` argument is not supported with your PEFT version. " + "Please upgrade to the latest version of PEFT. `pip install -U peft`" + ) + + module.merge(**merge_kwargs) + + +def unfuse_text_encoder_lora(text_encoder): + """ + Unfuses LoRAs for the text encoder. + + Args: + text_encoder (`torch.nn.Module`): + The text encoder module to set the adapter layers for. If `None`, it will try to get the `text_encoder` + attribute. + """ + for module in text_encoder.modules(): + if isinstance(module, BaseTunerLayer): + module.unmerge() + + +def set_adapters_for_text_encoder( + adapter_names: list[str] | str, + text_encoder: "PreTrainedModel" | None = None, # noqa: F821 + text_encoder_weights: float | list[float] | list[None] | None = None, +): + """ + Sets the adapter layers for the text encoder. + + Args: + adapter_names (`list[str]` or `str`): + The names of the adapters to use. + text_encoder (`torch.nn.Module`, *optional*): + The text encoder module to set the adapter layers for. If `None`, it will try to get the `text_encoder` + attribute. + text_encoder_weights (`list[float]`, *optional*): + The weights to use for the text encoder. If `None`, the weights are set to `1.0` for all the adapters. + """ + if text_encoder is None: + raise ValueError( + "The pipeline does not have a default `pipe.text_encoder` class. Please make sure to pass a `text_encoder` instead." + ) + + def process_weights(adapter_names, weights): + # Expand weights into a list, one entry per adapter + # e.g. for 2 adapters: 7 -> [7,7] ; [3, None] -> [3, None] + if not isinstance(weights, list): + weights = [weights] * len(adapter_names) + + if len(adapter_names) != len(weights): + raise ValueError( + f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}" + ) + + # Set None values to default of 1.0 + # e.g. [7,7] -> [7,7] ; [3, None] -> [3,1] + weights = [w if w is not None else 1.0 for w in weights] + + return weights + + adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names + text_encoder_weights = process_weights(adapter_names, text_encoder_weights) + set_weights_and_activate_adapters(text_encoder, adapter_names, text_encoder_weights) + + +def disable_lora_for_text_encoder(text_encoder: "PreTrainedModel" | None = None): + """ + Disables the LoRA layers for the text encoder. + + Args: + text_encoder (`torch.nn.Module`, *optional*): + The text encoder module to disable the LoRA layers for. If `None`, it will try to get the `text_encoder` + attribute. + """ + if text_encoder is None: + raise ValueError("Text Encoder not found.") + set_adapter_layers(text_encoder, enabled=False) + + +def enable_lora_for_text_encoder(text_encoder: "PreTrainedModel" | None = None): + """ + Enables the LoRA layers for the text encoder. + + Args: + text_encoder (`torch.nn.Module`, *optional*): + The text encoder module to enable the LoRA layers for. If `None`, it will try to get the `text_encoder` + attribute. + """ + if text_encoder is None: + raise ValueError("Text Encoder not found.") + set_adapter_layers(text_encoder, enabled=True) + + +def _remove_text_encoder_monkey_patch(text_encoder): + recurse_remove_peft_layers(text_encoder) + if getattr(text_encoder, "peft_config", None) is not None: + del text_encoder.peft_config + text_encoder._hf_peft_config_loaded = None + + +def _fetch_state_dict( + pretrained_model_name_or_path_or_dict, + weight_name, + use_safetensors, + local_files_only, + cache_dir, + force_download, + proxies, + token, + revision, + subfolder, + user_agent, + allow_pickle, + metadata=None, +): + model_file = None + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + # Let's first try to load .safetensors weights + if (use_safetensors and weight_name is None) or ( + weight_name is not None and weight_name.endswith(".safetensors") + ): + try: + # Here we're relaxing the loading check to enable more Inference API + # friendliness where sometimes, it's not at all possible to automatically + # determine `weight_name`. + if weight_name is None: + weight_name = _best_guess_weight_name( + pretrained_model_name_or_path_or_dict, + file_extension=".safetensors", + local_files_only=local_files_only, + ) + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name or LORA_WEIGHT_NAME_SAFE, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + state_dict = safetensors.torch.load_file(model_file, device="cpu") + metadata = _load_sft_state_dict_metadata(model_file) + + except (IOError, safetensors.SafetensorError) as e: + if not allow_pickle: + raise e + # try loading non-safetensors weights + model_file = None + metadata = None + pass + + if model_file is None: + if weight_name is None: + weight_name = _best_guess_weight_name( + pretrained_model_name_or_path_or_dict, file_extension=".bin", local_files_only=local_files_only + ) + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name or LORA_WEIGHT_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + state_dict = load_state_dict(model_file) + metadata = None + else: + state_dict = pretrained_model_name_or_path_or_dict + + return state_dict, metadata + + +def _best_guess_weight_name( + pretrained_model_name_or_path_or_dict, file_extension=".safetensors", local_files_only=False +): + if local_files_only or HF_HUB_OFFLINE: + raise ValueError("When using the offline mode, you must specify a `weight_name`.") + + targeted_files = [] + + if os.path.isfile(pretrained_model_name_or_path_or_dict): + return + elif os.path.isdir(pretrained_model_name_or_path_or_dict): + targeted_files = [f for f in os.listdir(pretrained_model_name_or_path_or_dict) if f.endswith(file_extension)] + else: + files_in_repo = model_info(pretrained_model_name_or_path_or_dict).siblings + targeted_files = [f.rfilename for f in files_in_repo if f.rfilename.endswith(file_extension)] + if len(targeted_files) == 0: + return + + # "scheduler" does not correspond to a LoRA checkpoint. + # "optimizer" does not correspond to a LoRA checkpoint + # only top-level checkpoints are considered and not the other ones, hence "checkpoint". + unallowed_substrings = {"scheduler", "optimizer", "checkpoint"} + targeted_files = list( + filter(lambda x: all(substring not in x for substring in unallowed_substrings), targeted_files) + ) + + if any(f.endswith(LORA_WEIGHT_NAME) for f in targeted_files): + targeted_files = list(filter(lambda x: x.endswith(LORA_WEIGHT_NAME), targeted_files)) + elif any(f.endswith(LORA_WEIGHT_NAME_SAFE) for f in targeted_files): + targeted_files = list(filter(lambda x: x.endswith(LORA_WEIGHT_NAME_SAFE), targeted_files)) + + if len(targeted_files) > 1: + logger.warning( + f"Provided path contains more than one weights file in the {file_extension} format. `{targeted_files[0]}` is going to be loaded, for precise control, specify a `weight_name` in `load_lora_weights`." + ) + weight_name = targeted_files[0] + return weight_name + + +def _pack_dict_with_prefix(state_dict, prefix): + sd_with_prefix = {f"{prefix}.{key}": value for key, value in state_dict.items()} + return sd_with_prefix + + +def _load_lora_into_text_encoder( + state_dict, + network_alphas, + text_encoder, + prefix=None, + lora_scale=1.0, + text_encoder_name="text_encoder", + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, +): + from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading + + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + if network_alphas and metadata: + raise ValueError("`network_alphas` and `metadata` cannot be specified both at the same time.") + + peft_kwargs = {} + if low_cpu_mem_usage: + if not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + if not is_transformers_version(">", "4.45.2"): + # Note from sayakpaul: It's not in `transformers` stable yet. + # https://github.com/huggingface/transformers/pull/33725/ + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `transformers` version. Please update it with `pip install -U transformers`." + ) + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), + # then the `state_dict` keys should have `unet_name` and/or `text_encoder_name` as + # their prefixes. + prefix = text_encoder_name if prefix is None else prefix + + # Safe prefix to check with. + if hotswap and any(text_encoder_name in key for key in state_dict.keys()): + raise ValueError("At the moment, hotswapping is not supported for text encoders, please pass `hotswap=False`.") + + # Load the layers corresponding to text encoder and make necessary adjustments. + if prefix is not None: + state_dict = {k.removeprefix(f"{prefix}."): v for k, v in state_dict.items() if k.startswith(f"{prefix}.")} + if metadata is not None: + metadata = {k.removeprefix(f"{prefix}."): v for k, v in metadata.items() if k.startswith(f"{prefix}.")} + + if len(state_dict) > 0: + logger.info(f"Loading {prefix}.") + rank = {} + state_dict = convert_state_dict_to_diffusers(state_dict) + + # convert state dict + state_dict = convert_state_dict_to_peft(state_dict) + + for name, _ in text_encoder.named_modules(): + if name.endswith((".q_proj", ".k_proj", ".v_proj", ".out_proj", ".fc1", ".fc2")): + rank_key = f"{name}.lora_B.weight" + if rank_key in state_dict: + rank[rank_key] = state_dict[rank_key].shape[1] + + if network_alphas is not None: + alpha_keys = [k for k in network_alphas.keys() if k.startswith(prefix) and k.split(".")[0] == prefix] + network_alphas = {k.removeprefix(f"{prefix}."): v for k, v in network_alphas.items() if k in alpha_keys} + + # create `LoraConfig` + lora_config = _create_lora_config(state_dict, network_alphas, metadata, rank, is_unet=False) + + # adapter_name + if adapter_name is None: + adapter_name = get_adapter_name(text_encoder) + + # + + if prefix is not None and not state_dict: + model_class_name = text_encoder.__class__.__name__ + logger.warning( + f"No LoRA keys associated to {model_class_name} found with the {prefix=}. " + "This is safe to ignore if LoRA state dict didn't originally have any " + f"{model_class_name} related params. You can also try specifying `prefix=None` " + "to resolve the warning. Otherwise, open an issue if you think it's unexpected: " + "https://github.com/huggingface/diffusers/issues/new" + ) + + +def _func_optionally_disable_offloading(_pipeline): + """ + Optionally removes offloading in case the pipeline has been already sequentially offloaded to CPU. + + Args: + _pipeline (`DiffusionPipeline`): + The pipeline to disable offloading for. + + Returns: + tuple: + A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` or `is_group_offload` is True. + """ + from ..hooks.group_offloading import _is_group_offload_enabled + + is_model_cpu_offload = False + is_sequential_cpu_offload = False + is_group_offload = False + + if _pipeline is not None and _pipeline.hf_device_map is None: + for _, component in _pipeline.components.items(): + if not isinstance(component, nn.Module): + continue + is_group_offload = is_group_offload or _is_group_offload_enabled(component) + if not hasattr(component, "_hf_hook"): + continue + is_model_cpu_offload = is_model_cpu_offload or isinstance(component._hf_hook, CpuOffload) + is_sequential_cpu_offload = is_sequential_cpu_offload or ( + isinstance(component._hf_hook, AlignDevicesHook) + or hasattr(component._hf_hook, "hooks") + and isinstance(component._hf_hook.hooks[0], AlignDevicesHook) + ) + + if is_sequential_cpu_offload or is_model_cpu_offload: + logger.info( + "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again." + ) + for _, component in _pipeline.components.items(): + if not isinstance(component, nn.Module) or not hasattr(component, "_hf_hook"): + continue + remove_hook_from_module(component, recurse=is_sequential_cpu_offload) + + return (is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload) + + +class LoraBaseMixin: + """Utility class for handling LoRAs.""" + + _lora_loadable_modules = [] + _merged_adapters = set() + + @property + def lora_scale(self) -> float: + """ + Returns the lora scale which can be set at run time by the pipeline. # if `_lora_scale` has not been set, + return 1. + """ + return self._lora_scale if hasattr(self, "_lora_scale") else 1.0 + + @property + def num_fused_loras(self): + """Returns the number of LoRAs that have been fused.""" + return len(self._merged_adapters) + + @property + def fused_loras(self): + """Returns names of the LoRAs that have been fused.""" + return self._merged_adapters + + def load_lora_weights(self, **kwargs): + raise NotImplementedError("`load_lora_weights()` is not implemented.") + + @classmethod + def save_lora_weights(cls, **kwargs): + raise NotImplementedError("`save_lora_weights()` not implemented.") + + @classmethod + def lora_state_dict(cls, **kwargs): + raise NotImplementedError("`lora_state_dict()` is not implemented.") + + def unload_lora_weights(self): + """ + Unloads the LoRA parameters. + + Examples: + + ```python + >>> # Assuming `pipeline` is already loaded with the LoRA parameters. + >>> pipeline.unload_lora_weights() + >>> ... + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if model is not None: + if issubclass(model.__class__, ModelMixin): + model.unload_lora() + elif issubclass(model.__class__, PreTrainedModel): + _remove_text_encoder_monkey_patch(model) + + def fuse_lora( + self, + components: list[str] = [], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + Fuses the LoRA parameters into the original parameters of the corresponding blocks. + + > [!WARNING] > This is an experimental API. + + Args: + components: (`list[str]`): list of LoRA-injectable components to fuse the LoRAs into. + lora_scale (`float`, defaults to 1.0): + Controls how much to influence the outputs with the LoRA parameters. + safe_fusing (`bool`, defaults to `False`): + Whether to check fused weights for NaN values before fusing and if values are NaN not fusing them. + adapter_names (`list[str]`, *optional*): + Adapter names to be used for fusing. If nothing is passed, all active adapters will be fused. + + Example: + + ```py + from diffusers import DiffusionPipeline + import torch + + pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipeline.fuse_lora(lora_scale=0.7) + ``` + """ + if "fuse_unet" in kwargs: + depr_message = "Passing `fuse_unet` to `fuse_lora()` is deprecated and will be ignored. Please use the `components` argument and provide a list of the components whose LoRAs are to be fused. `fuse_unet` will be removed in a future version." + deprecate( + "fuse_unet", + "1.0.0", + depr_message, + ) + if "fuse_transformer" in kwargs: + depr_message = "Passing `fuse_transformer` to `fuse_lora()` is deprecated and will be ignored. Please use the `components` argument and provide a list of the components whose LoRAs are to be fused. `fuse_transformer` will be removed in a future version." + deprecate( + "fuse_transformer", + "1.0.0", + depr_message, + ) + if "fuse_text_encoder" in kwargs: + depr_message = "Passing `fuse_text_encoder` to `fuse_lora()` is deprecated and will be ignored. Please use the `components` argument and provide a list of the components whose LoRAs are to be fused. `fuse_text_encoder` will be removed in a future version." + deprecate( + "fuse_text_encoder", + "1.0.0", + depr_message, + ) + + if len(components) == 0: + raise ValueError("`components` cannot be an empty list.") + + # Need to retrieve the names as `adapter_names` can be None. So we cannot directly use it + # in `self._merged_adapters = self._merged_adapters | merged_adapter_names`. + merged_adapter_names = set() + for fuse_component in components: + if fuse_component not in self._lora_loadable_modules: + raise ValueError(f"{fuse_component} is not found in {self._lora_loadable_modules=}.") + + model = getattr(self, fuse_component, None) + if model is not None: + # check if diffusers model + if issubclass(model.__class__, ModelMixin): + model.fuse_lora(lora_scale, safe_fusing=safe_fusing, adapter_names=adapter_names) + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + merged_adapter_names.update(set(module.merged_adapters)) + # handle transformers models. + if issubclass(model.__class__, PreTrainedModel): + fuse_text_encoder_lora( + model, lora_scale=lora_scale, safe_fusing=safe_fusing, adapter_names=adapter_names + ) + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + merged_adapter_names.update(set(module.merged_adapters)) + + self._merged_adapters = self._merged_adapters | merged_adapter_names + + def unfuse_lora(self, components: list[str] = [], **kwargs): + r""" + Reverses the effect of + [`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora). + + > [!WARNING] > This is an experimental API. + + Args: + components (`list[str]`): list of LoRA-injectable components to unfuse LoRA from. + unfuse_unet (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters. + unfuse_text_encoder (`bool`, defaults to `True`): + Whether to unfuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the + LoRA parameters then it won't have any effect. + """ + if "unfuse_unet" in kwargs: + depr_message = "Passing `unfuse_unet` to `unfuse_lora()` is deprecated and will be ignored. Please use the `components` argument. `unfuse_unet` will be removed in a future version." + deprecate( + "unfuse_unet", + "1.0.0", + depr_message, + ) + if "unfuse_transformer" in kwargs: + depr_message = "Passing `unfuse_transformer` to `unfuse_lora()` is deprecated and will be ignored. Please use the `components` argument. `unfuse_transformer` will be removed in a future version." + deprecate( + "unfuse_transformer", + "1.0.0", + depr_message, + ) + if "unfuse_text_encoder" in kwargs: + depr_message = "Passing `unfuse_text_encoder` to `unfuse_lora()` is deprecated and will be ignored. Please use the `components` argument. `unfuse_text_encoder` will be removed in a future version." + deprecate( + "unfuse_text_encoder", + "1.0.0", + depr_message, + ) + + if len(components) == 0: + raise ValueError("`components` cannot be an empty list.") + + for fuse_component in components: + if fuse_component not in self._lora_loadable_modules: + raise ValueError(f"{fuse_component} is not found in {self._lora_loadable_modules=}.") + + model = getattr(self, fuse_component, None) + if model is not None: + if issubclass(model.__class__, (ModelMixin, PreTrainedModel)): + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + for adapter in set(module.merged_adapters): + if adapter and adapter in self._merged_adapters: + self._merged_adapters = self._merged_adapters - {adapter} + module.unmerge() + + def set_adapters( + self, + adapter_names: list[str] | str, + adapter_weights: float | dict | list[float] | list[dict] | None = None, + ): + """ + Set the currently active adapters for use in the pipeline. + + Args: + adapter_names (`list[str]` or `str`): + The names of the adapters to use. + adapter_weights (`list[float, float]`, *optional*): + The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the + adapters. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipeline.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5]) + ``` + """ + if isinstance(adapter_weights, dict): + components_passed = set(adapter_weights.keys()) + lora_components = set(self._lora_loadable_modules) + + invalid_components = sorted(components_passed - lora_components) + if invalid_components: + logger.warning( + f"The following components in `adapter_weights` are not part of the pipeline: {invalid_components}. " + f"Available components that are LoRA-compatible: {self._lora_loadable_modules}. So, weights belonging " + "to the invalid components will be removed and ignored." + ) + adapter_weights = {k: v for k, v in adapter_weights.items() if k not in invalid_components} + + adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names + adapter_weights = copy.deepcopy(adapter_weights) + + # Expand weights into a list, one entry per adapter + if not isinstance(adapter_weights, list): + adapter_weights = [adapter_weights] * len(adapter_names) + + if len(adapter_names) != len(adapter_weights): + raise ValueError( + f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(adapter_weights)}" + ) + + list_adapters = self.get_list_adapters() # eg {"unet": ["adapter1", "adapter2"], "text_encoder": ["adapter2"]} + # eg ["adapter1", "adapter2"] + all_adapters = {adapter for adapters in list_adapters.values() for adapter in adapters} + missing_adapters = set(adapter_names) - all_adapters + if len(missing_adapters) > 0: + raise ValueError( + f"Adapter name(s) {missing_adapters} not in the list of present adapters: {all_adapters}." + ) + + # eg {"adapter1": ["unet"], "adapter2": ["unet", "text_encoder"]} + invert_list_adapters = { + adapter: [part for part, adapters in list_adapters.items() if adapter in adapters] + for adapter in all_adapters + } + + # Decompose weights into weights for denoiser and text encoders. + _component_adapter_weights = {} + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + # To guard for cases like Wan. In Wan2.1 and WanVace, we have a single denoiser. + # Whereas in Wan 2.2, we have two denoisers. + if model is None: + continue + + for adapter_name, weights in zip(adapter_names, adapter_weights): + if isinstance(weights, dict): + component_adapter_weights = weights.pop(component, None) + if component_adapter_weights is not None and component not in invert_list_adapters[adapter_name]: + logger.warning( + ( + f"Lora weight dict for adapter '{adapter_name}' contains {component}," + f"but this will be ignored because {adapter_name} does not contain weights for {component}." + f"Valid parts for {adapter_name} are: {invert_list_adapters[adapter_name]}." + ) + ) + + else: + component_adapter_weights = weights + + _component_adapter_weights.setdefault(component, []) + _component_adapter_weights[component].append(component_adapter_weights) + + if issubclass(model.__class__, ModelMixin): + model.set_adapters(adapter_names, _component_adapter_weights[component]) + elif issubclass(model.__class__, PreTrainedModel): + set_adapters_for_text_encoder(adapter_names, model, _component_adapter_weights[component]) + + def disable_lora(self): + """ + Disables the active LoRA layers of the pipeline. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + pipeline.disable_lora() + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if model is not None: + if issubclass(model.__class__, ModelMixin): + model.disable_lora() + elif issubclass(model.__class__, PreTrainedModel): + disable_lora_for_text_encoder(model) + + def enable_lora(self): + """ + Enables the active LoRA layers of the pipeline. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + pipeline.enable_lora() + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if model is not None: + if issubclass(model.__class__, ModelMixin): + model.enable_lora() + elif issubclass(model.__class__, PreTrainedModel): + enable_lora_for_text_encoder(model) + + def delete_adapters(self, adapter_names: list[str] | str): + """ + Delete an adapter's LoRA layers from the pipeline. + + Args: + adapter_names (`list[str, str]`): + The names of the adapters to delete. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_names="cinematic" + ) + pipeline.delete_adapters("cinematic") + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + if isinstance(adapter_names, str): + adapter_names = [adapter_names] + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if model is not None: + if issubclass(model.__class__, ModelMixin): + model.delete_adapters(adapter_names) + elif issubclass(model.__class__, PreTrainedModel): + for adapter_name in adapter_names: + delete_adapter_layers(model, adapter_name) + + def get_active_adapters(self) -> list[str]: + """ + Gets the list of the current active adapters. + + Example: + + ```python + from diffusers import DiffusionPipeline + + pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + ).to("cuda") + pipeline.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy") + pipeline.get_active_adapters() + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError( + "PEFT backend is required for this method. Please install the latest version of PEFT `pip install -U peft`" + ) + + active_adapters = [] + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if model is not None and issubclass(model.__class__, ModelMixin): + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + active_adapters = module.active_adapters + break + + return active_adapters + + def get_list_adapters(self) -> dict[str, list[str]]: + """ + Gets the current list of all available adapters in the pipeline. + """ + if not USE_PEFT_BACKEND: + raise ValueError( + "PEFT backend is required for this method. Please install the latest version of PEFT `pip install -U peft`" + ) + + set_adapters = {} + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if ( + model is not None + and issubclass(model.__class__, (ModelMixin, PreTrainedModel)) + and hasattr(model, "peft_config") + ): + set_adapters[component] = list(model.peft_config.keys()) + + return set_adapters + + def set_lora_device(self, adapter_names: list[str], device: torch.device | str | int) -> None: + """ + Moves the LoRAs listed in `adapter_names` to a target device. Useful for offloading the LoRA to the CPU in case + you want to load multiple adapters and free some GPU memory. + + After offloading the LoRA adapters to CPU, as long as the rest of the model is still on GPU, the LoRA adapters + can no longer be used for inference, as that would cause a device mismatch. Remember to set the device back to + GPU before using those LoRA adapters for inference. + + ```python + >>> pipe.load_lora_weights(path_1, adapter_name="adapter-1") + >>> pipe.load_lora_weights(path_2, adapter_name="adapter-2") + >>> pipe.set_adapters("adapter-1") + >>> image_1 = pipe(**kwargs) + >>> # switch to adapter-2, offload adapter-1 + >>> pipeline.set_lora_device(adapter_names=["adapter-1"], device="cpu") + >>> pipeline.set_lora_device(adapter_names=["adapter-2"], device="cuda:0") + >>> pipe.set_adapters("adapter-2") + >>> image_2 = pipe(**kwargs) + >>> # switch back to adapter-1, offload adapter-2 + >>> pipeline.set_lora_device(adapter_names=["adapter-2"], device="cpu") + >>> pipeline.set_lora_device(adapter_names=["adapter-1"], device="cuda:0") + >>> pipe.set_adapters("adapter-1") + >>> ... + ``` + + Args: + adapter_names (`list[str]`): + list of adapters to send device to. + device (`torch.device | str | int`): + Device to send the adapters to. Can be either a torch device, a str or an integer. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + for component in self._lora_loadable_modules: + model = getattr(self, component, None) + if model is not None: + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + for adapter_name in adapter_names: + if adapter_name not in module.lora_A: + # it is sufficient to check lora_A + continue + + module.lora_A[adapter_name].to(device) + module.lora_B[adapter_name].to(device) + # this is a param, not a module, so device placement is not in-place -> re-assign + if hasattr(module, "lora_magnitude_vector") and module.lora_magnitude_vector is not None: + if adapter_name in module.lora_magnitude_vector: + module.lora_magnitude_vector[adapter_name] = module.lora_magnitude_vector[ + adapter_name + ].to(device) + + def enable_lora_hotswap(self, **kwargs) -> None: + """ + Hotswap adapters without triggering recompilation of a model or if the ranks of the loaded adapters are + different. + + Args: + target_rank (`int`): + The highest rank among all the adapters that will be loaded. + check_compiled (`str`, *optional*, defaults to `"error"`): + How to handle a model that is already compiled. The check can return the following messages: + - "error" (default): raise an error + - "warn": issue a warning + - "ignore": do nothing + """ + for key, component in self.components.items(): + if hasattr(component, "enable_lora_hotswap") and (key in self._lora_loadable_modules): + component.enable_lora_hotswap(**kwargs) + + @staticmethod + def pack_weights(layers, prefix): + layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers + return _pack_dict_with_prefix(layers_weights, prefix) + + @staticmethod + def write_lora_layers( + state_dict: dict[str, torch.Tensor], + save_directory: str, + is_main_process: bool, + weight_name: str, + save_function: Callable, + safe_serialization: bool, + lora_adapter_metadata: dict | None = None, + ): + """Writes the state dict of the LoRA layers (optionally with metadata) to disk.""" + if os.path.isfile(save_directory): + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") + return + + if lora_adapter_metadata and not safe_serialization: + raise ValueError("`lora_adapter_metadata` cannot be specified when not using `safe_serialization`.") + if lora_adapter_metadata and not isinstance(lora_adapter_metadata, dict): + raise TypeError("`lora_adapter_metadata` must be of type `dict`.") + + if save_function is None: + if safe_serialization: + + def save_function(weights, filename): + # Inject framework format. + metadata = {"format": "pt"} + if lora_adapter_metadata: + for key, value in lora_adapter_metadata.items(): + if isinstance(value, set): + lora_adapter_metadata[key] = list(value) + metadata[LORA_ADAPTER_METADATA_KEY] = json.dumps( + lora_adapter_metadata, indent=2, sort_keys=True + ) + + return safetensors.torch.save_file(weights, filename, metadata=metadata) + + else: + save_function = torch.save + + os.makedirs(save_directory, exist_ok=True) + + if weight_name is None: + if safe_serialization: + weight_name = LORA_WEIGHT_NAME_SAFE + else: + weight_name = LORA_WEIGHT_NAME + + save_path = Path(save_directory, weight_name).as_posix() + save_function(state_dict, save_path) + logger.info(f"Model weights saved in {save_path}") + + @classmethod + def _save_lora_weights( + cls, + save_directory: str | os.PathLike, + lora_layers: dict[str, dict[str, torch.nn.Module | torch.Tensor]], + lora_metadata: dict[str, dict | None], + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + ): + """ + Helper method to pack and save LoRA weights and metadata. This method centralizes the saving logic for all + pipeline types. + """ + state_dict = {} + final_lora_adapter_metadata = {} + + for prefix, layers in lora_layers.items(): + state_dict.update(cls.pack_weights(layers, prefix)) + + for prefix, metadata in lora_metadata.items(): + if metadata: + final_lora_adapter_metadata.update(_pack_dict_with_prefix(metadata, prefix)) + + cls.write_lora_layers( + state_dict=state_dict, + save_directory=save_directory, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + lora_adapter_metadata=final_lora_adapter_metadata if final_lora_adapter_metadata else None, + ) + + @classmethod + def _optionally_disable_offloading(cls, _pipeline): + return _func_optionally_disable_offloading(_pipeline=_pipeline) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_conversion_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_conversion_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0895d5223e13dae3b0a77cb7a6bca0296153d591 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_conversion_utils.py @@ -0,0 +1,2631 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import torch + +from ..utils import is_peft_version, logging, state_dict_all_zero + + +logger = logging.get_logger(__name__) + + +def swap_scale_shift(weight): + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + return new_weight + + +def _maybe_map_sgm_blocks_to_diffusers(state_dict, unet_config, delimiter="_", block_slice_pos=5): + # 1. get all state_dict_keys + all_keys = list(state_dict.keys()) + sgm_patterns = ["input_blocks", "middle_block", "output_blocks"] + not_sgm_patterns = ["down_blocks", "mid_block", "up_blocks"] + + # check if state_dict contains both patterns + contains_sgm_patterns = False + contains_not_sgm_patterns = False + for key in all_keys: + if any(p in key for p in sgm_patterns): + contains_sgm_patterns = True + elif any(p in key for p in not_sgm_patterns): + contains_not_sgm_patterns = True + + # if state_dict contains both patterns, remove sgm + # we can then return state_dict immediately + if contains_sgm_patterns and contains_not_sgm_patterns: + for key in all_keys: + if any(p in key for p in sgm_patterns): + state_dict.pop(key) + return state_dict + + # 2. check if needs remapping, if not return original dict + is_in_sgm_format = False + for key in all_keys: + if any(p in key for p in sgm_patterns): + is_in_sgm_format = True + break + + if not is_in_sgm_format: + return state_dict + + # 3. Else remap from SGM patterns + new_state_dict = {} + inner_block_map = ["resnets", "attentions", "upsamplers"] + + # Retrieves # of down, mid and up blocks + input_block_ids, middle_block_ids, output_block_ids = set(), set(), set() + + for layer in all_keys: + if "text" in layer: + new_state_dict[layer] = state_dict.pop(layer) + else: + layer_id = int(layer.split(delimiter)[:block_slice_pos][-1]) + if sgm_patterns[0] in layer: + input_block_ids.add(layer_id) + elif sgm_patterns[1] in layer: + middle_block_ids.add(layer_id) + elif sgm_patterns[2] in layer: + output_block_ids.add(layer_id) + else: + raise ValueError(f"Checkpoint not supported because layer {layer} not supported.") + + input_blocks = { + layer_id: [key for key in state_dict if f"input_blocks{delimiter}{layer_id}" in key] + for layer_id in input_block_ids + } + middle_blocks = { + layer_id: [key for key in state_dict if f"middle_block{delimiter}{layer_id}" in key] + for layer_id in middle_block_ids + } + output_blocks = { + layer_id: [key for key in state_dict if f"output_blocks{delimiter}{layer_id}" in key] + for layer_id in output_block_ids + } + + # Rename keys accordingly + for i in input_block_ids: + block_id = (i - 1) // (unet_config.layers_per_block + 1) + layer_in_block_id = (i - 1) % (unet_config.layers_per_block + 1) + + for key in input_blocks[i]: + inner_block_id = int(key.split(delimiter)[block_slice_pos]) + inner_block_key = inner_block_map[inner_block_id] if "op" not in key else "downsamplers" + inner_layers_in_block = str(layer_in_block_id) if "op" not in key else "0" + new_key = delimiter.join( + key.split(delimiter)[: block_slice_pos - 1] + + [str(block_id), inner_block_key, inner_layers_in_block] + + key.split(delimiter)[block_slice_pos + 1 :] + ) + new_state_dict[new_key] = state_dict.pop(key) + + for i in middle_block_ids: + key_part = None + if i == 0: + key_part = [inner_block_map[0], "0"] + elif i == 1: + key_part = [inner_block_map[1], "0"] + elif i == 2: + key_part = [inner_block_map[0], "1"] + else: + raise ValueError(f"Invalid middle block id {i}.") + + for key in middle_blocks[i]: + new_key = delimiter.join( + key.split(delimiter)[: block_slice_pos - 1] + key_part + key.split(delimiter)[block_slice_pos:] + ) + new_state_dict[new_key] = state_dict.pop(key) + + for i in output_block_ids: + block_id = i // (unet_config.layers_per_block + 1) + layer_in_block_id = i % (unet_config.layers_per_block + 1) + + for key in output_blocks[i]: + inner_block_id = int(key.split(delimiter)[block_slice_pos]) + inner_block_key = inner_block_map[inner_block_id] + inner_layers_in_block = str(layer_in_block_id) if inner_block_id < 2 else "0" + new_key = delimiter.join( + key.split(delimiter)[: block_slice_pos - 1] + + [str(block_id), inner_block_key, inner_layers_in_block] + + key.split(delimiter)[block_slice_pos + 1 :] + ) + new_state_dict[new_key] = state_dict.pop(key) + + if state_dict: + raise ValueError("At this point all state dict entries have to be converted.") + + return new_state_dict + + +def _convert_non_diffusers_lora_to_diffusers(state_dict, unet_name="unet", text_encoder_name="text_encoder"): + """ + Converts a non-Diffusers LoRA state dict to a Diffusers compatible state dict. + + Args: + state_dict (`dict`): The state dict to convert. + unet_name (`str`, optional): The name of the U-Net module in the Diffusers model. Defaults to "unet". + text_encoder_name (`str`, optional): The name of the text encoder module in the Diffusers model. Defaults to + "text_encoder". + + Returns: + `tuple`: A tuple containing the converted state dict and a dictionary of alphas. + """ + unet_state_dict = {} + te_state_dict = {} + te2_state_dict = {} + network_alphas = {} + + # Check for DoRA-enabled LoRAs. + dora_present_in_unet = any("dora_scale" in k and "lora_unet_" in k for k in state_dict) + dora_present_in_te = any("dora_scale" in k and ("lora_te_" in k or "lora_te1_" in k) for k in state_dict) + dora_present_in_te2 = any("dora_scale" in k and "lora_te2_" in k for k in state_dict) + if dora_present_in_unet or dora_present_in_te or dora_present_in_te2: + if is_peft_version("<", "0.9.0"): + raise ValueError( + "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`." + ) + + # Iterate over all LoRA weights. + all_lora_keys = list(state_dict.keys()) + for key in all_lora_keys: + if not key.endswith("lora_down.weight"): + continue + + # Extract LoRA name. + lora_name = key.split(".")[0] + + # Find corresponding up weight and alpha. + lora_name_up = lora_name + ".lora_up.weight" + lora_name_alpha = lora_name + ".alpha" + + # Handle U-Net LoRAs. + if lora_name.startswith("lora_unet_"): + diffusers_name = _convert_unet_lora_key(key) + + # Store down and up weights. + unet_state_dict[diffusers_name] = state_dict.pop(key) + unet_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up) + + # Store DoRA scale if present. + if dora_present_in_unet: + dora_scale_key_to_replace = "_lora.down." if "_lora.down." in diffusers_name else ".lora.down." + unet_state_dict[diffusers_name.replace(dora_scale_key_to_replace, ".lora_magnitude_vector.")] = ( + state_dict.pop(key.replace("lora_down.weight", "dora_scale")) + ) + + # Handle text encoder LoRAs. + elif lora_name.startswith(("lora_te_", "lora_te1_", "lora_te2_")): + diffusers_name = _convert_text_encoder_lora_key(key, lora_name) + + # Store down and up weights for te or te2. + if lora_name.startswith(("lora_te_", "lora_te1_")): + te_state_dict[diffusers_name] = state_dict.pop(key) + te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up) + else: + te2_state_dict[diffusers_name] = state_dict.pop(key) + te2_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up) + + # Store DoRA scale if present. + if dora_present_in_te or dora_present_in_te2: + dora_scale_key_to_replace_te = ( + "_lora.down." if "_lora.down." in diffusers_name else ".lora_linear_layer." + ) + if lora_name.startswith(("lora_te_", "lora_te1_")): + te_state_dict[diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")] = ( + state_dict.pop(key.replace("lora_down.weight", "dora_scale")) + ) + elif lora_name.startswith("lora_te2_"): + te2_state_dict[diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")] = ( + state_dict.pop(key.replace("lora_down.weight", "dora_scale")) + ) + + # Store alpha if present. + if lora_name_alpha in state_dict: + alpha = state_dict.pop(lora_name_alpha).item() + network_alphas.update(_get_alpha_name(lora_name_alpha, diffusers_name, alpha)) + + # Check if any keys remain. + if len(state_dict) > 0: + raise ValueError(f"The following keys have not been correctly renamed: \n\n {', '.join(state_dict.keys())}") + + logger.info("Non-diffusers checkpoint detected.") + + # Construct final state dict. + unet_state_dict = {f"{unet_name}.{module_name}": params for module_name, params in unet_state_dict.items()} + te_state_dict = {f"{text_encoder_name}.{module_name}": params for module_name, params in te_state_dict.items()} + te2_state_dict = ( + {f"text_encoder_2.{module_name}": params for module_name, params in te2_state_dict.items()} + if len(te2_state_dict) > 0 + else None + ) + if te2_state_dict is not None: + te_state_dict.update(te2_state_dict) + + new_state_dict = {**unet_state_dict, **te_state_dict} + return new_state_dict, network_alphas + + +def _convert_unet_lora_key(key): + """ + Converts a U-Net LoRA key to a Diffusers compatible key. + """ + diffusers_name = key.replace("lora_unet_", "").replace("_", ".") + + # Replace common U-Net naming patterns. + diffusers_name = diffusers_name.replace("input.blocks", "down_blocks") + diffusers_name = diffusers_name.replace("down.blocks", "down_blocks") + diffusers_name = diffusers_name.replace("middle.block", "mid_block") + diffusers_name = diffusers_name.replace("mid.block", "mid_block") + diffusers_name = diffusers_name.replace("output.blocks", "up_blocks") + diffusers_name = diffusers_name.replace("up.blocks", "up_blocks") + diffusers_name = diffusers_name.replace("transformer.blocks", "transformer_blocks") + diffusers_name = diffusers_name.replace("to.q.lora", "to_q_lora") + diffusers_name = diffusers_name.replace("to.k.lora", "to_k_lora") + diffusers_name = diffusers_name.replace("to.v.lora", "to_v_lora") + diffusers_name = diffusers_name.replace("to.out.0.lora", "to_out_lora") + diffusers_name = diffusers_name.replace("proj.in", "proj_in") + diffusers_name = diffusers_name.replace("proj.out", "proj_out") + diffusers_name = diffusers_name.replace("emb.layers", "time_emb_proj") + + # SDXL specific conversions. + if "emb" in diffusers_name and "time.emb.proj" not in diffusers_name: + pattern = r"\.\d+(?=\D*$)" + diffusers_name = re.sub(pattern, "", diffusers_name, count=1) + if ".in." in diffusers_name: + diffusers_name = diffusers_name.replace("in.layers.2", "conv1") + if ".out." in diffusers_name: + diffusers_name = diffusers_name.replace("out.layers.3", "conv2") + if "downsamplers" in diffusers_name or "upsamplers" in diffusers_name: + diffusers_name = diffusers_name.replace("op", "conv") + if "skip" in diffusers_name: + diffusers_name = diffusers_name.replace("skip.connection", "conv_shortcut") + + # LyCORIS specific conversions. + if "time.emb.proj" in diffusers_name: + diffusers_name = diffusers_name.replace("time.emb.proj", "time_emb_proj") + if "conv.shortcut" in diffusers_name: + diffusers_name = diffusers_name.replace("conv.shortcut", "conv_shortcut") + + # General conversions. + if "transformer_blocks" in diffusers_name: + if "attn1" in diffusers_name or "attn2" in diffusers_name: + diffusers_name = diffusers_name.replace("attn1", "attn1.processor") + diffusers_name = diffusers_name.replace("attn2", "attn2.processor") + elif "ff" in diffusers_name: + pass + elif any(key in diffusers_name for key in ("proj_in", "proj_out")): + pass + else: + pass + + return diffusers_name + + +def _convert_text_encoder_lora_key(key, lora_name): + """ + Converts a text encoder LoRA key to a Diffusers compatible key. + """ + if lora_name.startswith(("lora_te_", "lora_te1_")): + key_to_replace = "lora_te_" if lora_name.startswith("lora_te_") else "lora_te1_" + else: + key_to_replace = "lora_te2_" + + diffusers_name = key.replace(key_to_replace, "").replace("_", ".") + diffusers_name = diffusers_name.replace("text.model", "text_model") + diffusers_name = diffusers_name.replace("self.attn", "self_attn") + diffusers_name = diffusers_name.replace("q.proj.lora", "to_q_lora") + diffusers_name = diffusers_name.replace("k.proj.lora", "to_k_lora") + diffusers_name = diffusers_name.replace("v.proj.lora", "to_v_lora") + diffusers_name = diffusers_name.replace("out.proj.lora", "to_out_lora") + diffusers_name = diffusers_name.replace("text.projection", "text_projection") + + if "self_attn" in diffusers_name or "text_projection" in diffusers_name: + pass + elif "mlp" in diffusers_name: + # Be aware that this is the new diffusers convention and the rest of the code might + # not utilize it yet. + diffusers_name = diffusers_name.replace(".lora.", ".lora_linear_layer.") + + return diffusers_name + + +def _get_alpha_name(lora_name_alpha, diffusers_name, alpha): + """ + Gets the correct alpha name for the Diffusers model. + """ + if lora_name_alpha.startswith("lora_unet_"): + prefix = "unet." + elif lora_name_alpha.startswith(("lora_te_", "lora_te1_")): + prefix = "text_encoder." + else: + prefix = "text_encoder_2." + new_name = prefix + diffusers_name.split(".lora.")[0] + ".alpha" + return {new_name: alpha} + + +# The utilities under `_convert_kohya_flux_lora_to_diffusers()` +# are adapted from https://github.com/kohya-ss/sd-scripts/blob/a61cf73a5cb5209c3f4d1a3688dd276a4dfd1ecb/networks/convert_flux_lora.py +def _convert_kohya_flux_lora_to_diffusers(state_dict): + def _convert_to_ai_toolkit(sds_sd, ait_sd, sds_key, ait_key): + if sds_key + ".lora_down.weight" not in sds_sd: + return + down_weight = sds_sd.pop(sds_key + ".lora_down.weight") + + # scale weight by alpha and dim + rank = down_weight.shape[0] + default_alpha = torch.tensor(rank, dtype=down_weight.dtype, device=down_weight.device, requires_grad=False) + alpha = sds_sd.pop(sds_key + ".alpha", default_alpha).item() # alpha is scalar + scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here + + # calculate scale_down and scale_up to keep the same value. if scale is 4, scale_down is 2 and scale_up is 2 + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + + ait_sd[ait_key + ".lora_A.weight"] = down_weight * scale_down + ait_sd[ait_key + ".lora_B.weight"] = sds_sd.pop(sds_key + ".lora_up.weight") * scale_up + + def _convert_to_ai_toolkit_cat(sds_sd, ait_sd, sds_key, ait_keys, dims=None): + if sds_key + ".lora_down.weight" not in sds_sd: + return + down_weight = sds_sd.pop(sds_key + ".lora_down.weight") + up_weight = sds_sd.pop(sds_key + ".lora_up.weight") + sd_lora_rank = down_weight.shape[0] + + # scale weight by alpha and dim + default_alpha = torch.tensor( + sd_lora_rank, dtype=down_weight.dtype, device=down_weight.device, requires_grad=False + ) + alpha = sds_sd.pop(sds_key + ".alpha", default_alpha) + scale = alpha / sd_lora_rank + + # calculate scale_down and scale_up + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + + down_weight = down_weight * scale_down + up_weight = up_weight * scale_up + + # calculate dims if not provided + num_splits = len(ait_keys) + if dims is None: + dims = [up_weight.shape[0] // num_splits] * num_splits + else: + assert sum(dims) == up_weight.shape[0] + + # check upweight is sparse or not + is_sparse = False + if sd_lora_rank % num_splits == 0: + ait_rank = sd_lora_rank // num_splits + is_sparse = True + i = 0 + for j in range(len(dims)): + for k in range(len(dims)): + if j == k: + continue + is_sparse = is_sparse and torch.all( + up_weight[i : i + dims[j], k * ait_rank : (k + 1) * ait_rank] == 0 + ) + i += dims[j] + if is_sparse: + logger.info(f"weight is sparse: {sds_key}") + + # make ai-toolkit weight + ait_down_keys = [k + ".lora_A.weight" for k in ait_keys] + ait_up_keys = [k + ".lora_B.weight" for k in ait_keys] + if not is_sparse: + # down_weight is copied to each split + ait_sd.update(dict.fromkeys(ait_down_keys, down_weight)) + + # up_weight is split to each split + ait_sd.update({k: v for k, v in zip(ait_up_keys, torch.split(up_weight, dims, dim=0))}) # noqa: C416 + else: + # down_weight is chunked to each split + ait_sd.update({k: v for k, v in zip(ait_down_keys, torch.chunk(down_weight, num_splits, dim=0))}) # noqa: C416 + + # up_weight is sparse: only non-zero values are copied to each split + i = 0 + for j in range(len(dims)): + ait_sd[ait_up_keys[j]] = up_weight[i : i + dims[j], j * ait_rank : (j + 1) * ait_rank].contiguous() + i += dims[j] + + def _convert_sd_scripts_to_ai_toolkit(sds_sd): + ait_sd = {} + for i in range(19): + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_img_attn_proj", + f"transformer.transformer_blocks.{i}.attn.to_out.0", + ) + _convert_to_ai_toolkit_cat( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_img_attn_qkv", + [ + f"transformer.transformer_blocks.{i}.attn.to_q", + f"transformer.transformer_blocks.{i}.attn.to_k", + f"transformer.transformer_blocks.{i}.attn.to_v", + ], + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_img_mlp_0", + f"transformer.transformer_blocks.{i}.ff.net.0.proj", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_img_mlp_2", + f"transformer.transformer_blocks.{i}.ff.net.2", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_img_mod_lin", + f"transformer.transformer_blocks.{i}.norm1.linear", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_txt_attn_proj", + f"transformer.transformer_blocks.{i}.attn.to_add_out", + ) + _convert_to_ai_toolkit_cat( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_txt_attn_qkv", + [ + f"transformer.transformer_blocks.{i}.attn.add_q_proj", + f"transformer.transformer_blocks.{i}.attn.add_k_proj", + f"transformer.transformer_blocks.{i}.attn.add_v_proj", + ], + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_txt_mlp_0", + f"transformer.transformer_blocks.{i}.ff_context.net.0.proj", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_txt_mlp_2", + f"transformer.transformer_blocks.{i}.ff_context.net.2", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_double_blocks_{i}_txt_mod_lin", + f"transformer.transformer_blocks.{i}.norm1_context.linear", + ) + + for i in range(38): + _convert_to_ai_toolkit_cat( + sds_sd, + ait_sd, + f"lora_unet_single_blocks_{i}_linear1", + [ + f"transformer.single_transformer_blocks.{i}.attn.to_q", + f"transformer.single_transformer_blocks.{i}.attn.to_k", + f"transformer.single_transformer_blocks.{i}.attn.to_v", + f"transformer.single_transformer_blocks.{i}.proj_mlp", + ], + dims=[3072, 3072, 3072, 12288], + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_single_blocks_{i}_linear2", + f"transformer.single_transformer_blocks.{i}.proj_out", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + f"lora_unet_single_blocks_{i}_modulation_lin", + f"transformer.single_transformer_blocks.{i}.norm.linear", + ) + + # TODO: alphas. + def assign_remaining_weights(assignments, source): + for lora_key in ["lora_A", "lora_B"]: + orig_lora_key = "lora_down" if lora_key == "lora_A" else "lora_up" + for target_fmt, source_fmt, transform in assignments: + target_key = target_fmt.format(lora_key=lora_key) + source_key = source_fmt.format(orig_lora_key=orig_lora_key) + value = source.pop(source_key) + if transform: + value = transform(value) + ait_sd[target_key] = value + + if any("guidance_in" in k for k in sds_sd): + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_guidance_in_in_layer", + "time_text_embed.guidance_embedder.linear_1", + ) + + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_guidance_in_out_layer", + "time_text_embed.guidance_embedder.linear_2", + ) + + if any("img_in" in k for k in sds_sd): + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_img_in", + "x_embedder", + ) + + if any("txt_in" in k for k in sds_sd): + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_txt_in", + "context_embedder", + ) + + if any("time_in" in k for k in sds_sd): + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_time_in_in_layer", + "time_text_embed.timestep_embedder.linear_1", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_time_in_out_layer", + "time_text_embed.timestep_embedder.linear_2", + ) + + if any("vector_in" in k for k in sds_sd): + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_vector_in_in_layer", + "time_text_embed.text_embedder.linear_1", + ) + _convert_to_ai_toolkit( + sds_sd, + ait_sd, + "lora_unet_vector_in_out_layer", + "time_text_embed.text_embedder.linear_2", + ) + + if any("final_layer" in k for k in sds_sd): + # Notice the swap in processing for "final_layer". + assign_remaining_weights( + [ + ( + "norm_out.linear.{lora_key}.weight", + "lora_unet_final_layer_adaLN_modulation_1.{orig_lora_key}.weight", + swap_scale_shift, + ), + ("proj_out.{lora_key}.weight", "lora_unet_final_layer_linear.{orig_lora_key}.weight", None), + ], + sds_sd, + ) + + remaining_keys = list(sds_sd.keys()) + te_state_dict = {} + if remaining_keys: + if not all(k.startswith(("lora_te", "lora_te1")) for k in remaining_keys): + raise ValueError(f"Incompatible keys detected: \n\n {', '.join(remaining_keys)}") + for key in remaining_keys: + if not key.endswith("lora_down.weight"): + continue + + lora_name = key.split(".")[0] + lora_name_up = f"{lora_name}.lora_up.weight" + lora_name_alpha = f"{lora_name}.alpha" + diffusers_name = _convert_text_encoder_lora_key(key, lora_name) + + if lora_name.startswith(("lora_te_", "lora_te1_")): + down_weight = sds_sd.pop(key) + sd_lora_rank = down_weight.shape[0] + te_state_dict[diffusers_name] = down_weight + te_state_dict[diffusers_name.replace(".down.", ".up.")] = sds_sd.pop(lora_name_up) + + if lora_name_alpha in sds_sd: + alpha = sds_sd.pop(lora_name_alpha).item() + scale = alpha / sd_lora_rank + + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + + te_state_dict[diffusers_name] *= scale_down + te_state_dict[diffusers_name.replace(".down.", ".up.")] *= scale_up + + if len(sds_sd) > 0: + logger.warning(f"Unsupported keys for ai-toolkit: {sds_sd.keys()}") + + if te_state_dict: + te_state_dict = {f"text_encoder.{module_name}": params for module_name, params in te_state_dict.items()} + + new_state_dict = {**ait_sd, **te_state_dict} + return new_state_dict + + def _convert_mixture_state_dict_to_diffusers(state_dict): + new_state_dict = {} + + def _convert(original_key, diffusers_key, state_dict, new_state_dict): + down_key = f"{original_key}.lora_down.weight" + down_weight = state_dict.pop(down_key) + lora_rank = down_weight.shape[0] + + up_weight_key = f"{original_key}.lora_up.weight" + up_weight = state_dict.pop(up_weight_key) + + alpha_key = f"{original_key}.alpha" + alpha = state_dict.pop(alpha_key) + + # scale weight by alpha and dim + scale = alpha / lora_rank + # calculate scale_down and scale_up + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + down_weight = down_weight * scale_down + up_weight = up_weight * scale_up + + diffusers_down_key = f"{diffusers_key}.lora_A.weight" + new_state_dict[diffusers_down_key] = down_weight + new_state_dict[diffusers_down_key.replace(".lora_A.", ".lora_B.")] = up_weight + + all_unique_keys = { + k.replace(".lora_down.weight", "").replace(".lora_up.weight", "").replace(".alpha", "") + for k in state_dict + if not k.startswith(("lora_unet_")) + } + assert all(k.startswith(("lora_transformer_", "lora_te1_")) for k in all_unique_keys), f"{all_unique_keys=}" + + has_te_keys = False + for k in all_unique_keys: + if k.startswith("lora_transformer_single_transformer_blocks_"): + i = int(k.split("lora_transformer_single_transformer_blocks_")[-1].split("_")[0]) + diffusers_key = f"single_transformer_blocks.{i}" + elif k.startswith("lora_transformer_transformer_blocks_"): + i = int(k.split("lora_transformer_transformer_blocks_")[-1].split("_")[0]) + diffusers_key = f"transformer_blocks.{i}" + elif k.startswith("lora_te1_"): + has_te_keys = True + continue + elif k.startswith("lora_transformer_context_embedder"): + diffusers_key = "context_embedder" + elif k.startswith("lora_transformer_norm_out_linear"): + diffusers_key = "norm_out.linear" + elif k.startswith("lora_transformer_proj_out"): + diffusers_key = "proj_out" + elif k.startswith("lora_transformer_x_embedder"): + diffusers_key = "x_embedder" + elif k.startswith("lora_transformer_time_text_embed_guidance_embedder_linear_"): + i = int(k.split("lora_transformer_time_text_embed_guidance_embedder_linear_")[-1]) + diffusers_key = f"time_text_embed.guidance_embedder.linear_{i}" + elif k.startswith("lora_transformer_time_text_embed_text_embedder_linear_"): + i = int(k.split("lora_transformer_time_text_embed_text_embedder_linear_")[-1]) + diffusers_key = f"time_text_embed.text_embedder.linear_{i}" + elif k.startswith("lora_transformer_time_text_embed_timestep_embedder_linear_"): + i = int(k.split("lora_transformer_time_text_embed_timestep_embedder_linear_")[-1]) + diffusers_key = f"time_text_embed.timestep_embedder.linear_{i}" + else: + raise NotImplementedError(f"Handling for key ({k}) is not implemented.") + + if "attn_" in k: + if "_to_out_0" in k: + diffusers_key += ".attn.to_out.0" + elif "_to_add_out" in k: + diffusers_key += ".attn.to_add_out" + elif any(qkv in k for qkv in ["to_q", "to_k", "to_v"]): + remaining = k.split("attn_")[-1] + diffusers_key += f".attn.{remaining}" + elif any(add_qkv in k for add_qkv in ["add_q_proj", "add_k_proj", "add_v_proj"]): + remaining = k.split("attn_")[-1] + diffusers_key += f".attn.{remaining}" + + _convert(k, diffusers_key, state_dict, new_state_dict) + + if has_te_keys: + layer_pattern = re.compile(r"lora_te1_text_model_encoder_layers_(\d+)") + attn_mapping = { + "q_proj": ".self_attn.q_proj", + "k_proj": ".self_attn.k_proj", + "v_proj": ".self_attn.v_proj", + "out_proj": ".self_attn.out_proj", + } + mlp_mapping = {"fc1": ".mlp.fc1", "fc2": ".mlp.fc2"} + for k in all_unique_keys: + if not k.startswith("lora_te1_"): + continue + + match = layer_pattern.search(k) + if not match: + continue + i = int(match.group(1)) + diffusers_key = f"text_model.encoder.layers.{i}" + + if "attn" in k: + for key_fragment, suffix in attn_mapping.items(): + if key_fragment in k: + diffusers_key += suffix + break + elif "mlp" in k: + for key_fragment, suffix in mlp_mapping.items(): + if key_fragment in k: + diffusers_key += suffix + break + + _convert(k, diffusers_key, state_dict, new_state_dict) + + remaining_all_unet = False + if state_dict: + remaining_all_unet = all(k.startswith("lora_unet_") for k in state_dict) + if remaining_all_unet: + keys = list(state_dict.keys()) + for k in keys: + state_dict.pop(k) + + if len(state_dict) > 0: + raise ValueError( + f"Expected an empty state dict at this point but its has these keys which couldn't be parsed: {list(state_dict.keys())}." + ) + + transformer_state_dict = { + f"transformer.{k}": v for k, v in new_state_dict.items() if not k.startswith("text_model.") + } + te_state_dict = {f"text_encoder.{k}": v for k, v in new_state_dict.items() if k.startswith("text_model.")} + return {**transformer_state_dict, **te_state_dict} + + # This is weird. + # https://huggingface.co/sayakpaul/different-lora-from-civitai/tree/main?show_file_info=sharp_detailed_foot.safetensors + # has both `peft` and non-peft state dict. + has_peft_state_dict = any(k.startswith("transformer.") for k in state_dict) + if has_peft_state_dict: + state_dict = { + k.replace("lora_down.weight", "lora_A.weight").replace("lora_up.weight", "lora_B.weight"): v + for k, v in state_dict.items() + if k.startswith("transformer.") + } + return state_dict + + # Another weird one. + has_mixture = any( + k.startswith("lora_transformer_") and ("lora_down" in k or "lora_up" in k or "alpha" in k) for k in state_dict + ) + + # ComfyUI. + if not has_mixture: + state_dict = {k.replace("diffusion_model.", "lora_unet_"): v for k, v in state_dict.items()} + state_dict = {k.replace("text_encoders.clip_l.transformer.", "lora_te_"): v for k, v in state_dict.items()} + + has_position_embedding = any("position_embedding" in k for k in state_dict) + if has_position_embedding: + zero_status_pe = state_dict_all_zero(state_dict, "position_embedding") + if zero_status_pe: + logger.info( + "The `position_embedding` LoRA params are all zeros which make them ineffective. " + "So, we will purge them out of the current state dict to make loading possible." + ) + + else: + logger.info( + "The state_dict has position_embedding LoRA params and we currently do not support them. " + "Open an issue if you need this supported - https://github.com/huggingface/diffusers/issues/new." + ) + state_dict = {k: v for k, v in state_dict.items() if "position_embedding" not in k} + + has_t5xxl = any(k.startswith("text_encoders.t5xxl.transformer.") for k in state_dict) + if has_t5xxl: + zero_status_t5 = state_dict_all_zero(state_dict, "text_encoders.t5xxl") + if zero_status_t5: + logger.info( + "The `t5xxl` LoRA params are all zeros which make them ineffective. " + "So, we will purge them out of the current state dict to make loading possible." + ) + else: + logger.info( + "T5-xxl keys found in the state dict, which are currently unsupported. We will filter them out." + "Open an issue if this is a problem - https://github.com/huggingface/diffusers/issues/new." + ) + state_dict = {k: v for k, v in state_dict.items() if not k.startswith("text_encoders.t5xxl.transformer.")} + + has_diffb = any("diff_b" in k and k.startswith(("lora_unet_", "lora_te_", "lora_te1_")) for k in state_dict) + if has_diffb: + zero_status_diff_b = state_dict_all_zero(state_dict, ".diff_b") + if zero_status_diff_b: + logger.info( + "The `diff_b` LoRA params are all zeros which make them ineffective. " + "So, we will purge them out of the current state dict to make loading possible." + ) + else: + logger.info( + "`diff_b` keys found in the state dict which are currently unsupported. " + "So, we will filter out those keys. Open an issue if this is a problem - " + "https://github.com/huggingface/diffusers/issues/new." + ) + state_dict = {k: v for k, v in state_dict.items() if ".diff_b" not in k} + + has_norm_diff = any(".norm" in k and ".diff" in k for k in state_dict) + if has_norm_diff: + zero_status_diff = state_dict_all_zero(state_dict, ".diff") + if zero_status_diff: + logger.info( + "The `diff` LoRA params are all zeros which make them ineffective. " + "So, we will purge them out of the current state dict to make loading possible." + ) + else: + logger.info( + "Normalization diff keys found in the state dict which are currently unsupported. " + "So, we will filter out those keys. Open an issue if this is a problem - " + "https://github.com/huggingface/diffusers/issues/new." + ) + state_dict = {k: v for k, v in state_dict.items() if ".norm" not in k and ".diff" not in k} + + limit_substrings = ["lora_down", "lora_up"] + if any("alpha" in k for k in state_dict): + limit_substrings.append("alpha") + + state_dict = { + _custom_replace(k, limit_substrings): v + for k, v in state_dict.items() + if k.startswith(("lora_unet_", "lora_te_", "lora_te1_")) + } + + if any("text_projection" in k for k in state_dict): + logger.info( + "`text_projection` keys found in the `state_dict` which are unexpected. " + "So, we will filter out those keys. Open an issue if this is a problem - " + "https://github.com/huggingface/diffusers/issues/new." + ) + state_dict = {k: v for k, v in state_dict.items() if "text_projection" not in k} + + if has_mixture: + return _convert_mixture_state_dict_to_diffusers(state_dict) + + return _convert_sd_scripts_to_ai_toolkit(state_dict) + + +# Adapted from https://gist.github.com/Leommm-byte/6b331a1e9bd53271210b26543a7065d6 +# Some utilities were reused from +# https://github.com/kohya-ss/sd-scripts/blob/a61cf73a5cb5209c3f4d1a3688dd276a4dfd1ecb/networks/convert_flux_lora.py +def _convert_xlabs_flux_lora_to_diffusers(old_state_dict): + new_state_dict = {} + orig_keys = list(old_state_dict.keys()) + + def handle_qkv(sds_sd, ait_sd, sds_key, ait_keys, dims=None): + down_weight = sds_sd.pop(sds_key) + up_weight = sds_sd.pop(sds_key.replace(".down.weight", ".up.weight")) + + # calculate dims if not provided + num_splits = len(ait_keys) + if dims is None: + dims = [up_weight.shape[0] // num_splits] * num_splits + else: + assert sum(dims) == up_weight.shape[0] + + # make ai-toolkit weight + ait_down_keys = [k + ".lora_A.weight" for k in ait_keys] + ait_up_keys = [k + ".lora_B.weight" for k in ait_keys] + + # down_weight is copied to each split + ait_sd.update(dict.fromkeys(ait_down_keys, down_weight)) + + # up_weight is split to each split + ait_sd.update({k: v for k, v in zip(ait_up_keys, torch.split(up_weight, dims, dim=0))}) # noqa: C416 + + for old_key in orig_keys: + # Handle double_blocks + if old_key.startswith(("diffusion_model.double_blocks", "double_blocks")): + block_num = re.search(r"double_blocks\.(\d+)", old_key).group(1) + new_key = f"transformer.transformer_blocks.{block_num}" + + if "processor.proj_lora1" in old_key: + new_key += ".attn.to_out.0" + elif "processor.proj_lora2" in old_key: + new_key += ".attn.to_add_out" + # Handle text latents. + elif "processor.qkv_lora2" in old_key and "up" not in old_key: + handle_qkv( + old_state_dict, + new_state_dict, + old_key, + [ + f"transformer.transformer_blocks.{block_num}.attn.add_q_proj", + f"transformer.transformer_blocks.{block_num}.attn.add_k_proj", + f"transformer.transformer_blocks.{block_num}.attn.add_v_proj", + ], + ) + # continue + # Handle image latents. + elif "processor.qkv_lora1" in old_key and "up" not in old_key: + handle_qkv( + old_state_dict, + new_state_dict, + old_key, + [ + f"transformer.transformer_blocks.{block_num}.attn.to_q", + f"transformer.transformer_blocks.{block_num}.attn.to_k", + f"transformer.transformer_blocks.{block_num}.attn.to_v", + ], + ) + # continue + + if "down" in old_key: + new_key += ".lora_A.weight" + elif "up" in old_key: + new_key += ".lora_B.weight" + + # Handle single_blocks + elif old_key.startswith(("diffusion_model.single_blocks", "single_blocks")): + block_num = re.search(r"single_blocks\.(\d+)", old_key).group(1) + new_key = f"transformer.single_transformer_blocks.{block_num}" + + if "proj_lora" in old_key: + new_key += ".proj_out" + elif "qkv_lora" in old_key and "up" not in old_key: + handle_qkv( + old_state_dict, + new_state_dict, + old_key, + [ + f"transformer.single_transformer_blocks.{block_num}.attn.to_q", + f"transformer.single_transformer_blocks.{block_num}.attn.to_k", + f"transformer.single_transformer_blocks.{block_num}.attn.to_v", + ], + ) + + if "down" in old_key: + new_key += ".lora_A.weight" + elif "up" in old_key: + new_key += ".lora_B.weight" + + else: + # Handle other potential key patterns here + new_key = old_key + + # Since we already handle qkv above. + if "qkv" not in old_key: + new_state_dict[new_key] = old_state_dict.pop(old_key) + + if len(old_state_dict) > 0: + raise ValueError(f"`old_state_dict` should be at this point but has: {list(old_state_dict.keys())}.") + + return new_state_dict + + +def _custom_replace(key: str, substrings: list[str]) -> str: + # Replaces the "."s with "_"s upto the `substrings`. + # Example: + # lora_unet.foo.bar.lora_A.weight -> lora_unet_foo_bar.lora_A.weight + pattern = "(" + "|".join(re.escape(sub) for sub in substrings) + ")" + + match = re.search(pattern, key) + if match: + start_sub = match.start() + if start_sub > 0 and key[start_sub - 1] == ".": + boundary = start_sub - 1 + else: + boundary = start_sub + left = key[:boundary].replace(".", "_") + right = key[boundary:] + return left + right + else: + return key.replace(".", "_") + + +def _convert_bfl_flux_control_lora_to_diffusers(original_state_dict): + converted_state_dict = {} + original_state_dict_keys = list(original_state_dict.keys()) + num_layers = 19 + num_single_layers = 38 + inner_dim = 3072 + mlp_ratio = 4.0 + + for lora_key in ["lora_A", "lora_B"]: + ## time_text_embed.timestep_embedder <- time_in + converted_state_dict[f"time_text_embed.timestep_embedder.linear_1.{lora_key}.weight"] = ( + original_state_dict.pop(f"time_in.in_layer.{lora_key}.weight") + ) + if f"time_in.in_layer.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"time_text_embed.timestep_embedder.linear_1.{lora_key}.bias"] = ( + original_state_dict.pop(f"time_in.in_layer.{lora_key}.bias") + ) + + converted_state_dict[f"time_text_embed.timestep_embedder.linear_2.{lora_key}.weight"] = ( + original_state_dict.pop(f"time_in.out_layer.{lora_key}.weight") + ) + if f"time_in.out_layer.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"time_text_embed.timestep_embedder.linear_2.{lora_key}.bias"] = ( + original_state_dict.pop(f"time_in.out_layer.{lora_key}.bias") + ) + + ## time_text_embed.text_embedder <- vector_in + converted_state_dict[f"time_text_embed.text_embedder.linear_1.{lora_key}.weight"] = original_state_dict.pop( + f"vector_in.in_layer.{lora_key}.weight" + ) + if f"vector_in.in_layer.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"time_text_embed.text_embedder.linear_1.{lora_key}.bias"] = original_state_dict.pop( + f"vector_in.in_layer.{lora_key}.bias" + ) + + converted_state_dict[f"time_text_embed.text_embedder.linear_2.{lora_key}.weight"] = original_state_dict.pop( + f"vector_in.out_layer.{lora_key}.weight" + ) + if f"vector_in.out_layer.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"time_text_embed.text_embedder.linear_2.{lora_key}.bias"] = original_state_dict.pop( + f"vector_in.out_layer.{lora_key}.bias" + ) + + # guidance + has_guidance = any("guidance" in k for k in original_state_dict) + if has_guidance: + converted_state_dict[f"time_text_embed.guidance_embedder.linear_1.{lora_key}.weight"] = ( + original_state_dict.pop(f"guidance_in.in_layer.{lora_key}.weight") + ) + if f"guidance_in.in_layer.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"time_text_embed.guidance_embedder.linear_1.{lora_key}.bias"] = ( + original_state_dict.pop(f"guidance_in.in_layer.{lora_key}.bias") + ) + + converted_state_dict[f"time_text_embed.guidance_embedder.linear_2.{lora_key}.weight"] = ( + original_state_dict.pop(f"guidance_in.out_layer.{lora_key}.weight") + ) + if f"guidance_in.out_layer.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"time_text_embed.guidance_embedder.linear_2.{lora_key}.bias"] = ( + original_state_dict.pop(f"guidance_in.out_layer.{lora_key}.bias") + ) + + # context_embedder + converted_state_dict[f"context_embedder.{lora_key}.weight"] = original_state_dict.pop( + f"txt_in.{lora_key}.weight" + ) + if f"txt_in.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"context_embedder.{lora_key}.bias"] = original_state_dict.pop( + f"txt_in.{lora_key}.bias" + ) + + # x_embedder + converted_state_dict[f"x_embedder.{lora_key}.weight"] = original_state_dict.pop(f"img_in.{lora_key}.weight") + if f"img_in.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"x_embedder.{lora_key}.bias"] = original_state_dict.pop(f"img_in.{lora_key}.bias") + + # double transformer blocks + for i in range(num_layers): + block_prefix = f"transformer_blocks.{i}." + + for lora_key in ["lora_A", "lora_B"]: + # norms + converted_state_dict[f"{block_prefix}norm1.linear.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.img_mod.lin.{lora_key}.weight" + ) + if f"double_blocks.{i}.img_mod.lin.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}norm1.linear.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.img_mod.lin.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}norm1_context.linear.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.txt_mod.lin.{lora_key}.weight" + ) + if f"double_blocks.{i}.txt_mod.lin.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}norm1_context.linear.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.txt_mod.lin.{lora_key}.bias" + ) + + # Q, K, V + if lora_key == "lora_A": + sample_lora_weight = original_state_dict.pop(f"double_blocks.{i}.img_attn.qkv.{lora_key}.weight") + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([sample_lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([sample_lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([sample_lora_weight]) + + context_lora_weight = original_state_dict.pop(f"double_blocks.{i}.txt_attn.qkv.{lora_key}.weight") + converted_state_dict[f"{block_prefix}attn.add_q_proj.{lora_key}.weight"] = torch.cat( + [context_lora_weight] + ) + converted_state_dict[f"{block_prefix}attn.add_k_proj.{lora_key}.weight"] = torch.cat( + [context_lora_weight] + ) + converted_state_dict[f"{block_prefix}attn.add_v_proj.{lora_key}.weight"] = torch.cat( + [context_lora_weight] + ) + else: + sample_q, sample_k, sample_v = torch.chunk( + original_state_dict.pop(f"double_blocks.{i}.img_attn.qkv.{lora_key}.weight"), 3, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([sample_q]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([sample_k]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([sample_v]) + + context_q, context_k, context_v = torch.chunk( + original_state_dict.pop(f"double_blocks.{i}.txt_attn.qkv.{lora_key}.weight"), 3, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.add_q_proj.{lora_key}.weight"] = torch.cat([context_q]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.{lora_key}.weight"] = torch.cat([context_k]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.{lora_key}.weight"] = torch.cat([context_v]) + + if f"double_blocks.{i}.img_attn.qkv.{lora_key}.bias" in original_state_dict_keys: + sample_q_bias, sample_k_bias, sample_v_bias = torch.chunk( + original_state_dict.pop(f"double_blocks.{i}.img_attn.qkv.{lora_key}.bias"), 3, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.bias"] = torch.cat([sample_q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.bias"] = torch.cat([sample_k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.bias"] = torch.cat([sample_v_bias]) + + if f"double_blocks.{i}.txt_attn.qkv.{lora_key}.bias" in original_state_dict_keys: + context_q_bias, context_k_bias, context_v_bias = torch.chunk( + original_state_dict.pop(f"double_blocks.{i}.txt_attn.qkv.{lora_key}.bias"), 3, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.add_q_proj.{lora_key}.bias"] = torch.cat([context_q_bias]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.{lora_key}.bias"] = torch.cat([context_k_bias]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.{lora_key}.bias"] = torch.cat([context_v_bias]) + + # ff img_mlp + converted_state_dict[f"{block_prefix}ff.net.0.proj.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.img_mlp.0.{lora_key}.weight" + ) + if f"double_blocks.{i}.img_mlp.0.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff.net.0.proj.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.img_mlp.0.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}ff.net.2.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.img_mlp.2.{lora_key}.weight" + ) + if f"double_blocks.{i}.img_mlp.2.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff.net.2.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.img_mlp.2.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.txt_mlp.0.{lora_key}.weight" + ) + if f"double_blocks.{i}.txt_mlp.0.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.txt_mlp.0.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}ff_context.net.2.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.txt_mlp.2.{lora_key}.weight" + ) + if f"double_blocks.{i}.txt_mlp.2.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff_context.net.2.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.txt_mlp.2.{lora_key}.bias" + ) + + # output projections. + converted_state_dict[f"{block_prefix}attn.to_out.0.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.img_attn.proj.{lora_key}.weight" + ) + if f"double_blocks.{i}.img_attn.proj.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}attn.to_out.0.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.img_attn.proj.{lora_key}.bias" + ) + converted_state_dict[f"{block_prefix}attn.to_add_out.{lora_key}.weight"] = original_state_dict.pop( + f"double_blocks.{i}.txt_attn.proj.{lora_key}.weight" + ) + if f"double_blocks.{i}.txt_attn.proj.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}attn.to_add_out.{lora_key}.bias"] = original_state_dict.pop( + f"double_blocks.{i}.txt_attn.proj.{lora_key}.bias" + ) + + # qk_norm + converted_state_dict[f"{block_prefix}attn.norm_q.weight"] = original_state_dict.pop( + f"double_blocks.{i}.img_attn.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_k.weight"] = original_state_dict.pop( + f"double_blocks.{i}.img_attn.norm.key_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_added_q.weight"] = original_state_dict.pop( + f"double_blocks.{i}.txt_attn.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_added_k.weight"] = original_state_dict.pop( + f"double_blocks.{i}.txt_attn.norm.key_norm.scale" + ) + + # single transformer blocks + for i in range(num_single_layers): + block_prefix = f"single_transformer_blocks.{i}." + + for lora_key in ["lora_A", "lora_B"]: + # norm.linear <- single_blocks.0.modulation.lin + converted_state_dict[f"{block_prefix}norm.linear.{lora_key}.weight"] = original_state_dict.pop( + f"single_blocks.{i}.modulation.lin.{lora_key}.weight" + ) + if f"single_blocks.{i}.modulation.lin.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}norm.linear.{lora_key}.bias"] = original_state_dict.pop( + f"single_blocks.{i}.modulation.lin.{lora_key}.bias" + ) + + # Q, K, V, mlp + mlp_hidden_dim = int(inner_dim * mlp_ratio) + split_size = (inner_dim, inner_dim, inner_dim, mlp_hidden_dim) + + if lora_key == "lora_A": + lora_weight = original_state_dict.pop(f"single_blocks.{i}.linear1.{lora_key}.weight") + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([lora_weight]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.weight"] = torch.cat([lora_weight]) + + if f"single_blocks.{i}.linear1.{lora_key}.bias" in original_state_dict_keys: + lora_bias = original_state_dict.pop(f"single_blocks.{i}.linear1.{lora_key}.bias") + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.bias"] = torch.cat([lora_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.bias"] = torch.cat([lora_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.bias"] = torch.cat([lora_bias]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.bias"] = torch.cat([lora_bias]) + else: + q, k, v, mlp = torch.split( + original_state_dict.pop(f"single_blocks.{i}.linear1.{lora_key}.weight"), split_size, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([q]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([k]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([v]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.weight"] = torch.cat([mlp]) + + if f"single_blocks.{i}.linear1.{lora_key}.bias" in original_state_dict_keys: + q_bias, k_bias, v_bias, mlp_bias = torch.split( + original_state_dict.pop(f"single_blocks.{i}.linear1.{lora_key}.bias"), split_size, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.bias"] = torch.cat([q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.bias"] = torch.cat([k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.bias"] = torch.cat([v_bias]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.bias"] = torch.cat([mlp_bias]) + + # output projections. + converted_state_dict[f"{block_prefix}proj_out.{lora_key}.weight"] = original_state_dict.pop( + f"single_blocks.{i}.linear2.{lora_key}.weight" + ) + if f"single_blocks.{i}.linear2.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}proj_out.{lora_key}.bias"] = original_state_dict.pop( + f"single_blocks.{i}.linear2.{lora_key}.bias" + ) + + # qk norm + converted_state_dict[f"{block_prefix}attn.norm_q.weight"] = original_state_dict.pop( + f"single_blocks.{i}.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_k.weight"] = original_state_dict.pop( + f"single_blocks.{i}.norm.key_norm.scale" + ) + + for lora_key in ["lora_A", "lora_B"]: + converted_state_dict[f"proj_out.{lora_key}.weight"] = original_state_dict.pop( + f"final_layer.linear.{lora_key}.weight" + ) + if f"final_layer.linear.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"proj_out.{lora_key}.bias"] = original_state_dict.pop( + f"final_layer.linear.{lora_key}.bias" + ) + + converted_state_dict[f"norm_out.linear.{lora_key}.weight"] = swap_scale_shift( + original_state_dict.pop(f"final_layer.adaLN_modulation.1.{lora_key}.weight") + ) + if f"final_layer.adaLN_modulation.1.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"norm_out.linear.{lora_key}.bias"] = swap_scale_shift( + original_state_dict.pop(f"final_layer.adaLN_modulation.1.{lora_key}.bias") + ) + + if len(original_state_dict) > 0: + raise ValueError(f"`original_state_dict` should be empty at this point but has {original_state_dict.keys()=}.") + + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_fal_kontext_lora_to_diffusers(original_state_dict): + converted_state_dict = {} + original_state_dict_keys = list(original_state_dict.keys()) + num_layers = 19 + num_single_layers = 38 + inner_dim = 3072 + mlp_ratio = 4.0 + + # double transformer blocks + for i in range(num_layers): + block_prefix = f"transformer_blocks.{i}." + original_block_prefix = "base_model.model." + + for lora_key in ["lora_A", "lora_B"]: + # norms + converted_state_dict[f"{block_prefix}norm1.linear.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_mod.lin.{lora_key}.weight" + ) + if f"double_blocks.{i}.img_mod.lin.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}norm1.linear.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_mod.lin.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}norm1_context.linear.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_mod.lin.{lora_key}.weight" + ) + + # Q, K, V + if lora_key == "lora_A": + sample_lora_weight = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_attn.qkv.{lora_key}.weight" + ) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([sample_lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([sample_lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([sample_lora_weight]) + + context_lora_weight = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_attn.qkv.{lora_key}.weight" + ) + converted_state_dict[f"{block_prefix}attn.add_q_proj.{lora_key}.weight"] = torch.cat( + [context_lora_weight] + ) + converted_state_dict[f"{block_prefix}attn.add_k_proj.{lora_key}.weight"] = torch.cat( + [context_lora_weight] + ) + converted_state_dict[f"{block_prefix}attn.add_v_proj.{lora_key}.weight"] = torch.cat( + [context_lora_weight] + ) + else: + sample_q, sample_k, sample_v = torch.chunk( + original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_attn.qkv.{lora_key}.weight" + ), + 3, + dim=0, + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([sample_q]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([sample_k]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([sample_v]) + + context_q, context_k, context_v = torch.chunk( + original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_attn.qkv.{lora_key}.weight" + ), + 3, + dim=0, + ) + converted_state_dict[f"{block_prefix}attn.add_q_proj.{lora_key}.weight"] = torch.cat([context_q]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.{lora_key}.weight"] = torch.cat([context_k]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.{lora_key}.weight"] = torch.cat([context_v]) + + if f"double_blocks.{i}.img_attn.qkv.{lora_key}.bias" in original_state_dict_keys: + sample_q_bias, sample_k_bias, sample_v_bias = torch.chunk( + original_state_dict.pop(f"{original_block_prefix}double_blocks.{i}.img_attn.qkv.{lora_key}.bias"), + 3, + dim=0, + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.bias"] = torch.cat([sample_q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.bias"] = torch.cat([sample_k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.bias"] = torch.cat([sample_v_bias]) + + if f"double_blocks.{i}.txt_attn.qkv.{lora_key}.bias" in original_state_dict_keys: + context_q_bias, context_k_bias, context_v_bias = torch.chunk( + original_state_dict.pop(f"{original_block_prefix}double_blocks.{i}.txt_attn.qkv.{lora_key}.bias"), + 3, + dim=0, + ) + converted_state_dict[f"{block_prefix}attn.add_q_proj.{lora_key}.bias"] = torch.cat([context_q_bias]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.{lora_key}.bias"] = torch.cat([context_k_bias]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.{lora_key}.bias"] = torch.cat([context_v_bias]) + + # ff img_mlp + converted_state_dict[f"{block_prefix}ff.net.0.proj.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_mlp.0.{lora_key}.weight" + ) + if f"{original_block_prefix}double_blocks.{i}.img_mlp.0.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff.net.0.proj.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_mlp.0.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}ff.net.2.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_mlp.2.{lora_key}.weight" + ) + if f"{original_block_prefix}double_blocks.{i}.img_mlp.2.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff.net.2.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_mlp.2.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_mlp.0.{lora_key}.weight" + ) + if f"{original_block_prefix}double_blocks.{i}.txt_mlp.0.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_mlp.0.{lora_key}.bias" + ) + + converted_state_dict[f"{block_prefix}ff_context.net.2.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_mlp.2.{lora_key}.weight" + ) + if f"{original_block_prefix}double_blocks.{i}.txt_mlp.2.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}ff_context.net.2.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_mlp.2.{lora_key}.bias" + ) + + # output projections. + converted_state_dict[f"{block_prefix}attn.to_out.0.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_attn.proj.{lora_key}.weight" + ) + if f"{original_block_prefix}double_blocks.{i}.img_attn.proj.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}attn.to_out.0.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.img_attn.proj.{lora_key}.bias" + ) + converted_state_dict[f"{block_prefix}attn.to_add_out.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_attn.proj.{lora_key}.weight" + ) + if f"{original_block_prefix}double_blocks.{i}.txt_attn.proj.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}attn.to_add_out.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}double_blocks.{i}.txt_attn.proj.{lora_key}.bias" + ) + + # single transformer blocks + for i in range(num_single_layers): + block_prefix = f"single_transformer_blocks.{i}." + + for lora_key in ["lora_A", "lora_B"]: + # norm.linear <- single_blocks.0.modulation.lin + converted_state_dict[f"{block_prefix}norm.linear.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}single_blocks.{i}.modulation.lin.{lora_key}.weight" + ) + if f"{original_block_prefix}single_blocks.{i}.modulation.lin.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}norm.linear.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}single_blocks.{i}.modulation.lin.{lora_key}.bias" + ) + + # Q, K, V, mlp + mlp_hidden_dim = int(inner_dim * mlp_ratio) + split_size = (inner_dim, inner_dim, inner_dim, mlp_hidden_dim) + + if lora_key == "lora_A": + lora_weight = original_state_dict.pop( + f"{original_block_prefix}single_blocks.{i}.linear1.{lora_key}.weight" + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([lora_weight]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([lora_weight]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.weight"] = torch.cat([lora_weight]) + + if f"{original_block_prefix}single_blocks.{i}.linear1.{lora_key}.bias" in original_state_dict_keys: + lora_bias = original_state_dict.pop(f"single_blocks.{i}.linear1.{lora_key}.bias") + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.bias"] = torch.cat([lora_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.bias"] = torch.cat([lora_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.bias"] = torch.cat([lora_bias]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.bias"] = torch.cat([lora_bias]) + else: + q, k, v, mlp = torch.split( + original_state_dict.pop(f"{original_block_prefix}single_blocks.{i}.linear1.{lora_key}.weight"), + split_size, + dim=0, + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.weight"] = torch.cat([q]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.weight"] = torch.cat([k]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.weight"] = torch.cat([v]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.weight"] = torch.cat([mlp]) + + if f"{original_block_prefix}single_blocks.{i}.linear1.{lora_key}.bias" in original_state_dict_keys: + q_bias, k_bias, v_bias, mlp_bias = torch.split( + original_state_dict.pop(f"{original_block_prefix}single_blocks.{i}.linear1.{lora_key}.bias"), + split_size, + dim=0, + ) + converted_state_dict[f"{block_prefix}attn.to_q.{lora_key}.bias"] = torch.cat([q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.{lora_key}.bias"] = torch.cat([k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.{lora_key}.bias"] = torch.cat([v_bias]) + converted_state_dict[f"{block_prefix}proj_mlp.{lora_key}.bias"] = torch.cat([mlp_bias]) + + # output projections. + converted_state_dict[f"{block_prefix}proj_out.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}single_blocks.{i}.linear2.{lora_key}.weight" + ) + if f"{original_block_prefix}single_blocks.{i}.linear2.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"{block_prefix}proj_out.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}single_blocks.{i}.linear2.{lora_key}.bias" + ) + + for lora_key in ["lora_A", "lora_B"]: + converted_state_dict[f"proj_out.{lora_key}.weight"] = original_state_dict.pop( + f"{original_block_prefix}final_layer.linear.{lora_key}.weight" + ) + if f"{original_block_prefix}final_layer.linear.{lora_key}.bias" in original_state_dict_keys: + converted_state_dict[f"proj_out.{lora_key}.bias"] = original_state_dict.pop( + f"{original_block_prefix}final_layer.linear.{lora_key}.bias" + ) + + if len(original_state_dict) > 0: + raise ValueError(f"`original_state_dict` should be empty at this point but has {original_state_dict.keys()=}.") + + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_hunyuan_video_lora_to_diffusers(original_state_dict): + converted_state_dict = {k: original_state_dict.pop(k) for k in list(original_state_dict.keys())} + + def remap_norm_scale_shift_(key, state_dict): + weight = state_dict.pop(key) + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + state_dict[key.replace("final_layer.adaLN_modulation.1", "norm_out.linear")] = new_weight + + def remap_txt_in_(key, state_dict): + def rename_key(key): + new_key = key.replace("individual_token_refiner.blocks", "token_refiner.refiner_blocks") + new_key = new_key.replace("adaLN_modulation.1", "norm_out.linear") + new_key = new_key.replace("txt_in", "context_embedder") + new_key = new_key.replace("t_embedder.mlp.0", "time_text_embed.timestep_embedder.linear_1") + new_key = new_key.replace("t_embedder.mlp.2", "time_text_embed.timestep_embedder.linear_2") + new_key = new_key.replace("c_embedder", "time_text_embed.text_embedder") + new_key = new_key.replace("mlp", "ff") + return new_key + + if "self_attn_qkv" in key: + weight = state_dict.pop(key) + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_q"))] = to_q + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_k"))] = to_k + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_v"))] = to_v + else: + state_dict[rename_key(key)] = state_dict.pop(key) + + def remap_img_attn_qkv_(key, state_dict): + weight = state_dict.pop(key) + if "lora_A" in key: + state_dict[key.replace("img_attn_qkv", "attn.to_q")] = weight + state_dict[key.replace("img_attn_qkv", "attn.to_k")] = weight + state_dict[key.replace("img_attn_qkv", "attn.to_v")] = weight + else: + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[key.replace("img_attn_qkv", "attn.to_q")] = to_q + state_dict[key.replace("img_attn_qkv", "attn.to_k")] = to_k + state_dict[key.replace("img_attn_qkv", "attn.to_v")] = to_v + + def remap_txt_attn_qkv_(key, state_dict): + weight = state_dict.pop(key) + if "lora_A" in key: + state_dict[key.replace("txt_attn_qkv", "attn.add_q_proj")] = weight + state_dict[key.replace("txt_attn_qkv", "attn.add_k_proj")] = weight + state_dict[key.replace("txt_attn_qkv", "attn.add_v_proj")] = weight + else: + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[key.replace("txt_attn_qkv", "attn.add_q_proj")] = to_q + state_dict[key.replace("txt_attn_qkv", "attn.add_k_proj")] = to_k + state_dict[key.replace("txt_attn_qkv", "attn.add_v_proj")] = to_v + + def remap_single_transformer_blocks_(key, state_dict): + hidden_size = 3072 + + if "linear1.lora_A.weight" in key or "linear1.lora_B.weight" in key: + linear1_weight = state_dict.pop(key) + if "lora_A" in key: + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_A.weight" + ) + state_dict[f"{new_key}.attn.to_q.lora_A.weight"] = linear1_weight + state_dict[f"{new_key}.attn.to_k.lora_A.weight"] = linear1_weight + state_dict[f"{new_key}.attn.to_v.lora_A.weight"] = linear1_weight + state_dict[f"{new_key}.proj_mlp.lora_A.weight"] = linear1_weight + else: + split_size = (hidden_size, hidden_size, hidden_size, linear1_weight.size(0) - 3 * hidden_size) + q, k, v, mlp = torch.split(linear1_weight, split_size, dim=0) + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_B.weight" + ) + state_dict[f"{new_key}.attn.to_q.lora_B.weight"] = q + state_dict[f"{new_key}.attn.to_k.lora_B.weight"] = k + state_dict[f"{new_key}.attn.to_v.lora_B.weight"] = v + state_dict[f"{new_key}.proj_mlp.lora_B.weight"] = mlp + + elif "linear1.lora_A.bias" in key or "linear1.lora_B.bias" in key: + linear1_bias = state_dict.pop(key) + if "lora_A" in key: + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_A.bias" + ) + state_dict[f"{new_key}.attn.to_q.lora_A.bias"] = linear1_bias + state_dict[f"{new_key}.attn.to_k.lora_A.bias"] = linear1_bias + state_dict[f"{new_key}.attn.to_v.lora_A.bias"] = linear1_bias + state_dict[f"{new_key}.proj_mlp.lora_A.bias"] = linear1_bias + else: + split_size = (hidden_size, hidden_size, hidden_size, linear1_bias.size(0) - 3 * hidden_size) + q_bias, k_bias, v_bias, mlp_bias = torch.split(linear1_bias, split_size, dim=0) + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix( + ".linear1.lora_B.bias" + ) + state_dict[f"{new_key}.attn.to_q.lora_B.bias"] = q_bias + state_dict[f"{new_key}.attn.to_k.lora_B.bias"] = k_bias + state_dict[f"{new_key}.attn.to_v.lora_B.bias"] = v_bias + state_dict[f"{new_key}.proj_mlp.lora_B.bias"] = mlp_bias + + else: + new_key = key.replace("single_blocks", "single_transformer_blocks") + new_key = new_key.replace("linear2", "proj_out") + new_key = new_key.replace("q_norm", "attn.norm_q") + new_key = new_key.replace("k_norm", "attn.norm_k") + state_dict[new_key] = state_dict.pop(key) + + TRANSFORMER_KEYS_RENAME_DICT = { + "img_in": "x_embedder", + "time_in.mlp.0": "time_text_embed.timestep_embedder.linear_1", + "time_in.mlp.2": "time_text_embed.timestep_embedder.linear_2", + "guidance_in.mlp.0": "time_text_embed.guidance_embedder.linear_1", + "guidance_in.mlp.2": "time_text_embed.guidance_embedder.linear_2", + "vector_in.in_layer": "time_text_embed.text_embedder.linear_1", + "vector_in.out_layer": "time_text_embed.text_embedder.linear_2", + "double_blocks": "transformer_blocks", + "img_attn_q_norm": "attn.norm_q", + "img_attn_k_norm": "attn.norm_k", + "img_attn_proj": "attn.to_out.0", + "txt_attn_q_norm": "attn.norm_added_q", + "txt_attn_k_norm": "attn.norm_added_k", + "txt_attn_proj": "attn.to_add_out", + "img_mod.linear": "norm1.linear", + "img_norm1": "norm1.norm", + "img_norm2": "norm2", + "img_mlp": "ff", + "txt_mod.linear": "norm1_context.linear", + "txt_norm1": "norm1.norm", + "txt_norm2": "norm2_context", + "txt_mlp": "ff_context", + "self_attn_proj": "attn.to_out.0", + "modulation.linear": "norm.linear", + "pre_norm": "norm.norm", + "final_layer.norm_final": "norm_out.norm", + "final_layer.linear": "proj_out", + "fc1": "net.0.proj", + "fc2": "net.2", + "input_embedder": "proj_in", + } + + TRANSFORMER_SPECIAL_KEYS_REMAP = { + "txt_in": remap_txt_in_, + "img_attn_qkv": remap_img_attn_qkv_, + "txt_attn_qkv": remap_txt_attn_qkv_, + "single_blocks": remap_single_transformer_blocks_, + "final_layer.adaLN_modulation.1": remap_norm_scale_shift_, + } + + # Some folks attempt to make their state dict compatible with diffusers by adding "transformer." prefix to all keys + # and use their custom code. To make sure both "original" and "attempted diffusers" loras work as expected, we make + # sure that both follow the same initial format by stripping off the "transformer." prefix. + for key in list(converted_state_dict.keys()): + if key.startswith("transformer."): + converted_state_dict[key[len("transformer.") :]] = converted_state_dict.pop(key) + if key.startswith("diffusion_model."): + converted_state_dict[key[len("diffusion_model.") :]] = converted_state_dict.pop(key) + + # Rename and remap the state dict keys + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = converted_state_dict.pop(key) + + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + # Add back the "transformer." prefix + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_non_diffusers_lumina2_lora_to_diffusers(state_dict): + # Remove "diffusion_model." prefix from keys. + state_dict = {k[len("diffusion_model.") :]: v for k, v in state_dict.items()} + converted_state_dict = {} + + def get_num_layers(keys, pattern): + layers = set() + for key in keys: + match = re.search(pattern, key) + if match: + layers.add(int(match.group(1))) + return len(layers) + + def process_block(prefix, index, convert_norm): + # Process attention qkv: pop lora_A and lora_B weights. + lora_down = state_dict.pop(f"{prefix}.{index}.attention.qkv.lora_A.weight") + lora_up = state_dict.pop(f"{prefix}.{index}.attention.qkv.lora_B.weight") + for attn_key in ["to_q", "to_k", "to_v"]: + converted_state_dict[f"{prefix}.{index}.attn.{attn_key}.lora_A.weight"] = lora_down + for attn_key, weight in zip(["to_q", "to_k", "to_v"], torch.split(lora_up, [2304, 768, 768], dim=0)): + converted_state_dict[f"{prefix}.{index}.attn.{attn_key}.lora_B.weight"] = weight + + # Process attention out weights. + converted_state_dict[f"{prefix}.{index}.attn.to_out.0.lora_A.weight"] = state_dict.pop( + f"{prefix}.{index}.attention.out.lora_A.weight" + ) + converted_state_dict[f"{prefix}.{index}.attn.to_out.0.lora_B.weight"] = state_dict.pop( + f"{prefix}.{index}.attention.out.lora_B.weight" + ) + + # Process feed-forward weights for layers 1, 2, and 3. + for layer in range(1, 4): + converted_state_dict[f"{prefix}.{index}.feed_forward.linear_{layer}.lora_A.weight"] = state_dict.pop( + f"{prefix}.{index}.feed_forward.w{layer}.lora_A.weight" + ) + converted_state_dict[f"{prefix}.{index}.feed_forward.linear_{layer}.lora_B.weight"] = state_dict.pop( + f"{prefix}.{index}.feed_forward.w{layer}.lora_B.weight" + ) + + if convert_norm: + converted_state_dict[f"{prefix}.{index}.norm1.linear.lora_A.weight"] = state_dict.pop( + f"{prefix}.{index}.adaLN_modulation.1.lora_A.weight" + ) + converted_state_dict[f"{prefix}.{index}.norm1.linear.lora_B.weight"] = state_dict.pop( + f"{prefix}.{index}.adaLN_modulation.1.lora_B.weight" + ) + + noise_refiner_pattern = r"noise_refiner\.(\d+)\." + num_noise_refiner_layers = get_num_layers(state_dict.keys(), noise_refiner_pattern) + for i in range(num_noise_refiner_layers): + process_block("noise_refiner", i, convert_norm=True) + + context_refiner_pattern = r"context_refiner\.(\d+)\." + num_context_refiner_layers = get_num_layers(state_dict.keys(), context_refiner_pattern) + for i in range(num_context_refiner_layers): + process_block("context_refiner", i, convert_norm=False) + + core_transformer_pattern = r"layers\.(\d+)\." + num_core_transformer_layers = get_num_layers(state_dict.keys(), core_transformer_pattern) + for i in range(num_core_transformer_layers): + process_block("layers", i, convert_norm=True) + + if len(state_dict) > 0: + raise ValueError(f"`state_dict` should be empty at this point but has {state_dict.keys()=}") + + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_non_diffusers_wan_lora_to_diffusers(state_dict): + converted_state_dict = {} + original_state_dict = {k[len("diffusion_model.") :]: v for k, v in state_dict.items()} + + block_numbers = {int(k.split(".")[1]) for k in original_state_dict if k.startswith("blocks.")} + min_block = min(block_numbers) + max_block = max(block_numbers) + + is_i2v_lora = any("k_img" in k for k in original_state_dict) and any("v_img" in k for k in original_state_dict) + lora_down_key = "lora_A" if any("lora_A" in k for k in original_state_dict) else "lora_down" + lora_up_key = "lora_B" if any("lora_B" in k for k in original_state_dict) else "lora_up" + has_time_projection_weight = any( + k.startswith("time_projection") and k.endswith(".weight") for k in original_state_dict + ) + + def get_alpha_scales(down_weight, alpha_key): + rank = down_weight.shape[0] + alpha = original_state_dict.pop(alpha_key).item() + scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + return scale_down, scale_up + + for key in list(original_state_dict.keys()): + if key.endswith((".diff", ".diff_b")) and "norm" in key: + # NOTE: we don't support this because norm layer diff keys are just zeroed values. We can support it + # in future if needed and they are not zeroed. + original_state_dict.pop(key) + logger.debug(f"Removing {key} key from the state dict as it is a norm diff key. This is unsupported.") + + if "time_projection" in key and not has_time_projection_weight: + # AccVideo lora has diff bias keys but not the weight keys. This causes a weird problem where + # our lora config adds the time proj lora layers, but we don't have the weights for them. + # CausVid lora has the weight keys and the bias keys. + original_state_dict.pop(key) + + # For the `diff_b` keys, we treat them as lora_bias. + # https://huggingface.co/docs/peft/main/en/package_reference/lora#peft.LoraConfig.lora_bias + + for i in range(min_block, max_block + 1): + # Self-attention + for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): + alpha_key = f"blocks.{i}.self_attn.{o}.alpha" + has_alpha = alpha_key in original_state_dict + original_key_A = f"blocks.{i}.self_attn.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.attn1.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.self_attn.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.attn1.{c}.lora_B.weight" + + if has_alpha: + down_weight = original_state_dict.pop(original_key_A) + up_weight = original_state_dict.pop(original_key_B) + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[converted_key_A] = down_weight * scale_down + converted_state_dict[converted_key_B] = up_weight * scale_up + + else: + if original_key_A in original_state_dict: + converted_state_dict[converted_key_A] = original_state_dict.pop(original_key_A) + if original_key_B in original_state_dict: + converted_state_dict[converted_key_B] = original_state_dict.pop(original_key_B) + + original_key = f"blocks.{i}.self_attn.{o}.diff_b" + converted_key = f"blocks.{i}.attn1.{c}.lora_B.bias" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + # Cross-attention + for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): + alpha_key = f"blocks.{i}.cross_attn.{o}.alpha" + has_alpha = alpha_key in original_state_dict + original_key_A = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.attn2.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.attn2.{c}.lora_B.weight" + + if original_key_A in original_state_dict: + down_weight = original_state_dict.pop(original_key_A) + converted_state_dict[converted_key_A] = down_weight + if original_key_B in original_state_dict: + up_weight = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_B] = up_weight + if has_alpha: + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[converted_key_A] *= scale_down + converted_state_dict[converted_key_B] *= scale_up + + original_key = f"blocks.{i}.cross_attn.{o}.diff_b" + converted_key = f"blocks.{i}.attn2.{c}.lora_B.bias" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + if is_i2v_lora: + for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]): + alpha_key = f"blocks.{i}.cross_attn.{o}.alpha" + has_alpha = alpha_key in original_state_dict + original_key_A = f"blocks.{i}.cross_attn.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.attn2.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.cross_attn.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.attn2.{c}.lora_B.weight" + + if original_key_A in original_state_dict: + down_weight = original_state_dict.pop(original_key_A) + converted_state_dict[converted_key_A] = down_weight + if original_key_B in original_state_dict: + up_weight = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_B] = up_weight + if has_alpha: + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[converted_key_A] *= scale_down + converted_state_dict[converted_key_B] *= scale_up + + original_key = f"blocks.{i}.cross_attn.{o}.diff_b" + converted_key = f"blocks.{i}.attn2.{c}.lora_B.bias" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + # FFN + for o, c in zip(["ffn.0", "ffn.2"], ["net.0.proj", "net.2"]): + alpha_key = f"blocks.{i}.{o}.alpha" + has_alpha = alpha_key in original_state_dict + original_key_A = f"blocks.{i}.{o}.{lora_down_key}.weight" + converted_key_A = f"blocks.{i}.ffn.{c}.lora_A.weight" + + original_key_B = f"blocks.{i}.{o}.{lora_up_key}.weight" + converted_key_B = f"blocks.{i}.ffn.{c}.lora_B.weight" + + if original_key_A in original_state_dict: + down_weight = original_state_dict.pop(original_key_A) + converted_state_dict[converted_key_A] = down_weight + if original_key_B in original_state_dict: + up_weight = original_state_dict.pop(original_key_B) + converted_state_dict[converted_key_B] = up_weight + if has_alpha: + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[converted_key_A] *= scale_down + converted_state_dict[converted_key_B] *= scale_up + + original_key = f"blocks.{i}.{o}.diff_b" + converted_key = f"blocks.{i}.ffn.{c}.lora_B.bias" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + # Remaining. + if original_state_dict: + if any("time_projection" in k for k in original_state_dict): + original_key = f"time_projection.1.{lora_down_key}.weight" + converted_key = "condition_embedder.time_proj.lora_A.weight" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + original_key = f"time_projection.1.{lora_up_key}.weight" + converted_key = "condition_embedder.time_proj.lora_B.weight" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + if "time_projection.1.diff_b" in original_state_dict: + converted_state_dict["condition_embedder.time_proj.lora_B.bias"] = original_state_dict.pop( + "time_projection.1.diff_b" + ) + + if any("head.head" in k for k in original_state_dict): + if any(f"head.head.{lora_down_key}.weight" in k for k in state_dict): + converted_state_dict["proj_out.lora_A.weight"] = original_state_dict.pop( + f"head.head.{lora_down_key}.weight" + ) + if any(f"head.head.{lora_up_key}.weight" in k for k in state_dict): + converted_state_dict["proj_out.lora_B.weight"] = original_state_dict.pop( + f"head.head.{lora_up_key}.weight" + ) + if "head.head.diff_b" in original_state_dict: + converted_state_dict["proj_out.lora_B.bias"] = original_state_dict.pop("head.head.diff_b") + + # Notes: https://huggingface.co/lightx2v/Wan2.2-Distill-Loras + # This is my (sayakpaul) assumption that this particular key belongs to the down matrix. + # Since for this particular LoRA, we don't have the corresponding up matrix, I will use + # an identity. + if any("head.head" in k and k.endswith(".diff") for k in state_dict): + if f"head.head.{lora_down_key}.weight" in state_dict: + logger.info( + f"The state dict seems to be have both `head.head.diff` and `head.head.{lora_down_key}.weight` keys, which is unexpected." + ) + converted_state_dict["proj_out.lora_A.weight"] = original_state_dict.pop("head.head.diff") + down_matrix_head = converted_state_dict["proj_out.lora_A.weight"] + up_matrix_shape = (down_matrix_head.shape[0], converted_state_dict["proj_out.lora_B.bias"].shape[0]) + converted_state_dict["proj_out.lora_B.weight"] = torch.eye( + *up_matrix_shape, dtype=down_matrix_head.dtype, device=down_matrix_head.device + ).T + + for text_time in ["text_embedding", "time_embedding"]: + if any(text_time in k for k in original_state_dict): + for b_n in [0, 2]: + diffusers_b_n = 1 if b_n == 0 else 2 + diffusers_name = ( + "condition_embedder.text_embedder" + if text_time == "text_embedding" + else "condition_embedder.time_embedder" + ) + if any(f"{text_time}.{b_n}" in k for k in original_state_dict): + converted_state_dict[f"{diffusers_name}.linear_{diffusers_b_n}.lora_A.weight"] = ( + original_state_dict.pop(f"{text_time}.{b_n}.{lora_down_key}.weight") + ) + converted_state_dict[f"{diffusers_name}.linear_{diffusers_b_n}.lora_B.weight"] = ( + original_state_dict.pop(f"{text_time}.{b_n}.{lora_up_key}.weight") + ) + if f"{text_time}.{b_n}.diff_b" in original_state_dict: + converted_state_dict[f"{diffusers_name}.linear_{diffusers_b_n}.lora_B.bias"] = ( + original_state_dict.pop(f"{text_time}.{b_n}.diff_b") + ) + + for img_ours, img_theirs in [ + ("ff.net.0.proj", "img_emb.proj.1"), + ("ff.net.2", "img_emb.proj.3"), + ]: + original_key = f"{img_theirs}.{lora_down_key}.weight" + converted_key = f"condition_embedder.image_embedder.{img_ours}.lora_A.weight" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + + original_key = f"{img_theirs}.{lora_up_key}.weight" + converted_key = f"condition_embedder.image_embedder.{img_ours}.lora_B.weight" + if original_key in original_state_dict: + converted_state_dict[converted_key] = original_state_dict.pop(original_key) + bias_key_theirs = original_key.removesuffix(f".{lora_up_key}.weight") + ".diff_b" + if bias_key_theirs in original_state_dict: + bias_key = converted_key.removesuffix(".weight") + ".bias" + converted_state_dict[bias_key] = original_state_dict.pop(bias_key_theirs) + + if len(original_state_dict) > 0: + diff = all(".diff" in k for k in original_state_dict) + if diff: + diff_keys = {k for k in original_state_dict if k.endswith(".diff")} + if not all("lora" not in k for k in diff_keys): + raise ValueError + logger.info( + "The remaining `state_dict` contains `diff` keys which we do not handle yet. If you see performance issues, please file an issue: " + "https://github.com/huggingface/diffusers//issues/new" + ) + else: + raise ValueError(f"`state_dict` should be empty at this point but has {original_state_dict.keys()=}") + + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_musubi_wan_lora_to_diffusers(state_dict): + # https://github.com/kohya-ss/musubi-tuner + converted_state_dict = {} + original_state_dict = {k[len("lora_unet_") :]: v for k, v in state_dict.items()} + + num_blocks = len({k.split("blocks_")[1].split("_")[0] for k in original_state_dict}) + is_i2v_lora = any("k_img" in k for k in original_state_dict) and any("v_img" in k for k in original_state_dict) + + def get_alpha_scales(down_weight, key): + rank = down_weight.shape[0] + alpha = original_state_dict.pop(key + ".alpha").item() + scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + return scale_down, scale_up + + for i in range(num_blocks): + # Self-attention + for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): + down_weight = original_state_dict.pop(f"blocks_{i}_self_attn_{o}.lora_down.weight") + up_weight = original_state_dict.pop(f"blocks_{i}_self_attn_{o}.lora_up.weight") + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks_{i}_self_attn_{o}") + converted_state_dict[f"blocks.{i}.attn1.{c}.lora_A.weight"] = down_weight * scale_down + converted_state_dict[f"blocks.{i}.attn1.{c}.lora_B.weight"] = up_weight * scale_up + + # Cross-attention + for o, c in zip(["q", "k", "v", "o"], ["to_q", "to_k", "to_v", "to_out.0"]): + down_weight = original_state_dict.pop(f"blocks_{i}_cross_attn_{o}.lora_down.weight") + up_weight = original_state_dict.pop(f"blocks_{i}_cross_attn_{o}.lora_up.weight") + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks_{i}_cross_attn_{o}") + converted_state_dict[f"blocks.{i}.attn2.{c}.lora_A.weight"] = down_weight * scale_down + converted_state_dict[f"blocks.{i}.attn2.{c}.lora_B.weight"] = up_weight * scale_up + + if is_i2v_lora: + for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]): + down_weight = original_state_dict.pop(f"blocks_{i}_cross_attn_{o}.lora_down.weight") + up_weight = original_state_dict.pop(f"blocks_{i}_cross_attn_{o}.lora_up.weight") + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks_{i}_cross_attn_{o}") + converted_state_dict[f"blocks.{i}.attn2.{c}.lora_A.weight"] = down_weight * scale_down + converted_state_dict[f"blocks.{i}.attn2.{c}.lora_B.weight"] = up_weight * scale_up + + # FFN + for o, c in zip(["ffn_0", "ffn_2"], ["net.0.proj", "net.2"]): + down_weight = original_state_dict.pop(f"blocks_{i}_{o}.lora_down.weight") + up_weight = original_state_dict.pop(f"blocks_{i}_{o}.lora_up.weight") + scale_down, scale_up = get_alpha_scales(down_weight, f"blocks_{i}_{o}") + converted_state_dict[f"blocks.{i}.ffn.{c}.lora_A.weight"] = down_weight * scale_down + converted_state_dict[f"blocks.{i}.ffn.{c}.lora_B.weight"] = up_weight * scale_up + + if len(original_state_dict) > 0: + raise ValueError(f"`state_dict` should be empty at this point but has {original_state_dict.keys()=}") + + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_non_diffusers_hidream_lora_to_diffusers(state_dict, non_diffusers_prefix="diffusion_model"): + if not all(k.startswith(non_diffusers_prefix) for k in state_dict): + raise ValueError("Invalid LoRA state dict for HiDream.") + converted_state_dict = {k.removeprefix(f"{non_diffusers_prefix}."): v for k, v in state_dict.items()} + converted_state_dict = {f"transformer.{k}": v for k, v in converted_state_dict.items()} + return converted_state_dict + + +def _convert_non_diffusers_ltxv_lora_to_diffusers(state_dict, non_diffusers_prefix="diffusion_model"): + if not all(k.startswith(f"{non_diffusers_prefix}.") for k in state_dict): + raise ValueError("Invalid LoRA state dict for LTX-Video.") + converted_state_dict = {k.removeprefix(f"{non_diffusers_prefix}."): v for k, v in state_dict.items()} + converted_state_dict = {f"transformer.{k}": v for k, v in converted_state_dict.items()} + return converted_state_dict + + +def _convert_non_diffusers_ltx2_lora_to_diffusers(state_dict, non_diffusers_prefix="diffusion_model"): + # Remove the prefix + state_dict = {k: v for k, v in state_dict.items() if k.startswith(f"{non_diffusers_prefix}.")} + converted_state_dict = {k.removeprefix(f"{non_diffusers_prefix}."): v for k, v in state_dict.items()} + + if non_diffusers_prefix == "diffusion_model": + rename_dict = { + "patchify_proj": "proj_in", + "audio_patchify_proj": "audio_proj_in", + "av_ca_video_scale_shift_adaln_single": "av_cross_attn_video_scale_shift", + "av_ca_a2v_gate_adaln_single": "av_cross_attn_video_a2v_gate", + "av_ca_audio_scale_shift_adaln_single": "av_cross_attn_audio_scale_shift", + "av_ca_v2a_gate_adaln_single": "av_cross_attn_audio_v2a_gate", + "scale_shift_table_a2v_ca_video": "video_a2v_cross_attn_scale_shift_table", + "scale_shift_table_a2v_ca_audio": "audio_a2v_cross_attn_scale_shift_table", + "q_norm": "norm_q", + "k_norm": "norm_k", + } + else: + rename_dict = {"aggregate_embed": "text_proj_in"} + + # Apply renaming + renamed_state_dict = {} + for key, value in converted_state_dict.items(): + new_key = key[:] + for old_pattern, new_pattern in rename_dict.items(): + new_key = new_key.replace(old_pattern, new_pattern) + renamed_state_dict[new_key] = value + + # Handle adaln_single -> time_embed and audio_adaln_single -> audio_time_embed + final_state_dict = {} + for key, value in renamed_state_dict.items(): + if key.startswith("adaln_single."): + new_key = key.replace("adaln_single.", "time_embed.") + final_state_dict[new_key] = value + elif key.startswith("audio_adaln_single."): + new_key = key.replace("audio_adaln_single.", "audio_time_embed.") + final_state_dict[new_key] = value + else: + final_state_dict[key] = value + + # Add transformer prefix + prefix = "transformer" if non_diffusers_prefix == "diffusion_model" else "connectors" + final_state_dict = {f"{prefix}.{k}": v for k, v in final_state_dict.items()} + + return final_state_dict + + +def _convert_non_diffusers_qwen_lora_to_diffusers(state_dict): + has_diffusion_model = any(k.startswith("diffusion_model.") for k in state_dict) + if has_diffusion_model: + state_dict = {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()} + + has_lora_unet = any(k.startswith("lora_unet_") for k in state_dict) + if has_lora_unet: + state_dict = {k.removeprefix("lora_unet_"): v for k, v in state_dict.items()} + + def convert_key(key: str) -> str: + prefix = "transformer_blocks" + if "." in key: + base, suffix = key.rsplit(".", 1) + else: + base, suffix = key, "" + + start = f"{prefix}_" + rest = base[len(start) :] + + if "." in rest: + head, tail = rest.split(".", 1) + tail = "." + tail + else: + head, tail = rest, "" + + # Protected n-grams that must keep their internal underscores + protected = { + # pairs + ("to", "q"), + ("to", "k"), + ("to", "v"), + ("to", "out"), + ("add", "q"), + ("add", "k"), + ("add", "v"), + ("txt", "mlp"), + ("img", "mlp"), + ("txt", "mod"), + ("img", "mod"), + # triplets + ("add", "q", "proj"), + ("add", "k", "proj"), + ("add", "v", "proj"), + ("to", "add", "out"), + } + + prot_by_len = {} + for ng in protected: + prot_by_len.setdefault(len(ng), set()).add(ng) + + parts = head.split("_") + merged = [] + i = 0 + lengths_desc = sorted(prot_by_len.keys(), reverse=True) + + while i < len(parts): + matched = False + for L in lengths_desc: + if i + L <= len(parts) and tuple(parts[i : i + L]) in prot_by_len[L]: + merged.append("_".join(parts[i : i + L])) + i += L + matched = True + break + if not matched: + merged.append(parts[i]) + i += 1 + + head_converted = ".".join(merged) + converted_base = f"{prefix}.{head_converted}{tail}" + return converted_base + (("." + suffix) if suffix else "") + + state_dict = {convert_key(k): v for k, v in state_dict.items()} + + has_default = any("default." in k for k in state_dict) + if has_default: + state_dict = {k.replace("default.", ""): v for k, v in state_dict.items()} + + converted_state_dict = {} + all_keys = list(state_dict.keys()) + down_key = ".lora_down.weight" + up_key = ".lora_up.weight" + a_key = ".lora_A.weight" + b_key = ".lora_B.weight" + + has_non_diffusers_lora_id = any(down_key in k or up_key in k for k in all_keys) + has_diffusers_lora_id = any(a_key in k or b_key in k for k in all_keys) + + if has_non_diffusers_lora_id: + + def get_alpha_scales(down_weight, alpha_key): + rank = down_weight.shape[0] + alpha = state_dict.pop(alpha_key).item() + scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + return scale_down, scale_up + + for k in all_keys: + if k.endswith(down_key): + diffusers_down_key = k.replace(down_key, ".lora_A.weight") + diffusers_up_key = k.replace(down_key, up_key).replace(up_key, ".lora_B.weight") + alpha_key = k.replace(down_key, ".alpha") + + down_weight = state_dict.pop(k) + up_weight = state_dict.pop(k.replace(down_key, up_key)) + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[diffusers_down_key] = down_weight * scale_down + converted_state_dict[diffusers_up_key] = up_weight * scale_up + + # Already in diffusers format (lora_A/lora_B), just pop + elif has_diffusers_lora_id: + for k in all_keys: + if a_key in k or b_key in k: + converted_state_dict[k] = state_dict.pop(k) + elif ".alpha" in k: + state_dict.pop(k) + + if len(state_dict) > 0: + raise ValueError(f"`state_dict` should be empty at this point but has {state_dict.keys()=}") + + converted_state_dict = {f"transformer.{k}": v for k, v in converted_state_dict.items()} + return converted_state_dict + + +def _convert_non_diffusers_flux2_lora_to_diffusers(state_dict): + converted_state_dict = {} + + prefix = "diffusion_model." + original_state_dict = {k[len(prefix) :]: v for k, v in state_dict.items()} + + has_lora_down_up = any("lora_down" in k or "lora_up" in k for k in original_state_dict.keys()) + if has_lora_down_up: + temp_state_dict = {} + for k, v in original_state_dict.items(): + new_key = k.replace("lora_down", "lora_A").replace("lora_up", "lora_B") + temp_state_dict[new_key] = v + original_state_dict = temp_state_dict + + num_double_layers = 0 + num_single_layers = 0 + for key in original_state_dict.keys(): + if key.startswith("single_blocks."): + num_single_layers = max(num_single_layers, int(key.split(".")[1]) + 1) + elif key.startswith("double_blocks."): + num_double_layers = max(num_double_layers, int(key.split(".")[1]) + 1) + + lora_keys = ("lora_A", "lora_B") + attn_types = ("img_attn", "txt_attn") + + for sl in range(num_single_layers): + single_block_prefix = f"single_blocks.{sl}" + attn_prefix = f"single_transformer_blocks.{sl}.attn" + + for lora_key in lora_keys: + linear1_key = f"{single_block_prefix}.linear1.{lora_key}.weight" + if linear1_key in original_state_dict: + converted_state_dict[f"{attn_prefix}.to_qkv_mlp_proj.{lora_key}.weight"] = original_state_dict.pop( + linear1_key + ) + + linear2_key = f"{single_block_prefix}.linear2.{lora_key}.weight" + if linear2_key in original_state_dict: + converted_state_dict[f"{attn_prefix}.to_out.{lora_key}.weight"] = original_state_dict.pop(linear2_key) + + for dl in range(num_double_layers): + transformer_block_prefix = f"transformer_blocks.{dl}" + + for lora_key in lora_keys: + for attn_type in attn_types: + attn_prefix = f"{transformer_block_prefix}.attn" + qkv_key = f"double_blocks.{dl}.{attn_type}.qkv.{lora_key}.weight" + + if qkv_key not in original_state_dict: + continue + + fused_qkv_weight = original_state_dict.pop(qkv_key) + + if lora_key == "lora_A": + diff_attn_proj_keys = ( + ["to_q", "to_k", "to_v"] + if attn_type == "img_attn" + else ["add_q_proj", "add_k_proj", "add_v_proj"] + ) + for proj_key in diff_attn_proj_keys: + converted_state_dict[f"{attn_prefix}.{proj_key}.{lora_key}.weight"] = torch.cat( + [fused_qkv_weight] + ) + else: + sample_q, sample_k, sample_v = torch.chunk(fused_qkv_weight, 3, dim=0) + + if attn_type == "img_attn": + converted_state_dict[f"{attn_prefix}.to_q.{lora_key}.weight"] = torch.cat([sample_q]) + converted_state_dict[f"{attn_prefix}.to_k.{lora_key}.weight"] = torch.cat([sample_k]) + converted_state_dict[f"{attn_prefix}.to_v.{lora_key}.weight"] = torch.cat([sample_v]) + else: + converted_state_dict[f"{attn_prefix}.add_q_proj.{lora_key}.weight"] = torch.cat([sample_q]) + converted_state_dict[f"{attn_prefix}.add_k_proj.{lora_key}.weight"] = torch.cat([sample_k]) + converted_state_dict[f"{attn_prefix}.add_v_proj.{lora_key}.weight"] = torch.cat([sample_v]) + + proj_mappings = [ + ("img_attn.proj", "attn.to_out.0"), + ("txt_attn.proj", "attn.to_add_out"), + ] + for org_proj, diff_proj in proj_mappings: + for lora_key in lora_keys: + original_key = f"double_blocks.{dl}.{org_proj}.{lora_key}.weight" + if original_key in original_state_dict: + diffusers_key = f"{transformer_block_prefix}.{diff_proj}.{lora_key}.weight" + converted_state_dict[diffusers_key] = original_state_dict.pop(original_key) + + mlp_mappings = [ + ("img_mlp.0", "ff.linear_in"), + ("img_mlp.2", "ff.linear_out"), + ("txt_mlp.0", "ff_context.linear_in"), + ("txt_mlp.2", "ff_context.linear_out"), + ] + for org_mlp, diff_mlp in mlp_mappings: + for lora_key in lora_keys: + original_key = f"double_blocks.{dl}.{org_mlp}.{lora_key}.weight" + if original_key in original_state_dict: + diffusers_key = f"{transformer_block_prefix}.{diff_mlp}.{lora_key}.weight" + converted_state_dict[diffusers_key] = original_state_dict.pop(original_key) + + extra_mappings = { + "img_in": "x_embedder", + "txt_in": "context_embedder", + "time_in.in_layer": "time_guidance_embed.timestep_embedder.linear_1", + "time_in.out_layer": "time_guidance_embed.timestep_embedder.linear_2", + "final_layer.linear": "proj_out", + "final_layer.adaLN_modulation.1": "norm_out.linear", + "single_stream_modulation.lin": "single_stream_modulation.linear", + "double_stream_modulation_img.lin": "double_stream_modulation_img.linear", + "double_stream_modulation_txt.lin": "double_stream_modulation_txt.linear", + } + + for org_key, diff_key in extra_mappings.items(): + for lora_key in lora_keys: + original_key = f"{org_key}.{lora_key}.weight" + if original_key in original_state_dict: + converted_state_dict[f"{diff_key}.{lora_key}.weight"] = original_state_dict.pop(original_key) + + if len(original_state_dict) > 0: + raise ValueError(f"`original_state_dict` should be empty at this point but has {original_state_dict.keys()=}.") + + for key in list(converted_state_dict.keys()): + converted_state_dict[f"transformer.{key}"] = converted_state_dict.pop(key) + + return converted_state_dict + + +def _convert_non_diffusers_z_image_lora_to_diffusers(state_dict): + """ + Convert non-diffusers ZImage LoRA state dict to diffusers format. + + Handles: + - `diffusion_model.` prefix removal + - `lora_unet_` prefix conversion with key mapping + - `default.` prefix removal + - `.lora_down.weight`/`.lora_up.weight` → `.lora_A.weight`/`.lora_B.weight` conversion with alpha scaling + """ + has_diffusion_model = any(k.startswith("diffusion_model.") for k in state_dict) + if has_diffusion_model: + state_dict = {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()} + + has_lora_unet = any(k.startswith("lora_unet_") for k in state_dict) + if has_lora_unet: + state_dict = {k.removeprefix("lora_unet_"): v for k, v in state_dict.items()} + + def convert_key(key: str) -> str: + # ZImage has: layers, noise_refiner, context_refiner blocks + # Keys may be like: layers_0_attention_to_q.lora_down.weight + + if "." in key: + base, suffix = key.rsplit(".", 1) + else: + base, suffix = key, "" + + # Protected n-grams that must keep their internal underscores + protected = { + # pairs for attention + ("to", "q"), + ("to", "k"), + ("to", "v"), + ("to", "out"), + # feed_forward + ("feed", "forward"), + } + + prot_by_len = {} + for ng in protected: + prot_by_len.setdefault(len(ng), set()).add(ng) + + parts = base.split("_") + merged = [] + i = 0 + lengths_desc = sorted(prot_by_len.keys(), reverse=True) + + while i < len(parts): + matched = False + for L in lengths_desc: + if i + L <= len(parts) and tuple(parts[i : i + L]) in prot_by_len[L]: + merged.append("_".join(parts[i : i + L])) + i += L + matched = True + break + if not matched: + merged.append(parts[i]) + i += 1 + + converted_base = ".".join(merged) + return converted_base + (("." + suffix) if suffix else "") + + state_dict = {convert_key(k): v for k, v in state_dict.items()} + + def normalize_out_key(k: str) -> str: + if ".to_out" in k: + return k + return re.sub( + r"\.out(?=\.(?:lora_down|lora_up)\.weight$|\.alpha$)", + ".to_out.0", + k, + ) + + state_dict = {normalize_out_key(k): v for k, v in state_dict.items()} + + has_default = any("default." in k for k in state_dict) + if has_default: + state_dict = {k.replace("default.", ""): v for k, v in state_dict.items()} + + # Normalize ZImage-specific dot-separated module names to underscore form so they + # match the diffusers model parameter names (context_refiner, noise_refiner). + state_dict = { + k.replace("context.refiner.", "context_refiner.").replace("noise.refiner.", "noise_refiner."): v + for k, v in state_dict.items() + } + + converted_state_dict = {} + all_keys = list(state_dict.keys()) + down_key = ".lora_down.weight" + up_key = ".lora_up.weight" + a_key = ".lora_A.weight" + b_key = ".lora_B.weight" + + has_non_diffusers_lora_id = any(down_key in k or up_key in k for k in all_keys) + has_diffusers_lora_id = any(a_key in k or b_key in k for k in all_keys) + + def get_alpha_scales(down_weight, alpha_key): + rank = down_weight.shape[0] + alpha = state_dict.pop(alpha_key).item() + scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + return scale_down, scale_up + + if has_non_diffusers_lora_id: + for k in all_keys: + if k.endswith(down_key): + diffusers_down_key = k.replace(down_key, ".lora_A.weight") + diffusers_up_key = k.replace(down_key, up_key).replace(up_key, ".lora_B.weight") + alpha_key = k.replace(down_key, ".alpha") + + down_weight = state_dict.pop(k) + up_weight = state_dict.pop(k.replace(down_key, up_key)) + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[diffusers_down_key] = down_weight * scale_down + converted_state_dict[diffusers_up_key] = up_weight * scale_up + + # Already in diffusers format (lora_A/lora_B), apply alpha scaling and pop. + elif has_diffusers_lora_id: + for k in all_keys: + if k.endswith(a_key): + diffusers_up_key = k.replace(a_key, b_key) + alpha_key = k.replace(a_key, ".alpha") + + down_weight = state_dict.pop(k) + up_weight = state_dict.pop(diffusers_up_key) + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[k] = down_weight * scale_down + converted_state_dict[diffusers_up_key] = up_weight * scale_up + + # Handle dot-format LoRA keys: ".lora.down.weight" / ".lora.up.weight". + # Some external ZImage trainers (e.g. Anime-Z) use dots instead of underscores in + # lora weight names and also include redundant keys: + # - "qkv.lora.*" duplicates individual "to.q/k/v.lora.*" keys → skip qkv + # - "out.lora.*" duplicates "to_out.0.lora.*" keys → skip bare out + # - "to.q/k/v.lora.*" → normalise to "to_q/k/v.lora_A/B.weight" + lora_dot_down_key = ".lora.down.weight" + lora_dot_up_key = ".lora.up.weight" + has_lora_dot_format = any(lora_dot_down_key in k for k in state_dict) + + if has_lora_dot_format: + dot_keys = list(state_dict.keys()) + for k in dot_keys: + if lora_dot_down_key not in k: + continue + if k not in state_dict: + continue # already popped by a prior iteration + + base = k[: -len(lora_dot_down_key)] + + # Skip combined "qkv" projection — individual to.q/k/v keys are also present. + if base.endswith(".qkv"): + state_dict.pop(k) + state_dict.pop(k.replace(lora_dot_down_key, lora_dot_up_key), None) + state_dict.pop(base + ".alpha", None) + continue + + # Skip bare "out.lora.*" — "to_out.0.lora.*" covers the same projection. + if re.search(r"\.out$", base) and ".to_out" not in base: + state_dict.pop(k) + state_dict.pop(k.replace(lora_dot_down_key, lora_dot_up_key), None) + continue + + # Normalise "to.q/k/v" → "to_q/k/v" for the diffusers output key. + norm_k = re.sub( + r"\.to\.([qkv])" + re.escape(lora_dot_down_key) + r"$", + r".to_\1" + lora_dot_down_key, + k, + ) + norm_base = norm_k[: -len(lora_dot_down_key)] + alpha_key = norm_base + ".alpha" + + diffusers_down = norm_k.replace(lora_dot_down_key, ".lora_A.weight") + diffusers_up = norm_k.replace(lora_dot_down_key, ".lora_B.weight") + + down_weight = state_dict.pop(k) + up_weight = state_dict.pop(k.replace(lora_dot_down_key, lora_dot_up_key)) + scale_down, scale_up = get_alpha_scales(down_weight, alpha_key) + converted_state_dict[diffusers_down] = down_weight * scale_down + converted_state_dict[diffusers_up] = up_weight * scale_up + + if len(state_dict) > 0: + raise ValueError(f"`state_dict` should be empty at this point but has {state_dict.keys()=}") + + converted_state_dict = {f"transformer.{k}": v for k, v in converted_state_dict.items()} + return converted_state_dict diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_pipeline.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..5d10f596f2e60847165e280a2c516ba11e761897 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/lora_pipeline.py @@ -0,0 +1,5828 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Callable + +import torch +from huggingface_hub.utils import validate_hf_hub_args + +from ..utils import ( + USE_PEFT_BACKEND, + deprecate, + get_submodule_by_name, + is_bitsandbytes_available, + is_gguf_available, + is_peft_available, + is_peft_version, + is_torch_version, + is_transformers_available, + is_transformers_version, + logging, +) +from .lora_base import ( # noqa + LORA_WEIGHT_NAME, + LORA_WEIGHT_NAME_SAFE, + LoraBaseMixin, + _fetch_state_dict, + _load_lora_into_text_encoder, + _pack_dict_with_prefix, +) +from .lora_conversion_utils import ( + _convert_bfl_flux_control_lora_to_diffusers, + _convert_fal_kontext_lora_to_diffusers, + _convert_hunyuan_video_lora_to_diffusers, + _convert_kohya_flux_lora_to_diffusers, + _convert_musubi_wan_lora_to_diffusers, + _convert_non_diffusers_flux2_lora_to_diffusers, + _convert_non_diffusers_hidream_lora_to_diffusers, + _convert_non_diffusers_lora_to_diffusers, + _convert_non_diffusers_ltx2_lora_to_diffusers, + _convert_non_diffusers_ltxv_lora_to_diffusers, + _convert_non_diffusers_lumina2_lora_to_diffusers, + _convert_non_diffusers_qwen_lora_to_diffusers, + _convert_non_diffusers_wan_lora_to_diffusers, + _convert_non_diffusers_z_image_lora_to_diffusers, + _convert_xlabs_flux_lora_to_diffusers, + _maybe_map_sgm_blocks_to_diffusers, +) + + +_LOW_CPU_MEM_USAGE_DEFAULT_LORA = False +if is_torch_version(">=", "1.9.0"): + if ( + is_peft_available() + and is_peft_version(">=", "0.13.1") + and is_transformers_available() + and is_transformers_version(">", "4.45.2") + ): + _LOW_CPU_MEM_USAGE_DEFAULT_LORA = True + + +logger = logging.get_logger(__name__) + +TEXT_ENCODER_NAME = "text_encoder" +UNET_NAME = "unet" +TRANSFORMER_NAME = "transformer" +LTX2_CONNECTOR_NAME = "connectors" + +_MODULE_NAME_TO_ATTRIBUTE_MAP_FLUX = {"x_embedder": "in_channels"} + + +def _maybe_dequantize_weight_for_expanded_lora(model, module): + if is_bitsandbytes_available(): + from ..quantizers.bitsandbytes import dequantize_bnb_weight + + if is_gguf_available(): + from ..quantizers.gguf.utils import dequantize_gguf_tensor + + is_bnb_4bit_quantized = module.weight.__class__.__name__ == "Params4bit" + is_bnb_8bit_quantized = module.weight.__class__.__name__ == "Int8Params" + is_gguf_quantized = module.weight.__class__.__name__ == "GGUFParameter" + + if is_bnb_4bit_quantized and not is_bitsandbytes_available(): + raise ValueError( + "The checkpoint seems to have been quantized with `bitsandbytes` (4bits). Install `bitsandbytes` to load quantized checkpoints." + ) + if is_bnb_8bit_quantized and not is_bitsandbytes_available(): + raise ValueError( + "The checkpoint seems to have been quantized with `bitsandbytes` (8bits). Install `bitsandbytes` to load quantized checkpoints." + ) + if is_gguf_quantized and not is_gguf_available(): + raise ValueError( + "The checkpoint seems to have been quantized with `gguf`. Install `gguf` to load quantized checkpoints." + ) + + weight_on_cpu = False + if module.weight.device.type == "cpu": + weight_on_cpu = True + + device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda" + if is_bnb_4bit_quantized or is_bnb_8bit_quantized: + module_weight = dequantize_bnb_weight( + module.weight.to(device) if weight_on_cpu else module.weight, + state=module.weight.quant_state if is_bnb_4bit_quantized else module.state, + dtype=model.dtype, + ).data + elif is_gguf_quantized: + module_weight = dequantize_gguf_tensor( + module.weight.to(device) if weight_on_cpu else module.weight, + ) + module_weight = module_weight.to(model.dtype) + else: + module_weight = module.weight.data + + if weight_on_cpu: + module_weight = module_weight.cpu() + + return module_weight + + +class StableDiffusionLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into Stable Diffusion [`UNet2DConditionModel`] and + [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel). + """ + + _lora_loadable_modules = ["unet", "text_encoder"] + unet_name = UNET_NAME + text_encoder_name = TEXT_ENCODER_NAME + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """Load LoRA weights specified in `pretrained_model_name_or_path_or_dict` into `self.unet` and + `self.text_encoder`. + + All kwargs are forwarded to `self.lora_state_dict`. + + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details on how the state dict is + loaded. + + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details on how the state dict is + loaded into `self.unet`. + + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder`] for more details on how the state + dict is loaded into `self.text_encoder`. + + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + Defaults to `False`. Whether to substitute an existing (LoRA) adapter with the newly loaded adapter + in-place. This means that, instead of loading an additional adapter, this will take the existing + adapter weights and replace them with the weights of the new adapter. This can be faster and more + memory efficient. However, the main advantage of hotswapping is that when the model is compiled with + torch.compile, loading the new adapter does not require recompilation of the model. When using + hotswapping, the passed `adapter_name` should be the name of an already loaded adapter. + + If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need + to call an additional method before loading the adapter: + + ```py + pipeline = ... # load diffusers pipeline + max_rank = ... # the highest rank among all LoRAs that you want to load + # call *before* compiling and loading the LoRA adapter + pipeline.enable_lora_hotswap(target_rank=max_rank) + pipeline.load_lora_weights(file_name) + # optionally compile the model now + ``` + + Note that hotswapping adapters of the text encoder is not yet supported. There are some further + limitations to this technique, which are documented here: + https://huggingface.co/docs/peft/main/en/package_reference/hotswap + kwargs (`dict`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`]. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, network_alphas, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_unet( + state_dict, + network_alphas=network_alphas, + unet=getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + self.load_lora_into_text_encoder( + state_dict, + network_alphas=network_alphas, + text_encoder=getattr(self, self.text_encoder_name) + if not hasattr(self, "text_encoder") + else self.text_encoder, + lora_scale=self.lora_scale, + adapter_name=adapter_name, + _pipeline=self, + metadata=metadata, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + Return state dict for lora weights and the network alphas. + + > [!WARNING] > We support loading A1111 formatted LoRA checkpoints in a limited capacity. > > This function is + experimental and might change in the future. + + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + weight_name (`str`, *optional*, defaults to None): + Name of the serialized state dict file. + return_lora_metadata (`bool`, *optional*, defaults to False): + When enabled, additionally return the LoRA adapter metadata, typically found in the state dict. + """ + # Load the main state dict first which has the LoRA layers for either of + # UNet and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + unet_config = kwargs.pop("unet_config", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + network_alphas = None + # TODO: replace it with a method from `state_dict_utils` + if all( + ( + k.startswith("lora_te_") + or k.startswith("lora_unet_") + or k.startswith("lora_te1_") + or k.startswith("lora_te2_") + ) + for k in state_dict.keys() + ): + # Map SDXL blocks correctly. + if unet_config is not None: + # use unet config to remap block numbers + state_dict = _maybe_map_sgm_blocks_to_diffusers(state_dict, unet_config) + state_dict, network_alphas = _convert_non_diffusers_lora_to_diffusers(state_dict) + + out = (state_dict, network_alphas, metadata) if return_lora_metadata else (state_dict, network_alphas) + return out + + @classmethod + def load_lora_into_unet( + cls, + state_dict, + network_alphas, + unet, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `unet`. + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The keys can either be indexed directly + into the unet or prefixed with an additional `unet` which can be used to distinguish between text + encoder lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + unet (`UNet2DConditionModel`): + The UNet model to load the LoRA layers into. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), + # then the `state_dict` keys should have `cls.unet_name` and/or `cls.text_encoder_name` as + # their prefixes. + logger.info(f"Loading {cls.unet_name}.") + unet.load_lora_adapter( + state_dict, + prefix=cls.unet_name, + network_alphas=network_alphas, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def load_lora_into_text_encoder( + cls, + state_dict, + network_alphas, + text_encoder, + prefix=None, + lora_scale=1.0, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `text_encoder` + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The key should be prefixed with an + additional `text_encoder` to distinguish between unet lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + text_encoder (`CLIPTextModel`): + The text encoder model to load the LoRA layers into. + prefix (`str`): + Expected prefix of the `text_encoder` in the `state_dict`. + lora_scale (`float`): + How much to scale the output of the lora linear layer before it is added with the output of the regular + lora layer. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + _load_lora_into_text_encoder( + state_dict=state_dict, + network_alphas=network_alphas, + lora_scale=lora_scale, + text_encoder=text_encoder, + prefix=prefix, + text_encoder_name=cls.text_encoder_name, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + unet_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + text_encoder_lora_layers: dict[str, torch.nn.Module] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + unet_lora_adapter_metadata=None, + text_encoder_lora_adapter_metadata=None, + ): + r""" + Save the LoRA parameters corresponding to the UNet and text encoder. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save LoRA parameters to. Will be created if it doesn't exist. + unet_lora_layers (`dict[str, torch.nn.Module]` or `dict[str, torch.Tensor]`): + State dict of the LoRA layers corresponding to the `unet`. + text_encoder_lora_layers (`dict[str, torch.nn.Module]` or `dict[str, torch.Tensor]`): + State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text + encoder LoRA state dict because it comes from 🤗 Transformers. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful during distributed training and you + need to call this function on all processes. In this case, set `is_main_process=True` only on the main + process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful during distributed training when you need to + replace `torch.save` with another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + unet_lora_adapter_metadata: + LoRA adapter metadata associated with the unet to be serialized with the state dict. + text_encoder_lora_adapter_metadata: + LoRA adapter metadata associated with the text encoder to be serialized with the state dict. + """ + lora_layers = {} + lora_metadata = {} + + if unet_lora_layers: + lora_layers[cls.unet_name] = unet_lora_layers + lora_metadata[cls.unet_name] = unet_lora_adapter_metadata + + if text_encoder_lora_layers: + lora_layers[cls.text_encoder_name] = text_encoder_lora_layers + lora_metadata[cls.text_encoder_name] = text_encoder_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `unet_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + def fuse_lora( + self, + components: list[str] = ["unet", "text_encoder"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + Fuses the LoRA parameters into the original parameters of the corresponding blocks. + + > [!WARNING] > This is an experimental API. + + Args: + components: (`list[str]`): list of LoRA-injectable components to fuse the LoRAs into. + lora_scale (`float`, defaults to 1.0): + Controls how much to influence the outputs with the LoRA parameters. + safe_fusing (`bool`, defaults to `False`): + Whether to check fused weights for NaN values before fusing and if values are NaN not fusing them. + adapter_names (`list[str]`, *optional*): + Adapter names to be used for fusing. If nothing is passed, all active adapters will be fused. + + Example: + + ```py + from diffusers import DiffusionPipeline + import torch + + pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipeline.fuse_lora(lora_scale=0.7) + ``` + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + def unfuse_lora(self, components: list[str] = ["unet", "text_encoder"], **kwargs): + r""" + Reverses the effect of + [`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora). + + > [!WARNING] > This is an experimental API. + + Args: + components (`list[str]`): list of LoRA-injectable components to unfuse LoRA from. + unfuse_unet (`bool`, defaults to `True`): Whether to unfuse the UNet LoRA parameters. + unfuse_text_encoder (`bool`, defaults to `True`): + Whether to unfuse the text encoder LoRA parameters. If the text encoder wasn't monkey-patched with the + LoRA parameters then it won't have any effect. + """ + super().unfuse_lora(components=components, **kwargs) + + +class StableDiffusionXLLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into Stable Diffusion XL [`UNet2DConditionModel`], + [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), and + [`CLIPTextModelWithProjection`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection). + """ + + _lora_loadable_modules = ["unet", "text_encoder", "text_encoder_2"] + unet_name = UNET_NAME + text_encoder_name = TEXT_ENCODER_NAME + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # We could have accessed the unet config from `lora_state_dict()` too. We pass + # it here explicitly to be able to tell that it's coming from an SDXL + # pipeline. + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, network_alphas, metadata = self.lora_state_dict( + pretrained_model_name_or_path_or_dict, + unet_config=self.unet.config, + **kwargs, + ) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_unet( + state_dict, + network_alphas=network_alphas, + unet=self.unet, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + self.load_lora_into_text_encoder( + state_dict, + network_alphas=network_alphas, + text_encoder=self.text_encoder, + prefix=self.text_encoder_name, + lora_scale=self.lora_scale, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + self.load_lora_into_text_encoder( + state_dict, + network_alphas=network_alphas, + text_encoder=self.text_encoder_2, + prefix=f"{self.text_encoder_name}_2", + lora_scale=self.lora_scale, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + Return state dict for lora weights and the network alphas. + + > [!WARNING] > We support loading A1111 formatted LoRA checkpoints in a limited capacity. > > This function is + experimental and might change in the future. + + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + weight_name (`str`, *optional*, defaults to None): + Name of the serialized state dict file. + return_lora_metadata (`bool`, *optional*, defaults to False): + When enabled, additionally return the LoRA adapter metadata, typically found in the state dict. + """ + # Load the main state dict first which has the LoRA layers for either of + # UNet and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + unet_config = kwargs.pop("unet_config", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + network_alphas = None + # TODO: replace it with a method from `state_dict_utils` + if all( + ( + k.startswith("lora_te_") + or k.startswith("lora_unet_") + or k.startswith("lora_te1_") + or k.startswith("lora_te2_") + ) + for k in state_dict.keys() + ): + # Map SDXL blocks correctly. + if unet_config is not None: + # use unet config to remap block numbers + state_dict = _maybe_map_sgm_blocks_to_diffusers(state_dict, unet_config) + state_dict, network_alphas = _convert_non_diffusers_lora_to_diffusers(state_dict) + + out = (state_dict, network_alphas, metadata) if return_lora_metadata else (state_dict, network_alphas) + return out + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_unet + def load_lora_into_unet( + cls, + state_dict, + network_alphas, + unet, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `unet`. + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The keys can either be indexed directly + into the unet or prefixed with an additional `unet` which can be used to distinguish between text + encoder lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + unet (`UNet2DConditionModel`): + The UNet model to load the LoRA layers into. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # If the serialization format is new (introduced in https://github.com/huggingface/diffusers/pull/2918), + # then the `state_dict` keys should have `cls.unet_name` and/or `cls.text_encoder_name` as + # their prefixes. + logger.info(f"Loading {cls.unet_name}.") + unet.load_lora_adapter( + state_dict, + prefix=cls.unet_name, + network_alphas=network_alphas, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder + def load_lora_into_text_encoder( + cls, + state_dict, + network_alphas, + text_encoder, + prefix=None, + lora_scale=1.0, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `text_encoder` + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The key should be prefixed with an + additional `text_encoder` to distinguish between unet lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + text_encoder (`CLIPTextModel`): + The text encoder model to load the LoRA layers into. + prefix (`str`): + Expected prefix of the `text_encoder` in the `state_dict`. + lora_scale (`float`): + How much to scale the output of the lora linear layer before it is added with the output of the regular + lora layer. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + _load_lora_into_text_encoder( + state_dict=state_dict, + network_alphas=network_alphas, + lora_scale=lora_scale, + text_encoder=text_encoder, + prefix=prefix, + text_encoder_name=cls.text_encoder_name, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + unet_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + text_encoder_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + text_encoder_2_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + unet_lora_adapter_metadata=None, + text_encoder_lora_adapter_metadata=None, + text_encoder_2_lora_adapter_metadata=None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if unet_lora_layers: + lora_layers[cls.unet_name] = unet_lora_layers + lora_metadata[cls.unet_name] = unet_lora_adapter_metadata + + if text_encoder_lora_layers: + lora_layers["text_encoder"] = text_encoder_lora_layers + lora_metadata["text_encoder"] = text_encoder_lora_adapter_metadata + + if text_encoder_2_lora_layers: + lora_layers["text_encoder_2"] = text_encoder_2_lora_layers + lora_metadata["text_encoder_2"] = text_encoder_2_lora_adapter_metadata + + if not lora_layers: + raise ValueError( + "You must pass at least one of `unet_lora_layers`, `text_encoder_lora_layers`, or `text_encoder_2_lora_layers`." + ) + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + def fuse_lora( + self, + components: list[str] = ["unet", "text_encoder", "text_encoder_2"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + def unfuse_lora(self, components: list[str] = ["unet", "text_encoder", "text_encoder_2"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class SD3LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`SD3Transformer2DModel`], + [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), and + [`CLIPTextModelWithProjection`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection). + + Specific to [`StableDiffusion3Pipeline`]. + """ + + _lora_loadable_modules = ["transformer", "text_encoder", "text_encoder_2"] + transformer_name = TRANSFORMER_NAME + text_encoder_name = TEXT_ENCODER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name=None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + self.load_lora_into_text_encoder( + state_dict, + network_alphas=None, + text_encoder=self.text_encoder, + prefix=self.text_encoder_name, + lora_scale=self.lora_scale, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + self.load_lora_into_text_encoder( + state_dict, + network_alphas=None, + text_encoder=self.text_encoder_2, + prefix=f"{self.text_encoder_name}_2", + lora_scale=self.lora_scale, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder + def load_lora_into_text_encoder( + cls, + state_dict, + network_alphas, + text_encoder, + prefix=None, + lora_scale=1.0, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `text_encoder` + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The key should be prefixed with an + additional `text_encoder` to distinguish between unet lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + text_encoder (`CLIPTextModel`): + The text encoder model to load the LoRA layers into. + prefix (`str`): + Expected prefix of the `text_encoder` in the `state_dict`. + lora_scale (`float`): + How much to scale the output of the lora linear layer before it is added with the output of the regular + lora layer. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + _load_lora_into_text_encoder( + state_dict=state_dict, + network_alphas=network_alphas, + lora_scale=lora_scale, + text_encoder=text_encoder, + prefix=prefix, + text_encoder_name=cls.text_encoder_name, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin.save_lora_weights with unet->transformer + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + text_encoder_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + text_encoder_2_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata=None, + text_encoder_lora_adapter_metadata=None, + text_encoder_2_lora_adapter_metadata=None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if text_encoder_lora_layers: + lora_layers["text_encoder"] = text_encoder_lora_layers + lora_metadata["text_encoder"] = text_encoder_lora_adapter_metadata + + if text_encoder_2_lora_layers: + lora_layers["text_encoder_2"] = text_encoder_2_lora_layers + lora_metadata["text_encoder_2"] = text_encoder_2_lora_adapter_metadata + + if not lora_layers: + raise ValueError( + "You must pass at least one of `transformer_lora_layers`, `text_encoder_lora_layers`, or `text_encoder_2_lora_layers`." + ) + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin.fuse_lora with unet->transformer + def fuse_lora( + self, + components: list[str] = ["transformer", "text_encoder", "text_encoder_2"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionXLLoraLoaderMixin.unfuse_lora with unet->transformer + def unfuse_lora(self, components: list[str] = ["transformer", "text_encoder", "text_encoder_2"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class AuraFlowLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`AuraFlowTransformer2DModel`] Specific to [`AuraFlowPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->AuraFlowTransformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer", "text_encoder"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class FluxLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`FluxTransformer2DModel`], + [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel). + + Specific to [`FluxPipeline`]. + """ + + _lora_loadable_modules = ["transformer", "text_encoder"] + transformer_name = TRANSFORMER_NAME + text_encoder_name = TEXT_ENCODER_NAME + _control_lora_supported_norm_keys = ["norm_q", "norm_k", "norm_added_q", "norm_added_k"] + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + return_alphas: bool = False, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + # TODO (sayakpaul): to a follow-up to clean and try to unify the conditions. + is_kohya = any(".lora_down.weight" in k for k in state_dict) + if is_kohya: + state_dict = _convert_kohya_flux_lora_to_diffusers(state_dict) + # Kohya already takes care of scaling the LoRA parameters with alpha. + return cls._prepare_outputs( + state_dict, + metadata=metadata, + alphas=None, + return_alphas=return_alphas, + return_metadata=return_lora_metadata, + ) + + is_xlabs = any("processor" in k for k in state_dict) + if is_xlabs: + state_dict = _convert_xlabs_flux_lora_to_diffusers(state_dict) + # xlabs doesn't use `alpha`. + return cls._prepare_outputs( + state_dict, + metadata=metadata, + alphas=None, + return_alphas=return_alphas, + return_metadata=return_lora_metadata, + ) + + is_bfl_control = any("query_norm.scale" in k for k in state_dict) + if is_bfl_control: + state_dict = _convert_bfl_flux_control_lora_to_diffusers(state_dict) + return cls._prepare_outputs( + state_dict, + metadata=metadata, + alphas=None, + return_alphas=return_alphas, + return_metadata=return_lora_metadata, + ) + + is_fal_kontext = any("base_model" in k for k in state_dict) + if is_fal_kontext: + state_dict = _convert_fal_kontext_lora_to_diffusers(state_dict) + return cls._prepare_outputs( + state_dict, + metadata=metadata, + alphas=None, + return_alphas=return_alphas, + return_metadata=return_lora_metadata, + ) + + # For state dicts like + # https://huggingface.co/TheLastBen/Jon_Snow_Flux_LoRA + keys = list(state_dict.keys()) + network_alphas = {} + for k in keys: + if "alpha" in k: + alpha_value = state_dict.get(k) + if (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance( + alpha_value, float + ): + network_alphas[k] = state_dict.pop(k) + else: + raise ValueError( + f"The alpha key ({k}) seems to be incorrect. If you think this error is unexpected, please open as issue." + ) + + if return_alphas or return_lora_metadata: + return cls._prepare_outputs( + state_dict, + metadata=metadata, + alphas=network_alphas, + return_alphas=return_alphas, + return_metadata=return_lora_metadata, + ) + else: + return state_dict + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, network_alphas, metadata = self.lora_state_dict( + pretrained_model_name_or_path_or_dict, return_alphas=True, **kwargs + ) + + has_lora_keys = any("lora" in key for key in state_dict.keys()) + + # Flux Control LoRAs also have norm keys + has_norm_keys = any( + norm_key in key for key in state_dict.keys() for norm_key in self._control_lora_supported_norm_keys + ) + + if not (has_lora_keys or has_norm_keys): + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + transformer_lora_state_dict = { + k: state_dict.get(k) + for k in list(state_dict.keys()) + if k.startswith(f"{self.transformer_name}.") and "lora" in k + } + transformer_norm_state_dict = { + k: state_dict.pop(k) + for k in list(state_dict.keys()) + if k.startswith(f"{self.transformer_name}.") + and any(norm_key in k for norm_key in self._control_lora_supported_norm_keys) + } + + transformer = getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer + has_param_with_expanded_shape = False + if len(transformer_lora_state_dict) > 0: + has_param_with_expanded_shape = self._maybe_expand_transformer_param_shape_or_error_( + transformer, transformer_lora_state_dict, transformer_norm_state_dict + ) + + if has_param_with_expanded_shape: + logger.info( + "The LoRA weights contain parameters that have different shapes that expected by the transformer. " + "As a result, the state_dict of the transformer has been expanded to match the LoRA parameter shapes. " + "To get a comprehensive list of parameter names that were modified, enable debug logging." + ) + if len(transformer_lora_state_dict) > 0: + transformer_lora_state_dict = self._maybe_expand_lora_state_dict( + transformer=transformer, lora_state_dict=transformer_lora_state_dict + ) + for k in transformer_lora_state_dict: + state_dict.update({k: transformer_lora_state_dict[k]}) + + self.load_lora_into_transformer( + state_dict, + network_alphas=network_alphas, + transformer=transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + if len(transformer_norm_state_dict) > 0: + transformer._transformer_norm_layers = self._load_norm_into_transformer( + transformer_norm_state_dict, + transformer=transformer, + discard_original_layers=False, + ) + + self.load_lora_into_text_encoder( + state_dict, + network_alphas=network_alphas, + text_encoder=self.text_encoder, + prefix=self.text_encoder_name, + lora_scale=self.lora_scale, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def load_lora_into_transformer( + cls, + state_dict, + network_alphas, + transformer, + adapter_name=None, + metadata=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=network_alphas, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def _load_norm_into_transformer( + cls, + state_dict, + transformer, + prefix=None, + discard_original_layers=False, + ) -> dict[str, torch.Tensor]: + # Remove prefix if present + prefix = prefix or cls.transformer_name + for key in list(state_dict.keys()): + if key.split(".")[0] == prefix: + state_dict[key.removeprefix(f"{prefix}.")] = state_dict.pop(key) + + # Find invalid keys + transformer_state_dict = transformer.state_dict() + transformer_keys = set(transformer_state_dict.keys()) + state_dict_keys = set(state_dict.keys()) + extra_keys = list(state_dict_keys - transformer_keys) + + if extra_keys: + logger.warning( + f"Unsupported keys found in state dict when trying to load normalization layers into the transformer. The following keys will be ignored:\n{extra_keys}." + ) + + for key in extra_keys: + state_dict.pop(key) + + # Save the layers that are going to be overwritten so that unload_lora_weights can work as expected + overwritten_layers_state_dict = {} + if not discard_original_layers: + for key in state_dict.keys(): + overwritten_layers_state_dict[key] = transformer_state_dict[key].clone() + + logger.info( + "The provided state dict contains normalization layers in addition to LoRA layers. The normalization layers will directly update the state_dict of the transformer " + 'as opposed to the LoRA layers that will co-exist separately until the "fuse_lora()" method is called. That is to say, the normalization layers will always be directly ' + "fused into the transformer and can only be unfused if `discard_original_layers=True` is passed. This might also have implications when dealing with multiple LoRAs. " + "If you notice something unexpected, please open an issue: https://github.com/huggingface/diffusers/issues." + ) + + # We can't load with strict=True because the current state_dict does not contain all the transformer keys + incompatible_keys = transformer.load_state_dict(state_dict, strict=False) + unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None) + + # We shouldn't expect to see the supported norm keys here being present in the unexpected keys. + if unexpected_keys: + if any(norm_key in k for k in unexpected_keys for norm_key in cls._control_lora_supported_norm_keys): + raise ValueError( + f"Found {unexpected_keys} as unexpected keys while trying to load norm layers into the transformer." + ) + + return overwritten_layers_state_dict + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder + def load_lora_into_text_encoder( + cls, + state_dict, + network_alphas, + text_encoder, + prefix=None, + lora_scale=1.0, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `text_encoder` + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The key should be prefixed with an + additional `text_encoder` to distinguish between unet lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + text_encoder (`CLIPTextModel`): + The text encoder model to load the LoRA layers into. + prefix (`str`): + Expected prefix of the `text_encoder` in the `state_dict`. + lora_scale (`float`): + How much to scale the output of the lora linear layer before it is added with the output of the regular + lora layer. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + _load_lora_into_text_encoder( + state_dict=state_dict, + network_alphas=network_alphas, + lora_scale=lora_scale, + text_encoder=text_encoder, + prefix=prefix, + text_encoder_name=cls.text_encoder_name, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.save_lora_weights with unet->transformer + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + text_encoder_lora_layers: dict[str, torch.nn.Module] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata=None, + text_encoder_lora_adapter_metadata=None, + ): + r""" + Save the LoRA parameters corresponding to the UNet and text encoder. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save LoRA parameters to. Will be created if it doesn't exist. + transformer_lora_layers (`dict[str, torch.nn.Module]` or `dict[str, torch.Tensor]`): + State dict of the LoRA layers corresponding to the `transformer`. + text_encoder_lora_layers (`dict[str, torch.nn.Module]` or `dict[str, torch.Tensor]`): + State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text + encoder LoRA state dict because it comes from 🤗 Transformers. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful during distributed training and you + need to call this function on all processes. In this case, set `is_main_process=True` only on the main + process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful during distributed training when you need to + replace `torch.save` with another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + transformer_lora_adapter_metadata: + LoRA adapter metadata associated with the transformer to be serialized with the state dict. + text_encoder_lora_adapter_metadata: + LoRA adapter metadata associated with the text encoder to be serialized with the state dict. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if text_encoder_lora_layers: + lora_layers[cls.text_encoder_name] = text_encoder_lora_layers + lora_metadata[cls.text_encoder_name] = text_encoder_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + + transformer = getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer + if ( + hasattr(transformer, "_transformer_norm_layers") + and isinstance(transformer._transformer_norm_layers, dict) + and len(transformer._transformer_norm_layers.keys()) > 0 + ): + logger.info( + "The provided state dict contains normalization layers in addition to LoRA layers. The normalization layers will be directly updated the state_dict of the transformer " + "as opposed to the LoRA layers that will co-exist separately until the 'fuse_lora()' method is called. That is to say, the normalization layers will always be directly " + "fused into the transformer and can only be unfused if `discard_original_layers=True` is passed." + ) + + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + def unfuse_lora(self, components: list[str] = ["transformer", "text_encoder"], **kwargs): + r""" + Reverses the effect of + [`pipe.fuse_lora()`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraBaseMixin.fuse_lora). + + > [!WARNING] > This is an experimental API. + + Args: + components (`list[str]`): list of LoRA-injectable components to unfuse LoRA from. + """ + transformer = getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer + if hasattr(transformer, "_transformer_norm_layers") and transformer._transformer_norm_layers: + transformer.load_state_dict(transformer._transformer_norm_layers, strict=False) + + super().unfuse_lora(components=components, **kwargs) + + # We override this here account for `_transformer_norm_layers` and `_overwritten_params`. + def unload_lora_weights(self, reset_to_overwritten_params=False): + """ + Unloads the LoRA parameters. + + Args: + reset_to_overwritten_params (`bool`, defaults to `False`): Whether to reset the LoRA-loaded modules + to their original params. Refer to the [Flux + documentation](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) to learn more. + + Examples: + + ```python + >>> # Assuming `pipeline` is already loaded with the LoRA parameters. + >>> pipeline.unload_lora_weights() + >>> ... + ``` + """ + super().unload_lora_weights() + + transformer = getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer + if hasattr(transformer, "_transformer_norm_layers") and transformer._transformer_norm_layers: + transformer.load_state_dict(transformer._transformer_norm_layers, strict=False) + transformer._transformer_norm_layers = None + + if reset_to_overwritten_params and getattr(transformer, "_overwritten_params", None) is not None: + overwritten_params = transformer._overwritten_params + module_names = set() + + for param_name in overwritten_params: + if param_name.endswith(".weight"): + module_names.add(param_name.replace(".weight", "")) + + for name, module in transformer.named_modules(): + if isinstance(module, torch.nn.Linear) and name in module_names: + module_weight = module.weight.data + module_bias = module.bias.data if module.bias is not None else None + bias = module_bias is not None + + parent_module_name, _, current_module_name = name.rpartition(".") + parent_module = transformer.get_submodule(parent_module_name) + + current_param_weight = overwritten_params[f"{name}.weight"] + in_features, out_features = current_param_weight.shape[1], current_param_weight.shape[0] + with torch.device("meta"): + original_module = torch.nn.Linear( + in_features, + out_features, + bias=bias, + dtype=module_weight.dtype, + ) + + tmp_state_dict = {"weight": current_param_weight} + if module_bias is not None: + tmp_state_dict.update({"bias": overwritten_params[f"{name}.bias"]}) + original_module.load_state_dict(tmp_state_dict, assign=True, strict=True) + setattr(parent_module, current_module_name, original_module) + + del tmp_state_dict + + if current_module_name in _MODULE_NAME_TO_ATTRIBUTE_MAP_FLUX: + attribute_name = _MODULE_NAME_TO_ATTRIBUTE_MAP_FLUX[current_module_name] + new_value = int(current_param_weight.shape[1]) + old_value = getattr(transformer.config, attribute_name) + setattr(transformer.config, attribute_name, new_value) + logger.info( + f"Set the {attribute_name} attribute of the model to {new_value} from {old_value}." + ) + + @classmethod + def _maybe_expand_transformer_param_shape_or_error_( + cls, + transformer: torch.nn.Module, + lora_state_dict=None, + norm_state_dict=None, + prefix=None, + ) -> bool: + """ + Control LoRA expands the shape of the input layer from (3072, 64) to (3072, 128). This method handles that and + generalizes things a bit so that any parameter that needs expansion receives appropriate treatment. + """ + state_dict = {} + if lora_state_dict is not None: + state_dict.update(lora_state_dict) + if norm_state_dict is not None: + state_dict.update(norm_state_dict) + + # Remove prefix if present + prefix = prefix or cls.transformer_name + for key in list(state_dict.keys()): + if key.split(".")[0] == prefix: + state_dict[key.removeprefix(f"{prefix}.")] = state_dict.pop(key) + + # Expand transformer parameter shapes if they don't match lora + has_param_with_shape_update = False + overwritten_params = {} + + is_peft_loaded = getattr(transformer, "peft_config", None) is not None + is_quantized = hasattr(transformer, "hf_quantizer") + for name, module in transformer.named_modules(): + if isinstance(module, torch.nn.Linear): + module_weight = module.weight.data + module_bias = module.bias.data if module.bias is not None else None + bias = module_bias is not None + + lora_base_name = name.replace(".base_layer", "") if is_peft_loaded else name + lora_A_weight_name = f"{lora_base_name}.lora_A.weight" + lora_B_weight_name = f"{lora_base_name}.lora_B.weight" + if lora_A_weight_name not in state_dict: + continue + + in_features = state_dict[lora_A_weight_name].shape[1] + out_features = state_dict[lora_B_weight_name].shape[0] + + # Model maybe loaded with different quantization schemes which may flatten the params. + # `bitsandbytes`, for example, flatten the weights when using 4bit. 8bit bnb models + # preserve weight shape. + module_weight_shape = cls._calculate_module_shape(model=transformer, base_module=module) + + # This means there's no need for an expansion in the params, so we simply skip. + if tuple(module_weight_shape) == (out_features, in_features): + continue + + module_out_features, module_in_features = module_weight_shape + debug_message = "" + if in_features > module_in_features: + debug_message += ( + f'Expanding the nn.Linear input/output features for module="{name}" because the provided LoRA ' + f"checkpoint contains higher number of features than expected. The number of input_features will be " + f"expanded from {module_in_features} to {in_features}" + ) + if out_features > module_out_features: + debug_message += ( + ", and the number of output features will be " + f"expanded from {module_out_features} to {out_features}." + ) + else: + debug_message += "." + if debug_message: + logger.debug(debug_message) + + if out_features > module_out_features or in_features > module_in_features: + has_param_with_shape_update = True + parent_module_name, _, current_module_name = name.rpartition(".") + parent_module = transformer.get_submodule(parent_module_name) + + if is_quantized: + module_weight = _maybe_dequantize_weight_for_expanded_lora(transformer, module) + + # TODO: consider if this layer needs to be a quantized layer as well if `is_quantized` is True. + with torch.device("meta"): + expanded_module = torch.nn.Linear( + in_features, out_features, bias=bias, dtype=module_weight.dtype + ) + # Only weights are expanded and biases are not. This is because only the input dimensions + # are changed while the output dimensions remain the same. The shape of the weight tensor + # is (out_features, in_features), while the shape of bias tensor is (out_features,), which + # explains the reason why only weights are expanded. + new_weight = torch.zeros_like( + expanded_module.weight.data, device=module_weight.device, dtype=module_weight.dtype + ) + slices = tuple(slice(0, dim) for dim in module_weight_shape) + new_weight[slices] = module_weight + tmp_state_dict = {"weight": new_weight} + if module_bias is not None: + tmp_state_dict["bias"] = module_bias + expanded_module.load_state_dict(tmp_state_dict, strict=True, assign=True) + + setattr(parent_module, current_module_name, expanded_module) + + del tmp_state_dict + + if current_module_name in _MODULE_NAME_TO_ATTRIBUTE_MAP_FLUX: + attribute_name = _MODULE_NAME_TO_ATTRIBUTE_MAP_FLUX[current_module_name] + new_value = int(expanded_module.weight.data.shape[1]) + old_value = getattr(transformer.config, attribute_name) + setattr(transformer.config, attribute_name, new_value) + logger.info( + f"Set the {attribute_name} attribute of the model to {new_value} from {old_value}." + ) + + # For `unload_lora_weights()`. + # TODO: this could lead to more memory overhead if the number of overwritten params + # are large. Should be revisited later and tackled through a `discard_original_layers` arg. + overwritten_params[f"{current_module_name}.weight"] = module_weight + if module_bias is not None: + overwritten_params[f"{current_module_name}.bias"] = module_bias + + if len(overwritten_params) > 0: + transformer._overwritten_params = overwritten_params + + return has_param_with_shape_update + + @classmethod + def _maybe_expand_lora_state_dict(cls, transformer, lora_state_dict): + expanded_module_names = set() + transformer_state_dict = transformer.state_dict() + prefix = f"{cls.transformer_name}." + + lora_module_names = [ + key[: -len(".lora_A.weight")] for key in lora_state_dict if key.endswith(".lora_A.weight") + ] + lora_module_names = [name[len(prefix) :] for name in lora_module_names if name.startswith(prefix)] + lora_module_names = sorted(set(lora_module_names)) + transformer_module_names = sorted({name for name, _ in transformer.named_modules()}) + unexpected_modules = set(lora_module_names) - set(transformer_module_names) + if unexpected_modules: + logger.debug(f"Found unexpected modules: {unexpected_modules}. These will be ignored.") + + for k in lora_module_names: + if k in unexpected_modules: + continue + + base_param_name = ( + f"{k.replace(prefix, '')}.base_layer.weight" + if f"{k.replace(prefix, '')}.base_layer.weight" in transformer_state_dict + else f"{k.replace(prefix, '')}.weight" + ) + base_weight_param = transformer_state_dict[base_param_name] + lora_A_param = lora_state_dict[f"{prefix}{k}.lora_A.weight"] + + # TODO (sayakpaul): Handle the cases when we actually need to expand when using quantization. + base_module_shape = cls._calculate_module_shape(model=transformer, base_weight_param_name=base_param_name) + + if base_module_shape[1] > lora_A_param.shape[1]: + shape = (lora_A_param.shape[0], base_weight_param.shape[1]) + expanded_state_dict_weight = torch.zeros(shape, device=base_weight_param.device) + expanded_state_dict_weight[:, : lora_A_param.shape[1]].copy_(lora_A_param) + lora_state_dict[f"{prefix}{k}.lora_A.weight"] = expanded_state_dict_weight + expanded_module_names.add(k) + elif base_module_shape[1] < lora_A_param.shape[1]: + raise NotImplementedError( + f"This LoRA param ({k}.lora_A.weight) has an incompatible shape {lora_A_param.shape}. Please open an issue to file for a feature request - https://github.com/huggingface/diffusers/issues/new." + ) + + if expanded_module_names: + logger.info( + f"The following LoRA modules were zero padded to match the state dict of {cls.transformer_name}: {expanded_module_names}. Please open an issue if you think this was unexpected - https://github.com/huggingface/diffusers/issues/new." + ) + + return lora_state_dict + + @staticmethod + def _calculate_module_shape( + model: "torch.nn.Module", + base_module: "torch.nn.Linear" = None, + base_weight_param_name: str = None, + ) -> "torch.Size": + def _get_weight_shape(weight: torch.Tensor): + if weight.__class__.__name__ == "Params4bit": + return weight.quant_state.shape + elif weight.__class__.__name__ == "GGUFParameter": + return weight.quant_shape + else: + return weight.shape + + if base_module is not None: + return _get_weight_shape(base_module.weight) + elif base_weight_param_name is not None: + if not base_weight_param_name.endswith(".weight"): + raise ValueError( + f"Invalid `base_weight_param_name` passed as it does not end with '.weight' {base_weight_param_name=}." + ) + module_path = base_weight_param_name.rsplit(".weight", 1)[0] + submodule = get_submodule_by_name(model, module_path) + return _get_weight_shape(submodule.weight) + + raise ValueError("Either `base_module` or `base_weight_param_name` must be provided.") + + @staticmethod + def _prepare_outputs(state_dict, metadata, alphas=None, return_alphas=False, return_metadata=False): + outputs = [state_dict] + if return_alphas: + outputs.append(alphas) + if return_metadata: + outputs.append(metadata) + return tuple(outputs) if (return_alphas or return_metadata) else state_dict + + +# The reason why we subclass from `StableDiffusionLoraLoaderMixin` here is because Amused initially +# relied on `StableDiffusionLoraLoaderMixin` for its LoRA support. +class AmusedLoraLoaderMixin(StableDiffusionLoraLoaderMixin): + _lora_loadable_modules = ["transformer", "text_encoder"] + transformer_name = TRANSFORMER_NAME + text_encoder_name = TEXT_ENCODER_NAME + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.FluxLoraLoaderMixin.load_lora_into_transformer with FluxTransformer2DModel->UVit2DModel + def load_lora_into_transformer( + cls, + state_dict, + network_alphas, + transformer, + adapter_name=None, + metadata=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and not is_peft_version(">=", "0.13.1"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=network_alphas, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder + def load_lora_into_text_encoder( + cls, + state_dict, + network_alphas, + text_encoder, + prefix=None, + lora_scale=1.0, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + This will load the LoRA layers specified in `state_dict` into `text_encoder` + + Parameters: + state_dict (`dict`): + A standard state dict containing the lora layer parameters. The key should be prefixed with an + additional `text_encoder` to distinguish between unet lora layers. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + text_encoder (`CLIPTextModel`): + The text encoder model to load the LoRA layers into. + prefix (`str`): + Expected prefix of the `text_encoder` in the `state_dict`. + lora_scale (`float`): + How much to scale the output of the lora linear layer before it is added with the output of the regular + lora layer. + adapter_name (`str`, *optional*): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap (`bool`, *optional*): + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]. + metadata (`dict`): + Optional LoRA adapter metadata. When supplied, the `LoraConfig` arguments of `peft` won't be derived + from the state dict. + """ + _load_lora_into_text_encoder( + state_dict=state_dict, + network_alphas=network_alphas, + lora_scale=lora_scale, + text_encoder=text_encoder, + prefix=prefix, + text_encoder_name=cls.text_encoder_name, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + text_encoder_lora_layers: dict[str, torch.nn.Module] = None, + transformer_lora_layers: dict[str, torch.nn.Module] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + ): + r""" + Save the LoRA parameters corresponding to the UNet and text encoder. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save LoRA parameters to. Will be created if it doesn't exist. + unet_lora_layers (`dict[str, torch.nn.Module]` or `dict[str, torch.Tensor]`): + State dict of the LoRA layers corresponding to the `unet`. + text_encoder_lora_layers (`dict[str, torch.nn.Module]` or `dict[str, torch.Tensor]`): + State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text + encoder LoRA state dict because it comes from 🤗 Transformers. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful during distributed training and you + need to call this function on all processes. In this case, set `is_main_process=True` only on the main + process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful during distributed training when you need to + replace `torch.save` with another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + """ + state_dict = {} + + if not (transformer_lora_layers or text_encoder_lora_layers): + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + if transformer_lora_layers: + state_dict.update(cls.pack_weights(transformer_lora_layers, cls.transformer_name)) + + if text_encoder_lora_layers: + state_dict.update(cls.pack_weights(text_encoder_lora_layers, cls.text_encoder_name)) + + # Save the model + cls.write_lora_layers( + state_dict=state_dict, + save_directory=save_directory, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + +class CogVideoXLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`CogVideoXTransformer3DModel`]. Specific to [`CogVideoXPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->CogVideoXTransformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class Mochi1LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`MochiTransformer3DModel`]. Specific to [`MochiPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->MochiTransformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class LTXVideoLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`LTXVideoTransformer3DModel`]. Specific to [`LTXPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + is_non_diffusers_format = any(k.startswith("diffusion_model.") for k in state_dict) + if is_non_diffusers_format: + state_dict = _convert_non_diffusers_ltxv_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->LTXVideoTransformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class LTX2LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`LTX2VideoTransformer3DModel`]. Specific to [`LTX2Pipeline`]. + """ + + _lora_loadable_modules = ["transformer", "connectors"] + transformer_name = TRANSFORMER_NAME + connectors_name = LTX2_CONNECTOR_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + final_state_dict = state_dict + is_non_diffusers_format = any(k.startswith("diffusion_model.") for k in state_dict) + has_connector = any(k.startswith("text_embedding_projection.") for k in state_dict) + if is_non_diffusers_format: + final_state_dict = _convert_non_diffusers_ltx2_lora_to_diffusers(state_dict) + if has_connector: + connectors_state_dict = _convert_non_diffusers_ltx2_lora_to_diffusers( + state_dict, "text_embedding_projection" + ) + final_state_dict.update(connectors_state_dict) + out = (final_state_dict, metadata) if return_lora_metadata else final_state_dict + return out + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + transformer_peft_state_dict = { + k: v for k, v in state_dict.items() if k.startswith(f"{self.transformer_name}.") + } + connectors_peft_state_dict = {k: v for k, v in state_dict.items() if k.startswith(f"{self.connectors_name}.")} + self.load_lora_into_transformer( + transformer_peft_state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + if connectors_peft_state_dict: + self.load_lora_into_transformer( + connectors_peft_state_dict, + transformer=getattr(self, self.connectors_name) + if not hasattr(self, "connectors") + else self.connectors, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + prefix=self.connectors_name, + ) + + @classmethod + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + prefix: str = "transformer", + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {prefix}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + prefix=prefix, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class SanaLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`SanaTransformer2DModel`]. Specific to [`SanaPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->SanaTransformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class HeliosLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`HeliosTransformer3DModel`]. Specific to [`HeliosPipeline`] and [`HeliosPyramidPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + if any(k.startswith("diffusion_model.") for k in state_dict): + state_dict = _convert_non_diffusers_wan_lora_to_diffusers(state_dict) + elif any(k.startswith("lora_unet_") for k in state_dict): + state_dict = _convert_musubi_wan_lora_to_diffusers(state_dict) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->WanTransformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class HunyuanVideoLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`HunyuanVideoTransformer3DModel`]. Specific to [`HunyuanVideoPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + is_original_hunyuan_video = any("img_attn_qkv" in k for k in state_dict) + if is_original_hunyuan_video: + state_dict = _convert_hunyuan_video_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->HunyuanVideoTransformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class Lumina2LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`Lumina2Transformer2DModel`]. Specific to [`Lumina2Text2ImgPipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + # conversion. + non_diffusers = any(k.startswith("diffusion_model.") for k in state_dict) + if non_diffusers: + state_dict = _convert_non_diffusers_lumina2_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->Lumina2Transformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class KandinskyLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`Kandinsky5Transformer3DModel`], + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class WanLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`WanPipeline`] and `[WanImageToVideoPipeline`]. + """ + + _lora_loadable_modules = ["transformer", "transformer_2"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + if any(k.startswith("diffusion_model.") for k in state_dict): + state_dict = _convert_non_diffusers_wan_lora_to_diffusers(state_dict) + elif any(k.startswith("lora_unet_") for k in state_dict): + state_dict = _convert_musubi_wan_lora_to_diffusers(state_dict) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + @classmethod + def _maybe_expand_t2v_lora_for_i2v( + cls, + transformer: torch.nn.Module, + state_dict, + ): + if transformer.config.image_dim is None: + return state_dict + + target_device = transformer.device + + if any(k.startswith("transformer.blocks.") for k in state_dict): + num_blocks = len({k.split("blocks.")[1].split(".")[0] for k in state_dict if "blocks." in k}) + is_i2v_lora = any("add_k_proj" in k for k in state_dict) and any("add_v_proj" in k for k in state_dict) + has_bias = any(".lora_B.bias" in k for k in state_dict) + + if is_i2v_lora: + return state_dict + + for i in range(num_blocks): + for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]): + # These keys should exist if the block `i` was part of the T2V LoRA. + ref_key_lora_A = f"transformer.blocks.{i}.attn2.to_k.lora_A.weight" + ref_key_lora_B = f"transformer.blocks.{i}.attn2.to_k.lora_B.weight" + + if ref_key_lora_A not in state_dict or ref_key_lora_B not in state_dict: + continue + + state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_A.weight"] = torch.zeros_like( + state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_A.weight"], device=target_device + ) + state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.weight"] = torch.zeros_like( + state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_B.weight"], device=target_device + ) + + # If the original LoRA had biases (indicated by has_bias) + # AND the specific reference bias key exists for this block. + + ref_key_lora_B_bias = f"transformer.blocks.{i}.attn2.to_k.lora_B.bias" + if has_bias and ref_key_lora_B_bias in state_dict: + ref_lora_B_bias_tensor = state_dict[ref_key_lora_B_bias] + state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.bias"] = torch.zeros_like( + ref_lora_B_bias_tensor, + device=target_device, + ) + + return state_dict + + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + # convert T2V LoRA to I2V LoRA (when loaded to Wan I2V) by adding zeros for the additional (missing) _img layers + state_dict = self._maybe_expand_t2v_lora_for_i2v( + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + state_dict=state_dict, + ) + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) + if load_into_transformer_2: + if not hasattr(self, "transformer_2"): + raise AttributeError( + f"'{type(self).__name__}' object has no attribute transformer_2" + "Note that Wan2.1 models do not have a transformer_2 component." + "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." + ) + self.load_lora_into_transformer( + state_dict, + transformer=self.transformer_2, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + else: + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) + if not hasattr(self, "transformer") + else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->WanTransformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class SkyReelsV2LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`SkyReelsV2Transformer3DModel`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.WanLoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + if any(k.startswith("diffusion_model.") for k in state_dict): + state_dict = _convert_non_diffusers_wan_lora_to_diffusers(state_dict) + elif any(k.startswith("lora_unet_") for k in state_dict): + state_dict = _convert_musubi_wan_lora_to_diffusers(state_dict) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.WanLoraLoaderMixin._maybe_expand_t2v_lora_for_i2v + def _maybe_expand_t2v_lora_for_i2v( + cls, + transformer: torch.nn.Module, + state_dict, + ): + if transformer.config.image_dim is None: + return state_dict + + target_device = transformer.device + + if any(k.startswith("transformer.blocks.") for k in state_dict): + num_blocks = len({k.split("blocks.")[1].split(".")[0] for k in state_dict if "blocks." in k}) + is_i2v_lora = any("add_k_proj" in k for k in state_dict) and any("add_v_proj" in k for k in state_dict) + has_bias = any(".lora_B.bias" in k for k in state_dict) + + if is_i2v_lora: + return state_dict + + for i in range(num_blocks): + for o, c in zip(["k_img", "v_img"], ["add_k_proj", "add_v_proj"]): + # These keys should exist if the block `i` was part of the T2V LoRA. + ref_key_lora_A = f"transformer.blocks.{i}.attn2.to_k.lora_A.weight" + ref_key_lora_B = f"transformer.blocks.{i}.attn2.to_k.lora_B.weight" + + if ref_key_lora_A not in state_dict or ref_key_lora_B not in state_dict: + continue + + state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_A.weight"] = torch.zeros_like( + state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_A.weight"], device=target_device + ) + state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.weight"] = torch.zeros_like( + state_dict[f"transformer.blocks.{i}.attn2.to_k.lora_B.weight"], device=target_device + ) + + # If the original LoRA had biases (indicated by has_bias) + # AND the specific reference bias key exists for this block. + + ref_key_lora_B_bias = f"transformer.blocks.{i}.attn2.to_k.lora_B.bias" + if has_bias and ref_key_lora_B_bias in state_dict: + ref_lora_B_bias_tensor = state_dict[ref_key_lora_B_bias] + state_dict[f"transformer.blocks.{i}.attn2.{c}.lora_B.bias"] = torch.zeros_like( + ref_lora_B_bias_tensor, + device=target_device, + ) + + return state_dict + + # Copied from diffusers.loaders.lora_pipeline.WanLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + # convert T2V LoRA to I2V LoRA (when loaded to Wan I2V) by adding zeros for the additional (missing) _img layers + state_dict = self._maybe_expand_t2v_lora_for_i2v( + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + state_dict=state_dict, + ) + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + load_into_transformer_2 = kwargs.pop("load_into_transformer_2", False) + if load_into_transformer_2: + if not hasattr(self, "transformer_2"): + raise AttributeError( + f"'{type(self).__name__}' object has no attribute transformer_2" + "Note that Wan2.1 models do not have a transformer_2 component." + "Ensure the model has a transformer_2 component before setting load_into_transformer_2=True." + ) + self.load_lora_into_transformer( + state_dict, + transformer=self.transformer_2, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + else: + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) + if not hasattr(self, "transformer") + else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->SkyReelsV2Transformer3DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class CogView4LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`WanTransformer3DModel`]. Specific to [`CogView4Pipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.lora_state_dict + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->CogView4Transformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class HiDreamImageLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`HiDreamImageTransformer2DModel`]. Specific to [`HiDreamImagePipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + is_non_diffusers_format = any("diffusion_model" in k for k in state_dict) + if is_non_diffusers_format: + state_dict = _convert_non_diffusers_hidream_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->HiDreamImageTransformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.SanaLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class QwenImageLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`QwenImageTransformer2DModel`]. Specific to [`QwenImagePipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + has_alphas_in_sd = any(k.endswith(".alpha") for k in state_dict) + has_lora_unet = any(k.startswith("lora_unet_") for k in state_dict) + has_diffusion_model = any(k.startswith("diffusion_model.") for k in state_dict) + has_default = any("default." in k for k in state_dict) + if has_alphas_in_sd or has_lora_unet or has_diffusion_model or has_default: + state_dict = _convert_non_diffusers_qwen_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->QwenImageTransformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class ZImageLoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`ZImageTransformer2DModel`]. Specific to [`ZImagePipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + has_alphas_in_sd = any(k.endswith(".alpha") for k in state_dict) + has_lora_unet = any(k.startswith("lora_unet_") for k in state_dict) + has_diffusion_model = any(k.startswith("diffusion_model.") for k in state_dict) + has_default = any("default." in k for k in state_dict) + if has_alphas_in_sd or has_lora_unet or has_diffusion_model or has_default: + state_dict = _convert_non_diffusers_z_image_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->ZImageTransformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class Flux2LoraLoaderMixin(LoraBaseMixin): + r""" + Load LoRA layers into [`Flux2Transformer2DModel`]. Specific to [`Flux2Pipeline`]. + """ + + _lora_loadable_modules = ["transformer"] + transformer_name = TRANSFORMER_NAME + + @classmethod + @validate_hf_hub_args + def lora_state_dict( + cls, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.lora_state_dict`] for more details. + """ + # Load the main state dict first which has the LoRA layers for either of + # transformer and text encoder or both. + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + return_lora_metadata = kwargs.pop("return_lora_metadata", False) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + ) + + is_dora_scale_present = any("dora_scale" in k for k in state_dict) + if is_dora_scale_present: + warn_msg = "It seems like you are using a DoRA checkpoint that is not compatible in Diffusers at the moment. So, we are going to filter out the keys associated to 'dora_scale` from the state dict. If you think this is a mistake please open an issue https://github.com/huggingface/diffusers/issues/new." + logger.warning(warn_msg) + state_dict = {k: v for k, v in state_dict.items() if "dora_scale" not in k} + + is_peft_format = any(k.startswith("base_model.model.") for k in state_dict) + if is_peft_format: + state_dict = {k.replace("base_model.model.", "diffusion_model."): v for k, v in state_dict.items()} + + is_ai_toolkit = any(k.startswith("diffusion_model.") for k in state_dict) + if is_ai_toolkit: + state_dict = _convert_non_diffusers_flux2_lora_to_diffusers(state_dict) + + out = (state_dict, metadata) if return_lora_metadata else state_dict + return out + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.load_lora_weights + def load_lora_weights( + self, + pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], + adapter_name: str | None = None, + hotswap: bool = False, + **kwargs, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for more details. + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT_LORA) + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # if a dict is passed, copy it instead of modifying it inplace + if isinstance(pretrained_model_name_or_path_or_dict, dict): + pretrained_model_name_or_path_or_dict = pretrained_model_name_or_path_or_dict.copy() + + # First, ensure that the checkpoint is a compatible one and can be successfully loaded. + kwargs["return_lora_metadata"] = True + state_dict, metadata = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs) + + is_correct_format = all("lora" in key for key in state_dict.keys()) + if not is_correct_format: + raise ValueError("Invalid LoRA checkpoint. Make sure all LoRA param names contain `'lora'` substring.") + + self.load_lora_into_transformer( + state_dict, + transformer=getattr(self, self.transformer_name) if not hasattr(self, "transformer") else self.transformer, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=self, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.SD3LoraLoaderMixin.load_lora_into_transformer with SD3Transformer2DModel->CogView4Transformer2DModel + def load_lora_into_transformer( + cls, + state_dict, + transformer, + adapter_name=None, + _pipeline=None, + low_cpu_mem_usage=False, + hotswap: bool = False, + metadata=None, + ): + """ + See [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_into_unet`] for more details. + """ + if low_cpu_mem_usage and is_peft_version("<", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + # Load the layers corresponding to transformer. + logger.info(f"Loading {cls.transformer_name}.") + transformer.load_lora_adapter( + state_dict, + network_alphas=None, + adapter_name=adapter_name, + metadata=metadata, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + hotswap=hotswap, + ) + + @classmethod + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.save_lora_weights + def save_lora_weights( + cls, + save_directory: str | os.PathLike, + transformer_lora_layers: dict[str, torch.nn.Module | torch.Tensor] = None, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + transformer_lora_adapter_metadata: dict | None = None, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for more information. + """ + lora_layers = {} + lora_metadata = {} + + if transformer_lora_layers: + lora_layers[cls.transformer_name] = transformer_lora_layers + lora_metadata[cls.transformer_name] = transformer_lora_adapter_metadata + + if not lora_layers: + raise ValueError("You must pass at least one of `transformer_lora_layers` or `text_encoder_lora_layers`.") + + cls._save_lora_weights( + save_directory=save_directory, + lora_layers=lora_layers, + lora_metadata=lora_metadata, + is_main_process=is_main_process, + weight_name=weight_name, + save_function=save_function, + safe_serialization=safe_serialization, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.fuse_lora + def fuse_lora( + self, + components: list[str] = ["transformer"], + lora_scale: float = 1.0, + safe_fusing: bool = False, + adapter_names: list[str] | None = None, + **kwargs, + ): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.fuse_lora`] for more details. + """ + super().fuse_lora( + components=components, + lora_scale=lora_scale, + safe_fusing=safe_fusing, + adapter_names=adapter_names, + **kwargs, + ) + + # Copied from diffusers.loaders.lora_pipeline.CogVideoXLoraLoaderMixin.unfuse_lora + def unfuse_lora(self, components: list[str] = ["transformer"], **kwargs): + r""" + See [`~loaders.StableDiffusionLoraLoaderMixin.unfuse_lora`] for more details. + """ + super().unfuse_lora(components=components, **kwargs) + + +class LoraLoaderMixin(StableDiffusionLoraLoaderMixin): + def __init__(self, *args, **kwargs): + deprecation_message = "LoraLoaderMixin is deprecated and this will be removed in a future version. Please use `StableDiffusionLoraLoaderMixin`, instead." + deprecate("LoraLoaderMixin", "1.0.0", deprecation_message) + super().__init__(*args, **kwargs) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/peft.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/peft.py new file mode 100644 index 0000000000000000000000000000000000000000..a96542c2a50c64703f081d60c3b326de1b591274 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/peft.py @@ -0,0 +1,851 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect +import json +import os +from functools import partial +from pathlib import Path +from typing import Literal + +import safetensors +import torch + +from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading +from ..utils import ( + MIN_PEFT_VERSION, + USE_PEFT_BACKEND, + check_peft_version, + convert_sai_sd_control_lora_state_dict_to_peft, + convert_unet_state_dict_to_peft, + delete_adapter_layers, + get_adapter_name, + is_peft_available, + is_peft_version, + logging, + set_adapter_layers, + set_weights_and_activate_adapters, +) +from ..utils.peft_utils import _create_lora_config, _maybe_warn_for_unhandled_keys +from .lora_base import _fetch_state_dict, _func_optionally_disable_offloading +from .unet_loader_utils import _maybe_expand_lora_scales + + +logger = logging.get_logger(__name__) + +_SET_ADAPTER_SCALE_FN_MAPPING = { + "UNet2DConditionModel": _maybe_expand_lora_scales, + "UNetMotionModel": _maybe_expand_lora_scales, + "SD3Transformer2DModel": lambda model_cls, weights: weights, + "FluxTransformer2DModel": lambda model_cls, weights: weights, + "CogVideoXTransformer3DModel": lambda model_cls, weights: weights, + "ConsisIDTransformer3DModel": lambda model_cls, weights: weights, + "HeliosTransformer3DModel": lambda model_cls, weights: weights, + "MochiTransformer3DModel": lambda model_cls, weights: weights, + "HunyuanVideoTransformer3DModel": lambda model_cls, weights: weights, + "LTXVideoTransformer3DModel": lambda model_cls, weights: weights, + "SanaTransformer2DModel": lambda model_cls, weights: weights, + "AuraFlowTransformer2DModel": lambda model_cls, weights: weights, + "Lumina2Transformer2DModel": lambda model_cls, weights: weights, + "WanTransformer3DModel": lambda model_cls, weights: weights, + "CogView4Transformer2DModel": lambda model_cls, weights: weights, + "HiDreamImageTransformer2DModel": lambda model_cls, weights: weights, + "HunyuanVideoFramepackTransformer3DModel": lambda model_cls, weights: weights, + "WanVACETransformer3DModel": lambda model_cls, weights: weights, + "ChromaTransformer2DModel": lambda model_cls, weights: weights, + "ChronoEditTransformer3DModel": lambda model_cls, weights: weights, + "QwenImageTransformer2DModel": lambda model_cls, weights: weights, + "Flux2Transformer2DModel": lambda model_cls, weights: weights, + "ZImageTransformer2DModel": lambda model_cls, weights: weights, + "LTX2VideoTransformer3DModel": lambda model_cls, weights: weights, + "LTX2TextConnectors": lambda model_cls, weights: weights, +} + + +class PeftAdapterMixin: + """ + A class containing all functions for loading and using adapters weights that are supported in PEFT library. For + more details about adapters and injecting them in a base model, check out the PEFT + [documentation](https://huggingface.co/docs/peft/index). + + Install the latest version of PEFT, and use this mixin to: + + - Attach new adapters in the model. + - Attach multiple adapters and iteratively activate/deactivate them. + - Activate/deactivate all adapters from the model. + - Get a list of the active adapters. + """ + + _hf_peft_config_loaded = False + # kwargs for prepare_model_for_compiled_hotswap, if required + _prepare_lora_hotswap_kwargs: dict | None = None + + @classmethod + # Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading + def _optionally_disable_offloading(cls, _pipeline): + return _func_optionally_disable_offloading(_pipeline=_pipeline) + + def load_lora_adapter( + self, pretrained_model_name_or_path_or_dict, prefix="transformer", hotswap: bool = False, **kwargs + ): + r""" + Loads a LoRA adapter into the underlying model. + + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + + prefix (`str`, *optional*): Prefix to filter the state dict. + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + hotswap : (`bool`, *optional*) + Defaults to `False`. Whether to substitute an existing (LoRA) adapter with the newly loaded adapter + in-place. This means that, instead of loading an additional adapter, this will take the existing + adapter weights and replace them with the weights of the new adapter. This can be faster and more + memory efficient. However, the main advantage of hotswapping is that when the model is compiled with + torch.compile, loading the new adapter does not require recompilation of the model. When using + hotswapping, the passed `adapter_name` should be the name of an already loaded adapter. + + If the new adapter and the old adapter have different ranks and/or LoRA alphas (i.e. scaling), you need + to call an additional method before loading the adapter: + + ```py + pipeline = ... # load diffusers pipeline + max_rank = ... # the highest rank among all LoRAs that you want to load + # call *before* compiling and loading the LoRA adapter + pipeline.enable_lora_hotswap(target_rank=max_rank) + pipeline.load_lora_weights(file_name) + # optionally compile the model now + ``` + + Note that hotswapping adapters of the text encoder is not yet supported. There are some further + limitations to this technique, which are documented here: + https://huggingface.co/docs/peft/main/en/package_reference/hotswap + metadata: + LoRA adapter metadata. When supplied, the metadata inferred through the state dict isn't used to + initialize `LoraConfig`. + """ + from peft import inject_adapter_in_model, set_peft_model_state_dict + from peft.tuners.tuners_utils import BaseTunerLayer + + from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading + + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + adapter_name = kwargs.pop("adapter_name", None) + network_alphas = kwargs.pop("network_alphas", None) + _pipeline = kwargs.pop("_pipeline", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False) + metadata = kwargs.pop("metadata", None) + allow_pickle = False + + if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + state_dict, metadata = _fetch_state_dict( + pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict, + weight_name=weight_name, + use_safetensors=use_safetensors, + local_files_only=local_files_only, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + allow_pickle=allow_pickle, + metadata=metadata, + ) + if network_alphas is not None and prefix is None: + raise ValueError("`network_alphas` cannot be None when `prefix` is None.") + if network_alphas and metadata: + raise ValueError("Both `network_alphas` and `metadata` cannot be specified.") + + if prefix is not None: + state_dict = {k.removeprefix(f"{prefix}."): v for k, v in state_dict.items() if k.startswith(f"{prefix}.")} + if metadata is not None: + metadata = {k.removeprefix(f"{prefix}."): v for k, v in metadata.items() if k.startswith(f"{prefix}.")} + + if len(state_dict) > 0: + if adapter_name in getattr(self, "peft_config", {}) and not hotswap: + raise ValueError( + f"Adapter name {adapter_name} already in use in the model - please select a new adapter name." + ) + elif adapter_name not in getattr(self, "peft_config", {}) and hotswap: + raise ValueError( + f"Trying to hotswap LoRA adapter '{adapter_name}' but there is no existing adapter by that name. " + "Please choose an existing adapter name or set `hotswap=False` to prevent hotswapping." + ) + + # check with first key if is not in peft format + first_key = next(iter(state_dict.keys())) + if "lora_A" not in first_key: + state_dict = convert_unet_state_dict_to_peft(state_dict) + + # Control LoRA from SAI is different from BFL Control LoRA + # https://huggingface.co/stabilityai/control-lora + # https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors + is_sai_sd_control_lora = "lora_controlnet" in state_dict + if is_sai_sd_control_lora: + state_dict = convert_sai_sd_control_lora_state_dict_to_peft(state_dict) + + rank = {} + for key, val in state_dict.items(): + # Cannot figure out rank from lora layers that don't have at least 2 dimensions. + # Bias layers in LoRA only have a single dimension + if "lora_B" in key and val.ndim > 1: + # Check out https://github.com/huggingface/peft/pull/2419 for the `^` symbol. + # We may run into some ambiguous configuration values when a model has module + # names, sharing a common prefix (`proj_out.weight` and `blocks.transformer.proj_out.weight`, + # for example) and they have different LoRA ranks. + rank[f"^{key}"] = val.shape[1] + + if network_alphas is not None and len(network_alphas) >= 1: + alpha_keys = [k for k in network_alphas.keys() if k.startswith(f"{prefix}.")] + network_alphas = { + k.removeprefix(f"{prefix}."): v for k, v in network_alphas.items() if k in alpha_keys + } + + # adapter_name + if adapter_name is None: + adapter_name = get_adapter_name(self) + + # create LoraConfig + lora_config = _create_lora_config( + state_dict, + network_alphas, + metadata, + rank, + model_state_dict=self.state_dict(), + adapter_name=adapter_name, + ) + + # Adjust LoRA config for Control LoRA + if is_sai_sd_control_lora: + lora_config.lora_alpha = lora_config.r + lora_config.alpha_pattern = lora_config.rank_pattern + lora_config.bias = "all" + lora_config.modules_to_save = lora_config.exclude_modules + lora_config.exclude_modules = None + + # =", "0.13.1"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + if hotswap or (self._prepare_lora_hotswap_kwargs is not None): + if is_peft_version(">", "0.14.0"): + from peft.utils.hotswap import ( + check_hotswap_configs_compatible, + hotswap_adapter_from_state_dict, + prepare_model_for_compiled_hotswap, + ) + else: + msg = ( + "Hotswapping requires PEFT > v0.14. Please upgrade PEFT to a higher version or install it " + "from source." + ) + raise ImportError(msg) + + if hotswap: + + def map_state_dict_for_hotswap(sd): + # For hotswapping, we need the adapter name to be present in the state dict keys + new_sd = {} + for k, v in sd.items(): + if k.endswith("lora_A.weight") or k.endswith("lora_B.weight"): + k = k[: -len(".weight")] + f".{adapter_name}.weight" + elif k.endswith("lora_B.bias"): # lora_bias=True option + k = k[: -len(".bias")] + f".{adapter_name}.bias" + new_sd[k] = v + return new_sd + + # To handle scenarios where we cannot successfully set state dict. If it's unsuccessful, + # we should also delete the `peft_config` associated to the `adapter_name`. + try: + if hotswap: + state_dict = map_state_dict_for_hotswap(state_dict) + check_hotswap_configs_compatible(self.peft_config[adapter_name], lora_config) + try: + hotswap_adapter_from_state_dict( + model=self, + state_dict=state_dict, + adapter_name=adapter_name, + config=lora_config, + ) + except Exception as e: + logger.error(f"Hotswapping {adapter_name} was unsuccessful with the following error: \n{e}") + raise + # the hotswap function raises if there are incompatible keys, so if we reach this point we can set + # it to None + incompatible_keys = None + else: + inject_adapter_in_model( + lora_config, self, adapter_name=adapter_name, state_dict=state_dict, **peft_kwargs + ) + incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name, **peft_kwargs) + + if self._prepare_lora_hotswap_kwargs is not None: + # For hotswapping of compiled models or adapters with different ranks. + # If the user called enable_lora_hotswap, we need to ensure it is called: + # - after the first adapter was loaded + # - before the model is compiled and the 2nd adapter is being hotswapped in + # Therefore, it needs to be called here + prepare_model_for_compiled_hotswap( + self, config=lora_config, **self._prepare_lora_hotswap_kwargs + ) + # We only want to call prepare_model_for_compiled_hotswap once + self._prepare_lora_hotswap_kwargs = None + + # Set peft config loaded flag to True if module has been successfully injected and incompatible keys retrieved + if not self._hf_peft_config_loaded: + self._hf_peft_config_loaded = True + except Exception as e: + # In case `inject_adapter_in_model()` was unsuccessful even before injecting the `peft_config`. + if hasattr(self, "peft_config"): + for module in self.modules(): + if isinstance(module, BaseTunerLayer): + active_adapters = module.active_adapters + for active_adapter in active_adapters: + if adapter_name in active_adapter: + module.delete_adapter(adapter_name) + + self.peft_config.pop(adapter_name) + logger.error(f"Loading {adapter_name} was unsuccessful with the following error: \n{e}") + raise + + _maybe_warn_for_unhandled_keys(incompatible_keys, adapter_name) + + # Offload back. + if is_model_cpu_offload: + _pipeline.enable_model_cpu_offload() + elif is_sequential_cpu_offload: + _pipeline.enable_sequential_cpu_offload() + elif is_group_offload: + for component in _pipeline.components.values(): + if isinstance(component, torch.nn.Module): + _maybe_remove_and_reapply_group_offloading(component) + # Unsafe code /> + + if prefix is not None and not state_dict: + model_class_name = self.__class__.__name__ + logger.warning( + f"No LoRA keys associated to {model_class_name} found with the {prefix=}. " + "This is safe to ignore if LoRA state dict didn't originally have any " + f"{model_class_name} related params. You can also try specifying `prefix=None` " + "to resolve the warning. Otherwise, open an issue if you think it's unexpected: " + "https://github.com/huggingface/diffusers/issues/new" + ) + + def save_lora_adapter( + self, + save_directory, + adapter_name: str = "default", + upcast_before_saving: bool = False, + safe_serialization: bool = True, + weight_name: str | None = None, + ): + """ + Save the LoRA parameters corresponding to the underlying model. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save LoRA parameters to. Will be created if it doesn't exist. + adapter_name: (`str`, defaults to "default"): The name of the adapter to serialize. Useful when the + underlying model has multiple adapters loaded. + upcast_before_saving (`bool`, defaults to `False`): + Whether to cast the underlying model to `torch.float32` before serialization. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + weight_name: (`str`, *optional*, defaults to `None`): Name of the file to serialize the state dict with. + """ + from peft.utils import get_peft_model_state_dict + + from .lora_base import LORA_ADAPTER_METADATA_KEY, LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE + + if adapter_name is None: + adapter_name = get_adapter_name(self) + + if adapter_name not in getattr(self, "peft_config", {}): + raise ValueError(f"Adapter name {adapter_name} not found in the model.") + + lora_adapter_metadata = self.peft_config[adapter_name].to_dict() + + lora_layers_to_save = get_peft_model_state_dict( + self.to(dtype=torch.float32 if upcast_before_saving else None), adapter_name=adapter_name + ) + if os.path.isfile(save_directory): + raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file") + + if safe_serialization: + + def save_function(weights, filename): + # Inject framework format. + metadata = {"format": "pt"} + if lora_adapter_metadata is not None: + for key, value in lora_adapter_metadata.items(): + if isinstance(value, set): + lora_adapter_metadata[key] = list(value) + metadata[LORA_ADAPTER_METADATA_KEY] = json.dumps(lora_adapter_metadata, indent=2, sort_keys=True) + + return safetensors.torch.save_file(weights, filename, metadata=metadata) + + else: + save_function = torch.save + + os.makedirs(save_directory, exist_ok=True) + + if weight_name is None: + if safe_serialization: + weight_name = LORA_WEIGHT_NAME_SAFE + else: + weight_name = LORA_WEIGHT_NAME + + save_path = Path(save_directory, weight_name).as_posix() + save_function(lora_layers_to_save, save_path) + logger.info(f"Model weights saved in {save_path}") + + def set_adapters( + self, + adapter_names: list[str] | str, + weights: float | dict | list[float] | list[dict] | list[None] | None = None, + ): + """ + Set the currently active adapters for use in the diffusion network (e.g. unet, transformer, etc.). + + Args: + adapter_names (`list[str]` or `str`): + The names of the adapters to use. + weights (`Union[List[float], float]`, *optional*): + The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the + adapters. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") + pipeline.unet.set_adapters(["cinematic", "pixel"], weights=[0.5, 0.5]) + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for `set_adapters()`.") + + adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names + + # Expand weights into a list, one entry per adapter + # examples for e.g. 2 adapters: [{...}, 7] -> [7,7] ; None -> [None, None] + if not isinstance(weights, list): + weights = [weights] * len(adapter_names) + + if len(adapter_names) != len(weights): + raise ValueError( + f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}." + ) + + # Set None values to default of 1.0 + # e.g. [{...}, 7] -> [{...}, 7] ; [None, None] -> [1.0, 1.0] + weights = [w if w is not None else 1.0 for w in weights] + + # e.g. [{...}, 7] -> [{expanded dict...}, 7] + scale_expansion_fn = _SET_ADAPTER_SCALE_FN_MAPPING[self.__class__.__name__] + weights = scale_expansion_fn(self, weights) + + set_weights_and_activate_adapters(self, adapter_names, weights) + + def add_adapter(self, adapter_config, adapter_name: str = "default") -> None: + r""" + Adds a new adapter to the current model for training. If no adapter name is passed, a default name is assigned + to the adapter to follow the convention of the PEFT library. + + If you are not familiar with adapters and PEFT methods, we invite you to read more about them in the PEFT + [documentation](https://huggingface.co/docs/peft). + + Args: + adapter_config (`[~peft.PeftConfig]`): + The configuration of the adapter to add; supported adapters are non-prefix tuning and adaption prompt + methods. + adapter_name (`str`, *optional*, defaults to `"default"`): + The name of the adapter to add. If no name is passed, a default name is assigned to the adapter. + """ + check_peft_version(min_version=MIN_PEFT_VERSION) + + if not is_peft_available(): + raise ImportError("PEFT is not available. Please install PEFT to use this function: `pip install peft`.") + + from peft import PeftConfig, inject_adapter_in_model + + if not self._hf_peft_config_loaded: + self._hf_peft_config_loaded = True + elif adapter_name in self.peft_config: + raise ValueError(f"Adapter with name {adapter_name} already exists. Please use a different name.") + + if not isinstance(adapter_config, PeftConfig): + raise ValueError( + f"adapter_config should be an instance of PeftConfig. Got {type(adapter_config)} instead." + ) + + # Unlike transformers, here we don't need to retrieve the name_or_path of the unet as the loading logic is + # handled by the `load_lora_layers` or `StableDiffusionLoraLoaderMixin`. Therefore we set it to `None` here. + adapter_config.base_model_name_or_path = None + inject_adapter_in_model(adapter_config, self, adapter_name) + self.set_adapter(adapter_name) + + def set_adapter(self, adapter_name: str | list[str]) -> None: + """ + Sets a specific adapter by forcing the model to only use that adapter and disables the other adapters. + + If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT + [documentation](https://huggingface.co/docs/peft). + + Args: + adapter_name (str | list[str])): + The list of adapters to set or the adapter name in the case of a single adapter. + """ + check_peft_version(min_version=MIN_PEFT_VERSION) + + if not self._hf_peft_config_loaded: + raise ValueError("No adapter loaded. Please load an adapter first.") + + if isinstance(adapter_name, str): + adapter_name = [adapter_name] + + missing = set(adapter_name) - set(self.peft_config) + if len(missing) > 0: + raise ValueError( + f"Following adapter(s) could not be found: {', '.join(missing)}. Make sure you are passing the correct adapter name(s)." + f" current loaded adapters are: {list(self.peft_config.keys())}" + ) + + from peft.tuners.tuners_utils import BaseTunerLayer + + _adapters_has_been_set = False + + for _, module in self.named_modules(): + if isinstance(module, BaseTunerLayer): + if hasattr(module, "set_adapter"): + module.set_adapter(adapter_name) + # Previous versions of PEFT does not support multi-adapter inference + elif not hasattr(module, "set_adapter") and len(adapter_name) != 1: + raise ValueError( + "You are trying to set multiple adapters and you have a PEFT version that does not support multi-adapter inference. Please upgrade to the latest version of PEFT." + " `pip install -U peft` or `pip install -U git+https://github.com/huggingface/peft.git`" + ) + else: + module.active_adapter = adapter_name + _adapters_has_been_set = True + + if not _adapters_has_been_set: + raise ValueError( + "Did not succeeded in setting the adapter. Please make sure you are using a model that supports adapters." + ) + + def disable_adapters(self) -> None: + r""" + Disable all adapters attached to the model and fallback to inference with the base model only. + + If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT + [documentation](https://huggingface.co/docs/peft). + """ + check_peft_version(min_version=MIN_PEFT_VERSION) + + if not self._hf_peft_config_loaded: + raise ValueError("No adapter loaded. Please load an adapter first.") + + from peft.tuners.tuners_utils import BaseTunerLayer + + for _, module in self.named_modules(): + if isinstance(module, BaseTunerLayer): + if hasattr(module, "enable_adapters"): + module.enable_adapters(enabled=False) + else: + # support for older PEFT versions + module.disable_adapters = True + + def enable_adapters(self) -> None: + """ + Enable adapters that are attached to the model. The model uses `self.active_adapters()` to retrieve the list of + adapters to enable. + + If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT + [documentation](https://huggingface.co/docs/peft). + """ + check_peft_version(min_version=MIN_PEFT_VERSION) + + if not self._hf_peft_config_loaded: + raise ValueError("No adapter loaded. Please load an adapter first.") + + from peft.tuners.tuners_utils import BaseTunerLayer + + for _, module in self.named_modules(): + if isinstance(module, BaseTunerLayer): + if hasattr(module, "enable_adapters"): + module.enable_adapters(enabled=True) + else: + # support for older PEFT versions + module.disable_adapters = False + + def active_adapters(self) -> list[str]: + """ + Gets the current list of active adapters of the model. + + If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT + [documentation](https://huggingface.co/docs/peft). + """ + check_peft_version(min_version=MIN_PEFT_VERSION) + + if not is_peft_available(): + raise ImportError("PEFT is not available. Please install PEFT to use this function: `pip install peft`.") + + if not self._hf_peft_config_loaded: + raise ValueError("No adapter loaded. Please load an adapter first.") + + from peft.tuners.tuners_utils import BaseTunerLayer + + for _, module in self.named_modules(): + if isinstance(module, BaseTunerLayer): + return module.active_adapter + + def fuse_lora(self, lora_scale=1.0, safe_fusing=False, adapter_names=None): + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for `fuse_lora()`.") + + self.lora_scale = lora_scale + self._safe_fusing = safe_fusing + self.apply(partial(self._fuse_lora_apply, adapter_names=adapter_names)) + + def _fuse_lora_apply(self, module, adapter_names=None): + from peft.tuners.tuners_utils import BaseTunerLayer + + merge_kwargs = {"safe_merge": self._safe_fusing} + + if isinstance(module, BaseTunerLayer): + if self.lora_scale != 1.0: + module.scale_layer(self.lora_scale) + + # For BC with previous PEFT versions, we need to check the signature + # of the `merge` method to see if it supports the `adapter_names` argument. + supported_merge_kwargs = list(inspect.signature(module.merge).parameters) + if "adapter_names" in supported_merge_kwargs: + merge_kwargs["adapter_names"] = adapter_names + elif "adapter_names" not in supported_merge_kwargs and adapter_names is not None: + raise ValueError( + "The `adapter_names` argument is not supported with your PEFT version. Please upgrade" + " to the latest version of PEFT. `pip install -U peft`" + ) + + module.merge(**merge_kwargs) + + def unfuse_lora(self): + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for `unfuse_lora()`.") + self.apply(self._unfuse_lora_apply) + + def _unfuse_lora_apply(self, module): + from peft.tuners.tuners_utils import BaseTunerLayer + + if isinstance(module, BaseTunerLayer): + module.unmerge() + + def unload_lora(self): + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for `unload_lora()`.") + + from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading + from ..utils import recurse_remove_peft_layers + + recurse_remove_peft_layers(self) + if hasattr(self, "peft_config"): + del self.peft_config + if hasattr(self, "_hf_peft_config_loaded"): + self._hf_peft_config_loaded = None + + _maybe_remove_and_reapply_group_offloading(self) + + def disable_lora(self): + """ + Disables the active LoRA layers of the underlying model. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + pipeline.unet.disable_lora() + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + set_adapter_layers(self, enabled=False) + + def enable_lora(self): + """ + Enables the active LoRA layers of the underlying model. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + pipeline.unet.enable_lora() + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + set_adapter_layers(self, enabled=True) + + def delete_adapters(self, adapter_names: list[str] | str): + """ + Delete an adapter's LoRA layers from the underlying model. + + Args: + adapter_names (`list[str, str]`): + The names (single string or list of strings) of the adapter to delete. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.load_lora_weights( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_names="cinematic" + ) + pipeline.unet.delete_adapters("cinematic") + ``` + """ + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + if isinstance(adapter_names, str): + adapter_names = [adapter_names] + + for adapter_name in adapter_names: + delete_adapter_layers(self, adapter_name) + + # Pop also the corresponding adapter from the config + if hasattr(self, "peft_config"): + self.peft_config.pop(adapter_name, None) + + _maybe_remove_and_reapply_group_offloading(self) + + def enable_lora_hotswap( + self, target_rank: int = 128, check_compiled: Literal["error", "warn", "ignore"] = "error" + ) -> None: + """Enables the possibility to hotswap LoRA adapters. + + Calling this method is only required when hotswapping adapters and if the model is compiled or if the ranks of + the loaded adapters differ. + + Args: + target_rank (`int`, *optional*, defaults to `128`): + The highest rank among all the adapters that will be loaded. + + check_compiled (`str`, *optional*, defaults to `"error"`): + How to handle the case when the model is already compiled, which should generally be avoided. The + options are: + - "error" (default): raise an error + - "warn": issue a warning + - "ignore": do nothing + """ + if getattr(self, "peft_config", {}): + if check_compiled == "error": + raise RuntimeError("Call `enable_lora_hotswap` before loading the first adapter.") + elif check_compiled == "warn": + logger.warning( + "It is recommended to call `enable_lora_hotswap` before loading the first adapter to avoid recompilation." + ) + elif check_compiled != "ignore": + raise ValueError( + f"check_compiles should be one of 'error', 'warn', or 'ignore', got '{check_compiled}' instead." + ) + + self._prepare_lora_hotswap_kwargs = {"target_rank": target_rank, "check_compiled": check_compiled} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file.py new file mode 100644 index 0000000000000000000000000000000000000000..0b0d52d4a4121e05b16b7d283002145959b7e18d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file.py @@ -0,0 +1,565 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import inspect +import os + +import torch +from huggingface_hub import snapshot_download +from huggingface_hub.utils import LocalEntryNotFoundError, validate_hf_hub_args +from packaging import version +from typing_extensions import Self + +from ..utils import deprecate, is_transformers_available, logging +from .single_file_utils import ( + SingleFileComponentError, + _is_legacy_scheduler_kwargs, + _is_model_weights_in_cached_folder, + _legacy_load_clip_tokenizer, + _legacy_load_safety_checker, + _legacy_load_scheduler, + create_diffusers_clip_model_from_ldm, + create_diffusers_t5_model_from_checkpoint, + fetch_diffusers_config, + fetch_original_config, + is_clip_model_in_single_file, + is_t5_in_single_file, + load_single_file_checkpoint, +) + + +logger = logging.get_logger(__name__) + +# Legacy behaviour. `from_single_file` does not load the safety checker unless explicitly provided +SINGLE_FILE_OPTIONAL_COMPONENTS = ["safety_checker"] + +if is_transformers_available(): + import transformers + from transformers import PreTrainedModel, PreTrainedTokenizer + + +def load_single_file_sub_model( + library_name, + class_name, + name, + checkpoint, + pipelines, + is_pipeline_module, + cached_model_config_path, + original_config=None, + local_files_only=False, + torch_dtype=None, + is_legacy_loading=False, + disable_mmap=False, + **kwargs, +): + if is_pipeline_module: + pipeline_module = getattr(pipelines, library_name) + class_obj = getattr(pipeline_module, class_name) + else: + # else we just import it from the library. + library = importlib.import_module(library_name) + class_obj = getattr(library, class_name) + + if is_transformers_available(): + transformers_version = version.parse(version.parse(transformers.__version__).base_version) + else: + transformers_version = "N/A" + + is_transformers_model = ( + is_transformers_available() + and issubclass(class_obj, PreTrainedModel) + and transformers_version >= version.parse("4.20.0") + ) + is_tokenizer = ( + is_transformers_available() + and issubclass(class_obj, PreTrainedTokenizer) + and transformers_version >= version.parse("4.20.0") + ) + + diffusers_module = importlib.import_module(__name__.split(".")[0]) + is_diffusers_single_file_model = issubclass(class_obj, diffusers_module.FromOriginalModelMixin) + is_diffusers_model = issubclass(class_obj, diffusers_module.ModelMixin) + is_diffusers_scheduler = issubclass(class_obj, diffusers_module.SchedulerMixin) + + if is_diffusers_single_file_model: + load_method = getattr(class_obj, "from_single_file") + + # We cannot provide two different config options to the `from_single_file` method + # Here we have to ignore loading the config from `cached_model_config_path` if `original_config` is provided + if original_config: + cached_model_config_path = None + + loaded_sub_model = load_method( + pretrained_model_link_or_path_or_dict=checkpoint, + original_config=original_config, + config=cached_model_config_path, + subfolder=name, + torch_dtype=torch_dtype, + local_files_only=local_files_only, + disable_mmap=disable_mmap, + **kwargs, + ) + + elif is_transformers_model and is_clip_model_in_single_file(class_obj, checkpoint): + loaded_sub_model = create_diffusers_clip_model_from_ldm( + class_obj, + checkpoint=checkpoint, + config=cached_model_config_path, + subfolder=name, + torch_dtype=torch_dtype, + local_files_only=local_files_only, + is_legacy_loading=is_legacy_loading, + ) + + elif is_transformers_model and is_t5_in_single_file(checkpoint): + loaded_sub_model = create_diffusers_t5_model_from_checkpoint( + class_obj, + checkpoint=checkpoint, + config=cached_model_config_path, + subfolder=name, + torch_dtype=torch_dtype, + local_files_only=local_files_only, + ) + + elif is_tokenizer and is_legacy_loading: + loaded_sub_model = _legacy_load_clip_tokenizer( + class_obj, checkpoint=checkpoint, config=cached_model_config_path, local_files_only=local_files_only + ) + + elif is_diffusers_scheduler and (is_legacy_loading or _is_legacy_scheduler_kwargs(kwargs)): + loaded_sub_model = _legacy_load_scheduler( + class_obj, checkpoint=checkpoint, component_name=name, original_config=original_config, **kwargs + ) + + else: + if not hasattr(class_obj, "from_pretrained"): + raise ValueError( + ( + f"The component {class_obj.__name__} cannot be loaded as it does not seem to have" + " a supported loading method." + ) + ) + + loading_kwargs = {} + loading_kwargs.update( + { + "pretrained_model_name_or_path": cached_model_config_path, + "subfolder": name, + "local_files_only": local_files_only, + } + ) + + # Schedulers and Tokenizers don't make use of torch_dtype + # Skip passing it to those objects + if issubclass(class_obj, torch.nn.Module): + loading_kwargs.update({"torch_dtype": torch_dtype}) + + if is_diffusers_model or is_transformers_model: + if not _is_model_weights_in_cached_folder(cached_model_config_path, name): + raise SingleFileComponentError( + f"Failed to load {class_name}. Weights for this component appear to be missing in the checkpoint." + ) + + load_method = getattr(class_obj, "from_pretrained") + loaded_sub_model = load_method(**loading_kwargs) + + return loaded_sub_model + + +def _map_component_types_to_config_dict(component_types): + diffusers_module = importlib.import_module(__name__.split(".")[0]) + config_dict = {} + component_types.pop("self", None) + + if is_transformers_available(): + transformers_version = version.parse(version.parse(transformers.__version__).base_version) + else: + transformers_version = "N/A" + + for component_name, component_value in component_types.items(): + is_diffusers_model = issubclass(component_value[0], diffusers_module.ModelMixin) + is_scheduler_enum = component_value[0].__name__ == "KarrasDiffusionSchedulers" + is_scheduler = issubclass(component_value[0], diffusers_module.SchedulerMixin) + + is_transformers_model = ( + is_transformers_available() + and issubclass(component_value[0], PreTrainedModel) + and transformers_version >= version.parse("4.20.0") + ) + is_transformers_tokenizer = ( + is_transformers_available() + and issubclass(component_value[0], PreTrainedTokenizer) + and transformers_version >= version.parse("4.20.0") + ) + + if is_diffusers_model and component_name not in SINGLE_FILE_OPTIONAL_COMPONENTS: + config_dict[component_name] = ["diffusers", component_value[0].__name__] + + elif is_scheduler_enum or is_scheduler: + if is_scheduler_enum: + # Since we cannot fetch a scheduler config from the hub, we default to DDIMScheduler + # if the type hint is a KarrassDiffusionSchedulers enum + config_dict[component_name] = ["diffusers", "DDIMScheduler"] + + elif is_scheduler: + config_dict[component_name] = ["diffusers", component_value[0].__name__] + + elif ( + is_transformers_model or is_transformers_tokenizer + ) and component_name not in SINGLE_FILE_OPTIONAL_COMPONENTS: + config_dict[component_name] = ["transformers", component_value[0].__name__] + + else: + config_dict[component_name] = [None, None] + + return config_dict + + +def _infer_pipeline_config_dict(pipeline_class): + parameters = inspect.signature(pipeline_class.__init__).parameters + required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} + component_types = pipeline_class._get_signature_types() + + # Ignore parameters that are not required for the pipeline + component_types = {k: v for k, v in component_types.items() if k in required_parameters} + config_dict = _map_component_types_to_config_dict(component_types) + + return config_dict + + +def _download_diffusers_model_config_from_hub( + pretrained_model_name_or_path, + cache_dir, + revision, + proxies, + force_download=None, + local_files_only=None, + token=None, +): + allow_patterns = ["**/*.json", "*.json", "*.txt", "**/*.txt", "**/*.model"] + cached_model_path = snapshot_download( + pretrained_model_name_or_path, + cache_dir=cache_dir, + revision=revision, + proxies=proxies, + force_download=force_download, + local_files_only=local_files_only, + token=token, + allow_patterns=allow_patterns, + ) + + return cached_model_path + + +class FromSingleFileMixin: + """ + Load model weights saved in the `.ckpt` format into a [`DiffusionPipeline`]. + """ + + @classmethod + @validate_hf_hub_args + def from_single_file(cls, pretrained_model_link_or_path, **kwargs) -> Self: + r""" + Instantiate a [`DiffusionPipeline`] from pretrained pipeline weights saved in the `.ckpt` or `.safetensors` + format. The pipeline is set in evaluation mode (`model.eval()`) by default. + + Parameters: + pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + - A link to the `.ckpt` file (for example + `"https://huggingface.co//blob/main/.ckpt"`) on the Hub. + - A path to a *file* containing all pipeline weights. + torch_dtype (`str` or `torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + original_config_file (`str`, *optional*): + The path to the original config file that was used to train the model. If not provided, the config file + will be inferred from the checkpoint file. + config (`str`, *optional*): + Can be either: + - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline + hosted on the Hub. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing the pipeline + component configs in Diffusers format. + disable_mmap ('bool', *optional*, defaults to 'False'): + Whether to disable mmap when loading a Safetensors model. This option can perform better when the model + is on a network mount or hard drive. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline + class). The overwritten components are passed directly to the pipelines `__init__` method. See example + below for more information. + + Examples: + + ```py + >>> from diffusers import StableDiffusionPipeline + + >>> # Download pipeline from huggingface.co and cache. + >>> pipeline = StableDiffusionPipeline.from_single_file( + ... "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors" + ... ) + + >>> # Download pipeline from local file + >>> # file is downloaded under ./v1-5-pruned-emaonly.ckpt + >>> pipeline = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly.ckpt") + + >>> # Enable float16 and move to GPU + >>> pipeline = StableDiffusionPipeline.from_single_file( + ... "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt", + ... torch_dtype=torch.float16, + ... ) + >>> pipeline.to("cuda") + ``` + + """ + original_config_file = kwargs.pop("original_config_file", None) + config = kwargs.pop("config", None) + original_config = kwargs.pop("original_config", None) + + if original_config_file is not None: + deprecation_message = ( + "`original_config_file` argument is deprecated and will be removed in future versions." + "please use the `original_config` argument instead." + ) + deprecate("original_config_file", "1.0.0", deprecation_message) + original_config = original_config_file + + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + cache_dir = kwargs.pop("cache_dir", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + torch_dtype = kwargs.pop("torch_dtype", None) + disable_mmap = kwargs.pop("disable_mmap", False) + + is_legacy_loading = False + + if torch_dtype is not None and not isinstance(torch_dtype, torch.dtype): + torch_dtype = torch.float32 + logger.warning( + f"Passed `torch_dtype` {torch_dtype} is not a `torch.dtype`. Defaulting to `torch.float32`." + ) + + # We shouldn't allow configuring individual models components through a Pipeline creation method + # These model kwargs should be deprecated + scaling_factor = kwargs.get("scaling_factor", None) + if scaling_factor is not None: + deprecation_message = ( + "Passing the `scaling_factor` argument to `from_single_file is deprecated " + "and will be ignored in future versions." + ) + deprecate("scaling_factor", "1.0.0", deprecation_message) + + if original_config is not None: + original_config = fetch_original_config(original_config, local_files_only=local_files_only) + + from ..pipelines.pipeline_utils import _get_pipeline_class + + pipeline_class = _get_pipeline_class(cls, config=None) + + checkpoint = load_single_file_checkpoint( + pretrained_model_link_or_path, + force_download=force_download, + proxies=proxies, + token=token, + cache_dir=cache_dir, + local_files_only=local_files_only, + revision=revision, + disable_mmap=disable_mmap, + ) + + if config is None: + config = fetch_diffusers_config(checkpoint) + default_pretrained_model_config_name = config["pretrained_model_name_or_path"] + else: + default_pretrained_model_config_name = config + + if not os.path.isdir(default_pretrained_model_config_name): + # Provided config is a repo_id + if default_pretrained_model_config_name.count("/") > 1: + raise ValueError( + f'The provided config "{config}"' + " is neither a valid local path nor a valid repo id. Please check the parameter." + ) + try: + # Attempt to download the config files for the pipeline + cached_model_config_path = _download_diffusers_model_config_from_hub( + default_pretrained_model_config_name, + cache_dir=cache_dir, + revision=revision, + proxies=proxies, + force_download=force_download, + local_files_only=local_files_only, + token=token, + ) + config_dict = pipeline_class.load_config(cached_model_config_path) + + except LocalEntryNotFoundError: + # `local_files_only=True` but a local diffusers format model config is not available in the cache + # If `original_config` is not provided, we need override `local_files_only` to False + # to fetch the config files from the hub so that we have a way + # to configure the pipeline components. + + if original_config is None: + logger.warning( + "`local_files_only` is True but no local configs were found for this checkpoint.\n" + "Attempting to download the necessary config files for this pipeline.\n" + ) + cached_model_config_path = _download_diffusers_model_config_from_hub( + default_pretrained_model_config_name, + cache_dir=cache_dir, + revision=revision, + proxies=proxies, + force_download=force_download, + local_files_only=False, + token=token, + ) + config_dict = pipeline_class.load_config(cached_model_config_path) + + else: + # For backwards compatibility + # If `original_config` is provided, then we need to assume we are using legacy loading for pipeline components + logger.warning( + "Detected legacy `from_single_file` loading behavior. Attempting to create the pipeline based on inferred components.\n" + "This may lead to errors if the model components are not correctly inferred. \n" + "To avoid this warning, please explicitly pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n" + "e.g. `from_single_file(, config=) \n" + "or run `from_single_file` with `local_files_only=False` first to update the local cache directory with " + "the necessary config files.\n" + ) + is_legacy_loading = True + cached_model_config_path = None + + config_dict = _infer_pipeline_config_dict(pipeline_class) + config_dict["_class_name"] = pipeline_class.__name__ + + else: + # Provided config is a path to a local directory attempt to load directly. + cached_model_config_path = default_pretrained_model_config_name + config_dict = pipeline_class.load_config(cached_model_config_path) + + # pop out "_ignore_files" as it is only needed for download + config_dict.pop("_ignore_files", None) + + expected_modules, optional_kwargs = pipeline_class._get_signature_keys(cls) + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + + init_dict, unused_kwargs, _ = pipeline_class.extract_init_dict(config_dict, **kwargs) + init_kwargs = {k: init_dict.pop(k) for k in optional_kwargs if k in init_dict} + init_kwargs = {**init_kwargs, **passed_pipe_kwargs} + + from diffusers import pipelines + + # remove `null` components + def load_module(name, value): + if value[0] is None: + return False + if name in passed_class_obj and passed_class_obj[name] is None: + return False + if name in SINGLE_FILE_OPTIONAL_COMPONENTS: + return False + + return True + + init_dict = {k: v for k, v in init_dict.items() if load_module(k, v)} + + for name, (library_name, class_name) in logging.tqdm( + sorted(init_dict.items()), desc="Loading pipeline components..." + ): + loaded_sub_model = None + is_pipeline_module = hasattr(pipelines, library_name) + + if name in passed_class_obj: + loaded_sub_model = passed_class_obj[name] + + else: + try: + loaded_sub_model = load_single_file_sub_model( + library_name=library_name, + class_name=class_name, + name=name, + checkpoint=checkpoint, + is_pipeline_module=is_pipeline_module, + cached_model_config_path=cached_model_config_path, + pipelines=pipelines, + torch_dtype=torch_dtype, + original_config=original_config, + local_files_only=local_files_only, + is_legacy_loading=is_legacy_loading, + disable_mmap=disable_mmap, + **kwargs, + ) + except SingleFileComponentError as e: + raise SingleFileComponentError( + ( + f"{e.message}\n" + f"Please load the component before passing it in as an argument to `from_single_file`.\n" + f"\n" + f"{name} = {class_name}.from_pretrained('...')\n" + f"pipe = {pipeline_class.__name__}.from_single_file(, {name}={name})\n" + f"\n" + ) + ) + + init_kwargs[name] = loaded_sub_model + + missing_modules = set(expected_modules) - set(init_kwargs.keys()) + passed_modules = list(passed_class_obj.keys()) + optional_modules = pipeline_class._optional_components + + if len(missing_modules) > 0 and missing_modules <= set(passed_modules + optional_modules): + for module in missing_modules: + init_kwargs[module] = passed_class_obj.get(module, None) + elif len(missing_modules) > 0: + passed_modules = set(list(init_kwargs.keys()) + list(passed_class_obj.keys())) - optional_kwargs + raise ValueError( + f"Pipeline {pipeline_class} expected {expected_modules}, but only {passed_modules} were passed." + ) + + # deprecated kwargs + load_safety_checker = kwargs.pop("load_safety_checker", None) + if load_safety_checker is not None: + deprecation_message = ( + "Please pass instances of `StableDiffusionSafetyChecker` and `AutoImageProcessor`" + "using the `safety_checker` and `feature_extractor` arguments in `from_single_file`" + ) + deprecate("load_safety_checker", "1.0.0", deprecation_message) + + safety_checker_components = _legacy_load_safety_checker(local_files_only, torch_dtype) + init_kwargs.update(safety_checker_components) + + pipe = pipeline_class(**init_kwargs) + + return pipe diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file_model.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c7bb2de4437ac88b21b6a9b1825f14e70b01a28c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file_model.py @@ -0,0 +1,529 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import inspect +import re +from contextlib import nullcontext + +import torch +from huggingface_hub.utils import validate_hf_hub_args +from typing_extensions import Self + +from .. import __version__ +from ..models.model_loading_utils import _caching_allocator_warmup, _determine_device_map, _expand_device_map +from ..quantizers import DiffusersAutoQuantizer +from ..utils import deprecate, is_accelerate_available, is_torch_version, logging +from ..utils.torch_utils import empty_device_cache +from .single_file_utils import ( + SingleFileComponentError, + convert_animatediff_checkpoint_to_diffusers, + convert_auraflow_transformer_checkpoint_to_diffusers, + convert_autoencoder_dc_checkpoint_to_diffusers, + convert_chroma_transformer_checkpoint_to_diffusers, + convert_controlnet_checkpoint, + convert_cosmos_transformer_checkpoint_to_diffusers, + convert_flux2_transformer_checkpoint_to_diffusers, + convert_flux_transformer_checkpoint_to_diffusers, + convert_hidream_transformer_to_diffusers, + convert_hunyuan_video_transformer_to_diffusers, + convert_ldm_unet_checkpoint, + convert_ldm_vae_checkpoint, + convert_ltx2_audio_vae_to_diffusers, + convert_ltx2_transformer_to_diffusers, + convert_ltx2_vae_to_diffusers, + convert_ltx_transformer_checkpoint_to_diffusers, + convert_ltx_vae_checkpoint_to_diffusers, + convert_lumina2_to_diffusers, + convert_mochi_transformer_checkpoint_to_diffusers, + convert_sana_transformer_to_diffusers, + convert_sd3_transformer_checkpoint_to_diffusers, + convert_stable_cascade_unet_single_file_to_diffusers, + convert_wan_transformer_to_diffusers, + convert_wan_vae_to_diffusers, + convert_z_image_controlnet_checkpoint_to_diffusers, + convert_z_image_transformer_checkpoint_to_diffusers, + create_controlnet_diffusers_config_from_ldm, + create_unet_diffusers_config_from_ldm, + create_vae_diffusers_config_from_ldm, + fetch_diffusers_config, + fetch_original_config, + load_single_file_checkpoint, +) + + +logger = logging.get_logger(__name__) + + +if is_accelerate_available(): + from accelerate import dispatch_model, init_empty_weights + + from ..models.model_loading_utils import load_model_dict_into_meta + +if is_torch_version(">=", "1.9.0") and is_accelerate_available(): + _LOW_CPU_MEM_USAGE_DEFAULT = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT = False + +SINGLE_FILE_LOADABLE_CLASSES = { + "StableCascadeUNet": { + "checkpoint_mapping_fn": convert_stable_cascade_unet_single_file_to_diffusers, + }, + "UNet2DConditionModel": { + "checkpoint_mapping_fn": convert_ldm_unet_checkpoint, + "config_mapping_fn": create_unet_diffusers_config_from_ldm, + "default_subfolder": "unet", + "legacy_kwargs": { + "num_in_channels": "in_channels", # Legacy kwargs supported by `from_single_file` mapped to new args + }, + }, + "AutoencoderKL": { + "checkpoint_mapping_fn": convert_ldm_vae_checkpoint, + "config_mapping_fn": create_vae_diffusers_config_from_ldm, + "default_subfolder": "vae", + }, + "ControlNetModel": { + "checkpoint_mapping_fn": convert_controlnet_checkpoint, + "config_mapping_fn": create_controlnet_diffusers_config_from_ldm, + }, + "SD3Transformer2DModel": { + "checkpoint_mapping_fn": convert_sd3_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "MotionAdapter": { + "checkpoint_mapping_fn": convert_animatediff_checkpoint_to_diffusers, + }, + "SparseControlNetModel": { + "checkpoint_mapping_fn": convert_animatediff_checkpoint_to_diffusers, + }, + "FluxTransformer2DModel": { + "checkpoint_mapping_fn": convert_flux_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "ChromaTransformer2DModel": { + "checkpoint_mapping_fn": convert_chroma_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "LTXVideoTransformer3DModel": { + "checkpoint_mapping_fn": convert_ltx_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "AutoencoderKLLTXVideo": { + "checkpoint_mapping_fn": convert_ltx_vae_checkpoint_to_diffusers, + "default_subfolder": "vae", + }, + "AutoencoderDC": {"checkpoint_mapping_fn": convert_autoencoder_dc_checkpoint_to_diffusers}, + "MochiTransformer3DModel": { + "checkpoint_mapping_fn": convert_mochi_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "HunyuanVideoTransformer3DModel": { + "checkpoint_mapping_fn": convert_hunyuan_video_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "AuraFlowTransformer2DModel": { + "checkpoint_mapping_fn": convert_auraflow_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "Lumina2Transformer2DModel": { + "checkpoint_mapping_fn": convert_lumina2_to_diffusers, + "default_subfolder": "transformer", + }, + "SanaTransformer2DModel": { + "checkpoint_mapping_fn": convert_sana_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "WanTransformer3DModel": { + "checkpoint_mapping_fn": convert_wan_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "WanVACETransformer3DModel": { + "checkpoint_mapping_fn": convert_wan_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "WanAnimateTransformer3DModel": { + "checkpoint_mapping_fn": convert_wan_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "AutoencoderKLWan": { + "checkpoint_mapping_fn": convert_wan_vae_to_diffusers, + "default_subfolder": "vae", + }, + "HiDreamImageTransformer2DModel": { + "checkpoint_mapping_fn": convert_hidream_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "CosmosTransformer3DModel": { + "checkpoint_mapping_fn": convert_cosmos_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "QwenImageTransformer2DModel": { + "checkpoint_mapping_fn": lambda checkpoint, **kwargs: checkpoint, + "default_subfolder": "transformer", + }, + "Flux2Transformer2DModel": { + "checkpoint_mapping_fn": convert_flux2_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "ZImageTransformer2DModel": { + "checkpoint_mapping_fn": convert_z_image_transformer_checkpoint_to_diffusers, + "default_subfolder": "transformer", + }, + "ZImageControlNetModel": { + "checkpoint_mapping_fn": convert_z_image_controlnet_checkpoint_to_diffusers, + }, + "LTX2VideoTransformer3DModel": { + "checkpoint_mapping_fn": convert_ltx2_transformer_to_diffusers, + "default_subfolder": "transformer", + }, + "AutoencoderKLLTX2Video": { + "checkpoint_mapping_fn": convert_ltx2_vae_to_diffusers, + "default_subfolder": "vae", + }, + "AutoencoderKLLTX2Audio": { + "checkpoint_mapping_fn": convert_ltx2_audio_vae_to_diffusers, + "default_subfolder": "audio_vae", + }, +} + + +def _should_convert_state_dict_to_diffusers(model_state_dict, checkpoint_state_dict): + model_state_dict_keys = set(model_state_dict.keys()) + checkpoint_state_dict_keys = set(checkpoint_state_dict.keys()) + is_subset = model_state_dict_keys.issubset(checkpoint_state_dict_keys) + is_match = model_state_dict_keys == checkpoint_state_dict_keys + return not (is_subset and is_match) + + +def _get_single_file_loadable_mapping_class(cls): + diffusers_module = importlib.import_module(__name__.split(".")[0]) + for loadable_class_str in SINGLE_FILE_LOADABLE_CLASSES: + loadable_class = getattr(diffusers_module, loadable_class_str) + + if issubclass(cls, loadable_class): + return loadable_class_str + + return None + + +def _get_mapping_function_kwargs(mapping_fn, **kwargs): + parameters = inspect.signature(mapping_fn).parameters + + mapping_kwargs = {} + for parameter in parameters: + if parameter in kwargs: + mapping_kwargs[parameter] = kwargs[parameter] + + return mapping_kwargs + + +class FromOriginalModelMixin: + """ + Load pretrained weights saved in the `.ckpt` or `.safetensors` format into a model. + """ + + @classmethod + @validate_hf_hub_args + def from_single_file(cls, pretrained_model_link_or_path_or_dict: str | None = None, **kwargs) -> Self: + r""" + Instantiate a model from pretrained weights saved in the original `.ckpt` or `.safetensors` format. The model + is set in evaluation mode (`model.eval()`) by default. + + Parameters: + pretrained_model_link_or_path_or_dict (`str`, *optional*): + Can be either: + - A link to the `.safetensors` or `.ckpt` file (for example + `"https://huggingface.co//blob/main/.safetensors"`) on the Hub. + - A path to a local *file* containing the weights of the component model. + - A state dict containing the component model weights. + config (`str`, *optional*): + - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline hosted + on the Hub. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing the pipeline component + configs in Diffusers format. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + original_config (`str`, *optional*): + Dict or path to a yaml file containing the configuration for the model in its original format. + If a dict is provided, it will be used to initialize the model configuration. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to True, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 and + is_accelerate_available() else `False`): Speed up model loading only loading the pretrained weights and + not initializing the weights. This also tries to not use more than 1x model size in CPU memory + (including peak memory) while loading the model. Only supported for PyTorch >= 1.9.0. If you are using + an older version of PyTorch, setting this argument to `True` will raise an error. + disable_mmap ('bool', *optional*, defaults to 'False'): + Whether to disable mmap when loading a Safetensors model. This option can perform better when the model + is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (for example the pipeline components of the + specific pipeline class). The overwritten components are directly passed to the pipelines `__init__` + method. See example below for more information. + + ```py + >>> from diffusers import StableCascadeUNet + + >>> ckpt_path = "https://huggingface.co/stabilityai/stable-cascade/blob/main/stage_b_lite.safetensors" + >>> model = StableCascadeUNet.from_single_file(ckpt_path) + ``` + """ + + mapping_class_name = _get_single_file_loadable_mapping_class(cls) + # if class_name not in SINGLE_FILE_LOADABLE_CLASSES: + if mapping_class_name is None: + raise ValueError( + f"FromOriginalModelMixin is currently only compatible with {', '.join(SINGLE_FILE_LOADABLE_CLASSES.keys())}" + ) + + pretrained_model_link_or_path = kwargs.get("pretrained_model_link_or_path", None) + if pretrained_model_link_or_path is not None: + deprecation_message = ( + "Please use `pretrained_model_link_or_path_or_dict` argument instead for model classes" + ) + deprecate("pretrained_model_link_or_path", "1.0.0", deprecation_message) + pretrained_model_link_or_path_or_dict = pretrained_model_link_or_path + + config = kwargs.pop("config", None) + original_config = kwargs.pop("original_config", None) + + if config is not None and original_config is not None: + raise ValueError( + "`from_single_file` cannot accept both `config` and `original_config` arguments. Please provide only one of these arguments" + ) + + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + cache_dir = kwargs.pop("cache_dir", None) + local_files_only = kwargs.pop("local_files_only", None) + subfolder = kwargs.pop("subfolder", None) + revision = kwargs.pop("revision", None) + config_revision = kwargs.pop("config_revision", None) + torch_dtype = kwargs.pop("torch_dtype", None) + quantization_config = kwargs.pop("quantization_config", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + device = kwargs.pop("device", None) + disable_mmap = kwargs.pop("disable_mmap", False) + device_map = kwargs.pop("device_map", None) + + user_agent = {"diffusers": __version__, "file_type": "single_file", "framework": "pytorch"} + # In order to ensure popular quantization methods are supported. Can be disable with `disable_telemetry` + if quantization_config is not None: + user_agent["quant"] = quantization_config.quant_method.value + + if torch_dtype is not None and not isinstance(torch_dtype, torch.dtype): + torch_dtype = torch.float32 + logger.warning( + f"Passed `torch_dtype` {torch_dtype} is not a `torch.dtype`. Defaulting to `torch.float32`." + ) + + if isinstance(pretrained_model_link_or_path_or_dict, dict): + checkpoint = pretrained_model_link_or_path_or_dict + else: + checkpoint = load_single_file_checkpoint( + pretrained_model_link_or_path_or_dict, + force_download=force_download, + proxies=proxies, + token=token, + cache_dir=cache_dir, + local_files_only=local_files_only, + revision=revision, + disable_mmap=disable_mmap, + user_agent=user_agent, + ) + if quantization_config is not None: + hf_quantizer = DiffusersAutoQuantizer.from_config(quantization_config) + hf_quantizer.validate_environment() + torch_dtype = hf_quantizer.update_torch_dtype(torch_dtype) + + else: + hf_quantizer = None + + mapping_functions = SINGLE_FILE_LOADABLE_CLASSES[mapping_class_name] + + checkpoint_mapping_fn = mapping_functions["checkpoint_mapping_fn"] + if original_config is not None: + if "config_mapping_fn" in mapping_functions: + config_mapping_fn = mapping_functions["config_mapping_fn"] + else: + config_mapping_fn = None + + if config_mapping_fn is None: + raise ValueError( + ( + f"`original_config` has been provided for {mapping_class_name} but no mapping function" + "was found to convert the original config to a Diffusers config in" + "`diffusers.loaders.single_file_utils`" + ) + ) + + if isinstance(original_config, str): + # If original_config is a URL or filepath fetch the original_config dict + original_config = fetch_original_config(original_config, local_files_only=local_files_only) + + config_mapping_kwargs = _get_mapping_function_kwargs(config_mapping_fn, **kwargs) + diffusers_model_config = config_mapping_fn( + original_config=original_config, checkpoint=checkpoint, **config_mapping_kwargs + ) + else: + if config is not None: + if isinstance(config, str): + default_pretrained_model_config_name = config + else: + raise ValueError( + ( + "Invalid `config` argument. Please provide a string representing a repo id" + "or path to a local Diffusers model repo." + ) + ) + + else: + config = fetch_diffusers_config(checkpoint) + default_pretrained_model_config_name = config["pretrained_model_name_or_path"] + + if "default_subfolder" in mapping_functions: + subfolder = mapping_functions["default_subfolder"] + + subfolder = subfolder or config.pop( + "subfolder", None + ) # some configs contain a subfolder key, e.g. StableCascadeUNet + + diffusers_model_config = cls.load_config( + pretrained_model_name_or_path=default_pretrained_model_config_name, + subfolder=subfolder, + local_files_only=local_files_only, + token=token, + revision=config_revision, + ) + expected_kwargs, optional_kwargs = cls._get_signature_keys(cls) + + # Map legacy kwargs to new kwargs + if "legacy_kwargs" in mapping_functions: + legacy_kwargs = mapping_functions["legacy_kwargs"] + for legacy_key, new_key in legacy_kwargs.items(): + if legacy_key in kwargs: + kwargs[new_key] = kwargs.pop(legacy_key) + + model_kwargs = {k: kwargs.get(k) for k in kwargs if k in expected_kwargs or k in optional_kwargs} + diffusers_model_config.update(model_kwargs) + + ctx = init_empty_weights if low_cpu_mem_usage else nullcontext + with ctx(): + model = cls.from_config(diffusers_model_config) + + model_state_dict = model.state_dict() + + # Check if `_keep_in_fp32_modules` is not None + use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and ( + (torch_dtype == torch.float16) or hasattr(hf_quantizer, "use_keep_in_fp32_modules") + ) + if use_keep_in_fp32_modules: + keep_in_fp32_modules = cls._keep_in_fp32_modules + if not isinstance(keep_in_fp32_modules, list): + keep_in_fp32_modules = [keep_in_fp32_modules] + + else: + keep_in_fp32_modules = [] + + # Now that the model is loaded, we can determine the `device_map` + device_map = _determine_device_map(model, device_map, None, torch_dtype, keep_in_fp32_modules, hf_quantizer) + if device_map is not None: + expanded_device_map = _expand_device_map(device_map, model_state_dict.keys()) + _caching_allocator_warmup(model, expanded_device_map, torch_dtype, hf_quantizer) + + checkpoint_mapping_kwargs = _get_mapping_function_kwargs(checkpoint_mapping_fn, **kwargs) + + if _should_convert_state_dict_to_diffusers(model_state_dict, checkpoint): + diffusers_format_checkpoint = checkpoint_mapping_fn( + config=diffusers_model_config, checkpoint=checkpoint, **checkpoint_mapping_kwargs + ) + else: + diffusers_format_checkpoint = checkpoint + + if not diffusers_format_checkpoint: + raise SingleFileComponentError( + f"Failed to load {mapping_class_name}. Weights for this component appear to be missing in the checkpoint." + ) + + if hf_quantizer is not None: + hf_quantizer.preprocess_model( + model=model, + device_map=None, + state_dict=diffusers_format_checkpoint, + keep_in_fp32_modules=keep_in_fp32_modules, + ) + + device_map = None + if low_cpu_mem_usage: + param_device = torch.device(device) if device else torch.device("cpu") + empty_state_dict = model.state_dict() + unexpected_keys = [ + param_name for param_name in diffusers_format_checkpoint if param_name not in empty_state_dict + ] + device_map = {"": param_device} + load_model_dict_into_meta( + model, + diffusers_format_checkpoint, + dtype=torch_dtype, + device_map=device_map, + hf_quantizer=hf_quantizer, + keep_in_fp32_modules=keep_in_fp32_modules, + unexpected_keys=unexpected_keys, + ) + empty_device_cache() + else: + _, unexpected_keys = model.load_state_dict(diffusers_format_checkpoint, strict=False) + + if model._keys_to_ignore_on_load_unexpected is not None: + for pat in model._keys_to_ignore_on_load_unexpected: + unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None] + + if len(unexpected_keys) > 0: + logger.warning( + f"Some weights of the model checkpoint were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}" + ) + + if hf_quantizer is not None: + hf_quantizer.postprocess_model(model) + model.hf_quantizer = hf_quantizer + + if torch_dtype is not None and hf_quantizer is None: + model.to(torch_dtype) + + model.eval() + + if device_map is not None: + device_map_kwargs = {"device_map": device_map} + dispatch_model(model, **device_map_kwargs) + + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5e11acb51c17d66f4ec9ea567662e6f23a302f04 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/single_file_utils.py @@ -0,0 +1,4161 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Conversion script for the Stable Diffusion checkpoints.""" + +import copy +import os +import re +from contextlib import nullcontext +from io import BytesIO +from urllib.parse import urlparse + +import requests +import torch +import yaml + +from ..models.modeling_utils import load_state_dict +from ..schedulers import ( + DDIMScheduler, + DPMSolverMultistepScheduler, + EDMDPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, +) +from ..utils import ( + SAFETENSORS_WEIGHTS_NAME, + WEIGHTS_NAME, + deprecate, + is_accelerate_available, + is_transformers_available, + logging, +) +from ..utils.constants import DIFFUSERS_REQUEST_TIMEOUT +from ..utils.hub_utils import _get_model_file +from ..utils.torch_utils import empty_device_cache + + +if is_transformers_available(): + from transformers import AutoImageProcessor + +if is_accelerate_available(): + from accelerate import init_empty_weights + + from ..models.model_loading_utils import load_model_dict_into_meta + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +CHECKPOINT_KEY_NAMES = { + "v1": "model.diffusion_model.output_blocks.11.0.skip_connection.weight", + "v2": "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight", + "xl_base": "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias", + "xl_refiner": "conditioner.embedders.0.model.transformer.resblocks.9.mlp.c_proj.bias", + "upscale": "model.diffusion_model.input_blocks.10.0.skip_connection.bias", + "controlnet": [ + "control_model.time_embed.0.weight", + "controlnet_cond_embedding.conv_in.weight", + ], + # TODO: find non-Diffusers keys for controlnet_xl + "controlnet_xl": "add_embedding.linear_1.weight", + "controlnet_xl_large": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", + "controlnet_xl_mid": "down_blocks.1.attentions.0.norm.weight", + "playground-v2-5": "edm_mean", + "inpainting": "model.diffusion_model.input_blocks.0.0.weight", + "clip": "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight", + "clip_sdxl": "conditioner.embedders.0.transformer.text_model.embeddings.position_embedding.weight", + "clip_sd3": "text_encoders.clip_l.transformer.text_model.embeddings.position_embedding.weight", + "open_clip": "cond_stage_model.model.token_embedding.weight", + "open_clip_sdxl": "conditioner.embedders.1.model.positional_embedding", + "open_clip_sdxl_refiner": "conditioner.embedders.0.model.text_projection", + "open_clip_sd3": "text_encoders.clip_g.transformer.text_model.embeddings.position_embedding.weight", + "stable_cascade_stage_b": "down_blocks.1.0.channelwise.0.weight", + "stable_cascade_stage_c": "clip_txt_mapper.weight", + "sd3": [ + "joint_blocks.0.context_block.adaLN_modulation.1.bias", + "model.diffusion_model.joint_blocks.0.context_block.adaLN_modulation.1.bias", + ], + "sd35_large": [ + "joint_blocks.37.x_block.mlp.fc1.weight", + "model.diffusion_model.joint_blocks.37.x_block.mlp.fc1.weight", + ], + "animatediff": "down_blocks.0.motion_modules.0.temporal_transformer.transformer_blocks.0.attention_blocks.0.pos_encoder.pe", + "animatediff_v2": "mid_block.motion_modules.0.temporal_transformer.norm.bias", + "animatediff_sdxl_beta": "up_blocks.2.motion_modules.0.temporal_transformer.norm.weight", + "animatediff_scribble": "controlnet_cond_embedding.conv_in.weight", + "animatediff_rgb": "controlnet_cond_embedding.weight", + "auraflow": [ + "double_layers.0.attn.w2q.weight", + "double_layers.0.attn.w1q.weight", + "cond_seq_linear.weight", + "t_embedder.mlp.0.weight", + ], + "flux": [ + "double_blocks.0.img_attn.norm.key_norm.scale", + "model.diffusion_model.double_blocks.0.img_attn.norm.key_norm.scale", + ], + "ltx-video": [ + "model.diffusion_model.patchify_proj.weight", + "model.diffusion_model.transformer_blocks.27.scale_shift_table", + "patchify_proj.weight", + "transformer_blocks.27.scale_shift_table", + "vae.decoder.last_scale_shift_table", # 0.9.1, 0.9.5, 0.9.7, 0.9.8 + "vae.decoder.up_blocks.9.res_blocks.0.conv1.conv.weight", # 0.9.0 + ], + "autoencoder-dc": "decoder.stages.1.op_list.0.main.conv.conv.bias", + "autoencoder-dc-sana": "encoder.project_in.conv.bias", + "mochi-1-preview": ["model.diffusion_model.blocks.0.attn.qkv_x.weight", "blocks.0.attn.qkv_x.weight"], + "hunyuan-video": "txt_in.individual_token_refiner.blocks.0.adaLN_modulation.1.bias", + "instruct-pix2pix": "model.diffusion_model.input_blocks.0.0.weight", + "lumina2": ["model.diffusion_model.cap_embedder.0.weight", "cap_embedder.0.weight"], + "z-image-turbo": [ + "model.diffusion_model.layers.0.adaLN_modulation.0.weight", + "layers.0.adaLN_modulation.0.weight", + ], + "z-image-turbo-controlnet": "control_all_x_embedder.2-1.weight", + "z-image-turbo-controlnet-2.x": "control_layers.14.adaLN_modulation.0.weight", + "sana": [ + "blocks.0.cross_attn.q_linear.weight", + "blocks.0.cross_attn.q_linear.bias", + "blocks.0.cross_attn.kv_linear.weight", + "blocks.0.cross_attn.kv_linear.bias", + ], + "wan": ["model.diffusion_model.head.modulation", "head.modulation"], + "wan_vae": "decoder.middle.0.residual.0.gamma", + "wan_vace": "vace_blocks.0.after_proj.bias", + "wan_animate": "motion_encoder.dec.direction.weight", + "hidream": "double_stream_blocks.0.block.adaLN_modulation.1.bias", + "cosmos-1.0": [ + "net.x_embedder.proj.1.weight", + "net.blocks.block1.blocks.0.block.attn.to_q.0.weight", + "net.extra_pos_embedder.pos_emb_h", + ], + "cosmos-2.0": [ + "net.x_embedder.proj.1.weight", + "net.blocks.0.self_attn.q_proj.weight", + "net.pos_embedder.dim_spatial_range", + ], + "flux2": ["model.diffusion_model.single_stream_modulation.lin.weight", "single_stream_modulation.lin.weight"], + "ltx2": [ + "model.diffusion_model.av_ca_a2v_gate_adaln_single.emb.timestep_embedder.linear_1.weight", + "vae.per_channel_statistics.mean-of-means", + "audio_vae.per_channel_statistics.mean-of-means", + ], +} + +DIFFUSERS_DEFAULT_PIPELINE_PATHS = { + "xl_base": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0"}, + "xl_refiner": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-refiner-1.0"}, + "xl_inpaint": {"pretrained_model_name_or_path": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"}, + "playground-v2-5": {"pretrained_model_name_or_path": "playgroundai/playground-v2.5-1024px-aesthetic"}, + "upscale": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-x4-upscaler"}, + "inpainting": {"pretrained_model_name_or_path": "stable-diffusion-v1-5/stable-diffusion-inpainting"}, + "inpainting_v2": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-inpainting"}, + "controlnet": {"pretrained_model_name_or_path": "lllyasviel/control_v11p_sd15_canny"}, + "controlnet_xl_large": {"pretrained_model_name_or_path": "diffusers/controlnet-canny-sdxl-1.0"}, + "controlnet_xl_mid": {"pretrained_model_name_or_path": "diffusers/controlnet-canny-sdxl-1.0-mid"}, + "controlnet_xl_small": {"pretrained_model_name_or_path": "diffusers/controlnet-canny-sdxl-1.0-small"}, + "v2": {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-1"}, + "v1": {"pretrained_model_name_or_path": "stable-diffusion-v1-5/stable-diffusion-v1-5"}, + "stable_cascade_stage_b": {"pretrained_model_name_or_path": "stabilityai/stable-cascade", "subfolder": "decoder"}, + "stable_cascade_stage_b_lite": { + "pretrained_model_name_or_path": "stabilityai/stable-cascade", + "subfolder": "decoder_lite", + }, + "stable_cascade_stage_c": { + "pretrained_model_name_or_path": "stabilityai/stable-cascade-prior", + "subfolder": "prior", + }, + "stable_cascade_stage_c_lite": { + "pretrained_model_name_or_path": "stabilityai/stable-cascade-prior", + "subfolder": "prior_lite", + }, + "sd3": { + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3-medium-diffusers", + }, + "sd35_large": { + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3.5-large", + }, + "sd35_medium": { + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3.5-medium", + }, + "animatediff_v1": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-v1-5"}, + "animatediff_v2": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-v1-5-2"}, + "animatediff_v3": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-v1-5-3"}, + "animatediff_sdxl_beta": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-sdxl-beta"}, + "animatediff_scribble": {"pretrained_model_name_or_path": "guoyww/animatediff-sparsectrl-scribble"}, + "animatediff_rgb": {"pretrained_model_name_or_path": "guoyww/animatediff-sparsectrl-rgb"}, + "auraflow": {"pretrained_model_name_or_path": "fal/AuraFlow-v0.3"}, + "flux-dev": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-dev"}, + "flux-fill": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-Fill-dev"}, + "flux-depth": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-Depth-dev"}, + "flux-schnell": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-schnell"}, + "flux-2-dev": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.2-dev"}, + "ltx-video": {"pretrained_model_name_or_path": "diffusers/LTX-Video-0.9.0"}, + "ltx-video-0.9.1": {"pretrained_model_name_or_path": "diffusers/LTX-Video-0.9.1"}, + "ltx-video-0.9.5": {"pretrained_model_name_or_path": "Lightricks/LTX-Video-0.9.5"}, + "ltx-video-0.9.7": {"pretrained_model_name_or_path": "Lightricks/LTX-Video-0.9.7-dev"}, + "autoencoder-dc-f128c512": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers"}, + "autoencoder-dc-f64c128": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f64c128-mix-1.0-diffusers"}, + "autoencoder-dc-f32c32": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f32c32-mix-1.0-diffusers"}, + "autoencoder-dc-f32c32-sana": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers"}, + "mochi-1-preview": {"pretrained_model_name_or_path": "genmo/mochi-1-preview"}, + "hunyuan-video": {"pretrained_model_name_or_path": "hunyuanvideo-community/HunyuanVideo"}, + "instruct-pix2pix": {"pretrained_model_name_or_path": "timbrooks/instruct-pix2pix"}, + "lumina2": {"pretrained_model_name_or_path": "Alpha-VLLM/Lumina-Image-2.0"}, + "sana": {"pretrained_model_name_or_path": "Efficient-Large-Model/Sana_1600M_1024px_diffusers"}, + "wan-t2v-1.3B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"}, + "wan-t2v-14B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-T2V-14B-Diffusers"}, + "wan-i2v-14B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"}, + "wan-animate-14B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.2-Animate-14B-Diffusers"}, + "wan-vace-1.3B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-VACE-1.3B-diffusers"}, + "wan-vace-14B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-VACE-14B-diffusers"}, + "hidream": {"pretrained_model_name_or_path": "HiDream-ai/HiDream-I1-Dev"}, + "cosmos-1.0-t2w-7B": {"pretrained_model_name_or_path": "nvidia/Cosmos-1.0-Diffusion-7B-Text2World"}, + "cosmos-1.0-t2w-14B": {"pretrained_model_name_or_path": "nvidia/Cosmos-1.0-Diffusion-14B-Text2World"}, + "cosmos-1.0-v2w-7B": {"pretrained_model_name_or_path": "nvidia/Cosmos-1.0-Diffusion-7B-Video2World"}, + "cosmos-1.0-v2w-14B": {"pretrained_model_name_or_path": "nvidia/Cosmos-1.0-Diffusion-14B-Video2World"}, + "cosmos-2.0-t2i-2B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-2B-Text2Image"}, + "cosmos-2.0-t2i-14B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-14B-Text2Image"}, + "cosmos-2.0-v2w-2B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-2B-Video2World"}, + "cosmos-2.0-v2w-14B": {"pretrained_model_name_or_path": "nvidia/Cosmos-Predict2-14B-Video2World"}, + "z-image-turbo": {"pretrained_model_name_or_path": "Tongyi-MAI/Z-Image-Turbo"}, + "z-image-turbo-controlnet": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union"}, + "z-image-turbo-controlnet-2.0": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.0"}, + "z-image-turbo-controlnet-2.1": {"pretrained_model_name_or_path": "hlky/Z-Image-Turbo-Fun-Controlnet-Union-2.1"}, + "ltx2-dev": {"pretrained_model_name_or_path": "Lightricks/LTX-2"}, +} + +# Use to configure model sample size when original config is provided +DIFFUSERS_TO_LDM_DEFAULT_IMAGE_SIZE_MAP = { + "xl_base": 1024, + "xl_refiner": 1024, + "xl_inpaint": 1024, + "playground-v2-5": 1024, + "upscale": 512, + "inpainting": 512, + "inpainting_v2": 512, + "controlnet": 512, + "instruct-pix2pix": 512, + "v2": 768, + "v1": 512, +} + + +DIFFUSERS_TO_LDM_MAPPING = { + "unet": { + "layers": { + "time_embedding.linear_1.weight": "time_embed.0.weight", + "time_embedding.linear_1.bias": "time_embed.0.bias", + "time_embedding.linear_2.weight": "time_embed.2.weight", + "time_embedding.linear_2.bias": "time_embed.2.bias", + "conv_in.weight": "input_blocks.0.0.weight", + "conv_in.bias": "input_blocks.0.0.bias", + "conv_norm_out.weight": "out.0.weight", + "conv_norm_out.bias": "out.0.bias", + "conv_out.weight": "out.2.weight", + "conv_out.bias": "out.2.bias", + }, + "class_embed_type": { + "class_embedding.linear_1.weight": "label_emb.0.0.weight", + "class_embedding.linear_1.bias": "label_emb.0.0.bias", + "class_embedding.linear_2.weight": "label_emb.0.2.weight", + "class_embedding.linear_2.bias": "label_emb.0.2.bias", + }, + "addition_embed_type": { + "add_embedding.linear_1.weight": "label_emb.0.0.weight", + "add_embedding.linear_1.bias": "label_emb.0.0.bias", + "add_embedding.linear_2.weight": "label_emb.0.2.weight", + "add_embedding.linear_2.bias": "label_emb.0.2.bias", + }, + }, + "controlnet": { + "layers": { + "time_embedding.linear_1.weight": "time_embed.0.weight", + "time_embedding.linear_1.bias": "time_embed.0.bias", + "time_embedding.linear_2.weight": "time_embed.2.weight", + "time_embedding.linear_2.bias": "time_embed.2.bias", + "conv_in.weight": "input_blocks.0.0.weight", + "conv_in.bias": "input_blocks.0.0.bias", + "controlnet_cond_embedding.conv_in.weight": "input_hint_block.0.weight", + "controlnet_cond_embedding.conv_in.bias": "input_hint_block.0.bias", + "controlnet_cond_embedding.conv_out.weight": "input_hint_block.14.weight", + "controlnet_cond_embedding.conv_out.bias": "input_hint_block.14.bias", + }, + "class_embed_type": { + "class_embedding.linear_1.weight": "label_emb.0.0.weight", + "class_embedding.linear_1.bias": "label_emb.0.0.bias", + "class_embedding.linear_2.weight": "label_emb.0.2.weight", + "class_embedding.linear_2.bias": "label_emb.0.2.bias", + }, + "addition_embed_type": { + "add_embedding.linear_1.weight": "label_emb.0.0.weight", + "add_embedding.linear_1.bias": "label_emb.0.0.bias", + "add_embedding.linear_2.weight": "label_emb.0.2.weight", + "add_embedding.linear_2.bias": "label_emb.0.2.bias", + }, + }, + "vae": { + "encoder.conv_in.weight": "encoder.conv_in.weight", + "encoder.conv_in.bias": "encoder.conv_in.bias", + "encoder.conv_out.weight": "encoder.conv_out.weight", + "encoder.conv_out.bias": "encoder.conv_out.bias", + "encoder.conv_norm_out.weight": "encoder.norm_out.weight", + "encoder.conv_norm_out.bias": "encoder.norm_out.bias", + "decoder.conv_in.weight": "decoder.conv_in.weight", + "decoder.conv_in.bias": "decoder.conv_in.bias", + "decoder.conv_out.weight": "decoder.conv_out.weight", + "decoder.conv_out.bias": "decoder.conv_out.bias", + "decoder.conv_norm_out.weight": "decoder.norm_out.weight", + "decoder.conv_norm_out.bias": "decoder.norm_out.bias", + "quant_conv.weight": "quant_conv.weight", + "quant_conv.bias": "quant_conv.bias", + "post_quant_conv.weight": "post_quant_conv.weight", + "post_quant_conv.bias": "post_quant_conv.bias", + }, + "openclip": { + "layers": { + "text_model.embeddings.position_embedding.weight": "positional_embedding", + "text_model.embeddings.token_embedding.weight": "token_embedding.weight", + "text_model.final_layer_norm.weight": "ln_final.weight", + "text_model.final_layer_norm.bias": "ln_final.bias", + "text_projection.weight": "text_projection", + }, + "transformer": { + "text_model.encoder.layers.": "resblocks.", + "layer_norm1": "ln_1", + "layer_norm2": "ln_2", + ".fc1.": ".c_fc.", + ".fc2.": ".c_proj.", + ".self_attn": ".attn", + "transformer.text_model.final_layer_norm.": "ln_final.", + "transformer.text_model.embeddings.token_embedding.weight": "token_embedding.weight", + "transformer.text_model.embeddings.position_embedding.weight": "positional_embedding", + }, + }, +} + +SD_2_TEXT_ENCODER_KEYS_TO_IGNORE = [ + "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_bias", + "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_weight", + "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.bias", + "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.weight", + "cond_stage_model.model.transformer.resblocks.23.ln_1.bias", + "cond_stage_model.model.transformer.resblocks.23.ln_1.weight", + "cond_stage_model.model.transformer.resblocks.23.ln_2.bias", + "cond_stage_model.model.transformer.resblocks.23.ln_2.weight", + "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.bias", + "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.weight", + "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.bias", + "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.weight", + "cond_stage_model.model.text_projection", +] + +# To support legacy scheduler_type argument +SCHEDULER_DEFAULT_CONFIG = { + "beta_schedule": "scaled_linear", + "beta_start": 0.00085, + "beta_end": 0.012, + "interpolation_type": "linear", + "num_train_timesteps": 1000, + "prediction_type": "epsilon", + "sample_max_value": 1.0, + "set_alpha_to_one": False, + "skip_prk_steps": True, + "steps_offset": 1, + "timestep_spacing": "leading", +} + +LDM_VAE_KEYS = ["first_stage_model.", "vae."] +LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215 +PLAYGROUND_VAE_SCALING_FACTOR = 0.5 +LDM_UNET_KEY = "model.diffusion_model." +LDM_CONTROLNET_KEY = "control_model." +LDM_CLIP_PREFIX_TO_REMOVE = [ + "cond_stage_model.transformer.", + "conditioner.embedders.0.transformer.", +] +LDM_OPEN_CLIP_TEXT_PROJECTION_DIM = 1024 +SCHEDULER_LEGACY_KWARGS = ["prediction_type", "scheduler_type"] + +VALID_URL_PREFIXES = ["https://huggingface.co/", "huggingface.co/", "hf.co/", "https://hf.co/"] + + +class SingleFileComponentError(Exception): + def __init__(self, message=None): + self.message = message + super().__init__(self.message) + + +def is_valid_url(url): + result = urlparse(url) + if result.scheme and result.netloc: + return True + + return False + + +def _is_single_file_path_or_url(pretrained_model_name_or_path): + if not os.path.isfile(pretrained_model_name_or_path) or not is_valid_url(pretrained_model_name_or_path): + return False + + repo_id, weight_name = _extract_repo_id_and_weights_name(pretrained_model_name_or_path) + return bool(repo_id and weight_name) + + +def _extract_repo_id_and_weights_name(pretrained_model_name_or_path): + if not is_valid_url(pretrained_model_name_or_path): + raise ValueError("Invalid `pretrained_model_name_or_path` provided. Please set it to a valid URL.") + + pattern = r"([^/]+)/([^/]+)/(?:blob/main/)?(.+)" + weights_name = None + repo_id = (None,) + for prefix in VALID_URL_PREFIXES: + pretrained_model_name_or_path = pretrained_model_name_or_path.replace(prefix, "") + match = re.match(pattern, pretrained_model_name_or_path) + if not match: + return repo_id, weights_name + + repo_id = f"{match.group(1)}/{match.group(2)}" + weights_name = match.group(3) + + return repo_id, weights_name + + +def _is_model_weights_in_cached_folder(cached_folder, name): + pretrained_model_name_or_path = os.path.join(cached_folder, name) + weights_exist = False + + for weights_name in [WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME]: + if os.path.isfile(os.path.join(pretrained_model_name_or_path, weights_name)): + weights_exist = True + + return weights_exist + + +def _is_legacy_scheduler_kwargs(kwargs): + return any(k in SCHEDULER_LEGACY_KWARGS for k in kwargs.keys()) + + +def load_single_file_checkpoint( + pretrained_model_link_or_path, + force_download=False, + proxies=None, + token=None, + cache_dir=None, + local_files_only=None, + revision=None, + disable_mmap=False, + user_agent=None, +): + if user_agent is None: + user_agent = {"file_type": "single_file", "framework": "pytorch"} + + if os.path.isfile(pretrained_model_link_or_path): + pretrained_model_link_or_path = pretrained_model_link_or_path + + else: + repo_id, weights_name = _extract_repo_id_and_weights_name(pretrained_model_link_or_path) + pretrained_model_link_or_path = _get_model_file( + repo_id, + weights_name=weights_name, + force_download=force_download, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + user_agent=user_agent, + ) + + checkpoint = load_state_dict(pretrained_model_link_or_path, disable_mmap=disable_mmap) + + # some checkpoints contain the model state dict under a "state_dict" key + while "state_dict" in checkpoint: + checkpoint = checkpoint["state_dict"] + + return checkpoint + + +def fetch_original_config(original_config_file, local_files_only=False): + if os.path.isfile(original_config_file): + with open(original_config_file, "r") as fp: + original_config_file = fp.read() + + elif is_valid_url(original_config_file): + if local_files_only: + raise ValueError( + "`local_files_only` is set to True, but a URL was provided as `original_config_file`. " + "Please provide a valid local file path." + ) + + original_config_file = BytesIO(requests.get(original_config_file, timeout=DIFFUSERS_REQUEST_TIMEOUT).content) + + else: + raise ValueError("Invalid `original_config_file` provided. Please set it to a valid file path or URL.") + + original_config = yaml.safe_load(original_config_file) + + return original_config + + +def is_clip_model(checkpoint): + if CHECKPOINT_KEY_NAMES["clip"] in checkpoint: + return True + + return False + + +def is_clip_sdxl_model(checkpoint): + if CHECKPOINT_KEY_NAMES["clip_sdxl"] in checkpoint: + return True + + return False + + +def is_clip_sd3_model(checkpoint): + if CHECKPOINT_KEY_NAMES["clip_sd3"] in checkpoint: + return True + + return False + + +def is_open_clip_model(checkpoint): + if CHECKPOINT_KEY_NAMES["open_clip"] in checkpoint: + return True + + return False + + +def is_open_clip_sdxl_model(checkpoint): + if CHECKPOINT_KEY_NAMES["open_clip_sdxl"] in checkpoint: + return True + + return False + + +def is_open_clip_sd3_model(checkpoint): + if CHECKPOINT_KEY_NAMES["open_clip_sd3"] in checkpoint: + return True + + return False + + +def is_open_clip_sdxl_refiner_model(checkpoint): + if CHECKPOINT_KEY_NAMES["open_clip_sdxl_refiner"] in checkpoint: + return True + + return False + + +def is_clip_model_in_single_file(class_obj, checkpoint): + is_clip_in_checkpoint = any( + [ + is_clip_model(checkpoint), + is_clip_sd3_model(checkpoint), + is_open_clip_model(checkpoint), + is_open_clip_sdxl_model(checkpoint), + is_open_clip_sdxl_refiner_model(checkpoint), + is_open_clip_sd3_model(checkpoint), + ] + ) + if ( + class_obj.__name__ == "CLIPTextModel" or class_obj.__name__ == "CLIPTextModelWithProjection" + ) and is_clip_in_checkpoint: + return True + + return False + + +def infer_diffusers_model_type(checkpoint): + if ( + CHECKPOINT_KEY_NAMES["inpainting"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["inpainting"]].shape[1] == 9 + ): + if CHECKPOINT_KEY_NAMES["v2"] in checkpoint and checkpoint[CHECKPOINT_KEY_NAMES["v2"]].shape[-1] == 1024: + model_type = "inpainting_v2" + elif CHECKPOINT_KEY_NAMES["xl_base"] in checkpoint: + model_type = "xl_inpaint" + else: + model_type = "inpainting" + + elif CHECKPOINT_KEY_NAMES["v2"] in checkpoint and checkpoint[CHECKPOINT_KEY_NAMES["v2"]].shape[-1] == 1024: + model_type = "v2" + + elif CHECKPOINT_KEY_NAMES["playground-v2-5"] in checkpoint: + model_type = "playground-v2-5" + + elif CHECKPOINT_KEY_NAMES["xl_base"] in checkpoint: + model_type = "xl_base" + + elif CHECKPOINT_KEY_NAMES["xl_refiner"] in checkpoint: + model_type = "xl_refiner" + + elif CHECKPOINT_KEY_NAMES["upscale"] in checkpoint: + model_type = "upscale" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["controlnet"]): + if CHECKPOINT_KEY_NAMES["controlnet_xl"] in checkpoint: + if CHECKPOINT_KEY_NAMES["controlnet_xl_large"] in checkpoint: + model_type = "controlnet_xl_large" + elif CHECKPOINT_KEY_NAMES["controlnet_xl_mid"] in checkpoint: + model_type = "controlnet_xl_mid" + else: + model_type = "controlnet_xl_small" + else: + model_type = "controlnet" + + elif ( + CHECKPOINT_KEY_NAMES["stable_cascade_stage_c"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["stable_cascade_stage_c"]].shape[0] == 1536 + ): + model_type = "stable_cascade_stage_c_lite" + + elif ( + CHECKPOINT_KEY_NAMES["stable_cascade_stage_c"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["stable_cascade_stage_c"]].shape[0] == 2048 + ): + model_type = "stable_cascade_stage_c" + + elif ( + CHECKPOINT_KEY_NAMES["stable_cascade_stage_b"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["stable_cascade_stage_b"]].shape[-1] == 576 + ): + model_type = "stable_cascade_stage_b_lite" + + elif ( + CHECKPOINT_KEY_NAMES["stable_cascade_stage_b"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["stable_cascade_stage_b"]].shape[-1] == 640 + ): + model_type = "stable_cascade_stage_b" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["sd3"]) and any( + checkpoint[key].shape[-1] == 9216 if key in checkpoint else False for key in CHECKPOINT_KEY_NAMES["sd3"] + ): + if "model.diffusion_model.pos_embed" in checkpoint: + key = "model.diffusion_model.pos_embed" + else: + key = "pos_embed" + + if checkpoint[key].shape[1] == 36864: + model_type = "sd3" + elif checkpoint[key].shape[1] == 147456: + model_type = "sd35_medium" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["sd35_large"]): + model_type = "sd35_large" + + elif CHECKPOINT_KEY_NAMES["animatediff"] in checkpoint: + if CHECKPOINT_KEY_NAMES["animatediff_scribble"] in checkpoint: + model_type = "animatediff_scribble" + + elif CHECKPOINT_KEY_NAMES["animatediff_rgb"] in checkpoint: + model_type = "animatediff_rgb" + + elif CHECKPOINT_KEY_NAMES["animatediff_v2"] in checkpoint: + model_type = "animatediff_v2" + + elif checkpoint[CHECKPOINT_KEY_NAMES["animatediff_sdxl_beta"]].shape[-1] == 320: + model_type = "animatediff_sdxl_beta" + + elif checkpoint[CHECKPOINT_KEY_NAMES["animatediff"]].shape[1] == 24: + model_type = "animatediff_v1" + + else: + model_type = "animatediff_v3" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["flux2"]): + model_type = "flux-2-dev" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["flux"]): + if any( + g in checkpoint for g in ["guidance_in.in_layer.bias", "model.diffusion_model.guidance_in.in_layer.bias"] + ): + if "model.diffusion_model.img_in.weight" in checkpoint: + key = "model.diffusion_model.img_in.weight" + else: + key = "img_in.weight" + + if checkpoint[key].shape[1] == 384: + model_type = "flux-fill" + elif checkpoint[key].shape[1] == 128: + model_type = "flux-depth" + else: + model_type = "flux-dev" + else: + model_type = "flux-schnell" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["ltx-video"]): + has_vae = "vae.encoder.conv_in.conv.bias" in checkpoint + if any(key.endswith("transformer_blocks.47.scale_shift_table") for key in checkpoint): + model_type = "ltx-video-0.9.7" + elif has_vae and checkpoint["vae.encoder.conv_out.conv.weight"].shape[1] == 2048: + model_type = "ltx-video-0.9.5" + elif "vae.decoder.last_time_embedder.timestep_embedder.linear_1.weight" in checkpoint: + model_type = "ltx-video-0.9.1" + else: + model_type = "ltx-video" + + elif CHECKPOINT_KEY_NAMES["autoencoder-dc"] in checkpoint: + encoder_key = "encoder.project_in.conv.conv.bias" + decoder_key = "decoder.project_in.main.conv.weight" + + if CHECKPOINT_KEY_NAMES["autoencoder-dc-sana"] in checkpoint: + model_type = "autoencoder-dc-f32c32-sana" + + elif checkpoint[encoder_key].shape[-1] == 64 and checkpoint[decoder_key].shape[1] == 32: + model_type = "autoencoder-dc-f32c32" + + elif checkpoint[encoder_key].shape[-1] == 64 and checkpoint[decoder_key].shape[1] == 128: + model_type = "autoencoder-dc-f64c128" + + else: + model_type = "autoencoder-dc-f128c512" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["mochi-1-preview"]): + model_type = "mochi-1-preview" + + elif CHECKPOINT_KEY_NAMES["hunyuan-video"] in checkpoint: + model_type = "hunyuan-video" + + elif all(key in checkpoint for key in CHECKPOINT_KEY_NAMES["auraflow"]): + model_type = "auraflow" + + elif ( + CHECKPOINT_KEY_NAMES["instruct-pix2pix"] in checkpoint + and checkpoint[CHECKPOINT_KEY_NAMES["instruct-pix2pix"]].shape[1] == 8 + ): + model_type = "instruct-pix2pix" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["z-image-turbo"]): + model_type = "z-image-turbo" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["lumina2"]): + model_type = "lumina2" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["sana"]): + model_type = "sana" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["wan"]): + if "model.diffusion_model.patch_embedding.weight" in checkpoint: + target_key = "model.diffusion_model.patch_embedding.weight" + else: + target_key = "patch_embedding.weight" + + if CHECKPOINT_KEY_NAMES["wan_vace"] in checkpoint: + if checkpoint[target_key].shape[0] == 1536: + model_type = "wan-vace-1.3B" + elif checkpoint[target_key].shape[0] == 5120: + model_type = "wan-vace-14B" + + if CHECKPOINT_KEY_NAMES["wan_animate"] in checkpoint: + model_type = "wan-animate-14B" + + elif checkpoint[target_key].shape[0] == 1536: + model_type = "wan-t2v-1.3B" + elif checkpoint[target_key].shape[0] == 5120 and checkpoint[target_key].shape[1] == 16: + model_type = "wan-t2v-14B" + else: + model_type = "wan-i2v-14B" + + elif CHECKPOINT_KEY_NAMES["wan_vae"] in checkpoint: + # All Wan models use the same VAE so we can use the same default model repo to fetch the config + model_type = "wan-t2v-14B" + + elif CHECKPOINT_KEY_NAMES["hidream"] in checkpoint: + model_type = "hidream" + + elif all(key in checkpoint for key in CHECKPOINT_KEY_NAMES["cosmos-1.0"]): + x_embedder_shape = checkpoint[CHECKPOINT_KEY_NAMES["cosmos-1.0"][0]].shape + if x_embedder_shape[1] == 68: + model_type = "cosmos-1.0-t2w-7B" if x_embedder_shape[0] == 4096 else "cosmos-1.0-t2w-14B" + elif x_embedder_shape[1] == 72: + model_type = "cosmos-1.0-v2w-7B" if x_embedder_shape[0] == 4096 else "cosmos-1.0-v2w-14B" + else: + raise ValueError(f"Unexpected x_embedder shape: {x_embedder_shape} when loading Cosmos 1.0 model.") + + elif all(key in checkpoint for key in CHECKPOINT_KEY_NAMES["cosmos-2.0"]): + x_embedder_shape = checkpoint[CHECKPOINT_KEY_NAMES["cosmos-2.0"][0]].shape + if x_embedder_shape[1] == 68: + model_type = "cosmos-2.0-t2i-2B" if x_embedder_shape[0] == 2048 else "cosmos-2.0-t2i-14B" + elif x_embedder_shape[1] == 72: + model_type = "cosmos-2.0-v2w-2B" if x_embedder_shape[0] == 2048 else "cosmos-2.0-v2w-14B" + else: + raise ValueError(f"Unexpected x_embedder shape: {x_embedder_shape} when loading Cosmos 2.0 model.") + + elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet-2.x"] in checkpoint: + before_proj_weight = checkpoint.get("control_noise_refiner.0.before_proj.weight", None) + if before_proj_weight is None: + model_type = "z-image-turbo-controlnet-2.0" + elif before_proj_weight is not None and torch.all(before_proj_weight == 0.0): + model_type = "z-image-turbo-controlnet-2.0" + else: + model_type = "z-image-turbo-controlnet-2.1" + + elif CHECKPOINT_KEY_NAMES["z-image-turbo-controlnet"] in checkpoint: + model_type = "z-image-turbo-controlnet" + + elif any(key in checkpoint for key in CHECKPOINT_KEY_NAMES["ltx2"]): + model_type = "ltx2-dev" + + else: + model_type = "v1" + + return model_type + + +def fetch_diffusers_config(checkpoint): + model_type = infer_diffusers_model_type(checkpoint) + model_path = DIFFUSERS_DEFAULT_PIPELINE_PATHS[model_type] + model_path = copy.deepcopy(model_path) + + return model_path + + +def set_image_size(checkpoint, image_size=None): + if image_size: + return image_size + + model_type = infer_diffusers_model_type(checkpoint) + image_size = DIFFUSERS_TO_LDM_DEFAULT_IMAGE_SIZE_MAP[model_type] + + return image_size + + +# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.conv_attn_to_linear +def conv_attn_to_linear(checkpoint): + keys = list(checkpoint.keys()) + attn_keys = ["query.weight", "key.weight", "value.weight"] + for key in keys: + if ".".join(key.split(".")[-2:]) in attn_keys: + if checkpoint[key].ndim > 2: + checkpoint[key] = checkpoint[key][:, :, 0, 0] + elif "proj_attn.weight" in key: + if checkpoint[key].ndim > 2: + checkpoint[key] = checkpoint[key][:, :, 0] + + +def create_unet_diffusers_config_from_ldm( + original_config, checkpoint, image_size=None, upcast_attention=None, num_in_channels=None +): + """ + Creates a config for the diffusers based on the config of the LDM model. + """ + if image_size is not None: + deprecation_message = ( + "Configuring UNet2DConditionModel with the `image_size` argument to `from_single_file`" + "is deprecated and will be ignored in future versions." + ) + deprecate("image_size", "1.0.0", deprecation_message) + + image_size = set_image_size(checkpoint, image_size=image_size) + + if ( + "unet_config" in original_config["model"]["params"] + and original_config["model"]["params"]["unet_config"] is not None + ): + unet_params = original_config["model"]["params"]["unet_config"]["params"] + else: + unet_params = original_config["model"]["params"]["network_config"]["params"] + + if num_in_channels is not None: + deprecation_message = ( + "Configuring UNet2DConditionModel with the `num_in_channels` argument to `from_single_file`" + "is deprecated and will be ignored in future versions." + ) + deprecate("image_size", "1.0.0", deprecation_message) + in_channels = num_in_channels + else: + in_channels = unet_params["in_channels"] + + vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"] + block_out_channels = [unet_params["model_channels"] * mult for mult in unet_params["channel_mult"]] + + down_block_types = [] + resolution = 1 + for i in range(len(block_out_channels)): + block_type = "CrossAttnDownBlock2D" if resolution in unet_params["attention_resolutions"] else "DownBlock2D" + down_block_types.append(block_type) + if i != len(block_out_channels) - 1: + resolution *= 2 + + up_block_types = [] + for i in range(len(block_out_channels)): + block_type = "CrossAttnUpBlock2D" if resolution in unet_params["attention_resolutions"] else "UpBlock2D" + up_block_types.append(block_type) + resolution //= 2 + + if unet_params["transformer_depth"] is not None: + transformer_layers_per_block = ( + unet_params["transformer_depth"] + if isinstance(unet_params["transformer_depth"], int) + else list(unet_params["transformer_depth"]) + ) + else: + transformer_layers_per_block = 1 + + vae_scale_factor = 2 ** (len(vae_params["ch_mult"]) - 1) + + head_dim = unet_params["num_heads"] if "num_heads" in unet_params else None + use_linear_projection = ( + unet_params["use_linear_in_transformer"] if "use_linear_in_transformer" in unet_params else False + ) + if use_linear_projection: + # stable diffusion 2-base-512 and 2-768 + if head_dim is None: + head_dim_mult = unet_params["model_channels"] // unet_params["num_head_channels"] + head_dim = [head_dim_mult * c for c in list(unet_params["channel_mult"])] + + class_embed_type = None + addition_embed_type = None + addition_time_embed_dim = None + projection_class_embeddings_input_dim = None + context_dim = None + + if unet_params["context_dim"] is not None: + context_dim = ( + unet_params["context_dim"] + if isinstance(unet_params["context_dim"], int) + else unet_params["context_dim"][0] + ) + + if "num_classes" in unet_params: + if unet_params["num_classes"] == "sequential": + if context_dim in [2048, 1280]: + # SDXL + addition_embed_type = "text_time" + addition_time_embed_dim = 256 + else: + class_embed_type = "projection" + assert "adm_in_channels" in unet_params + projection_class_embeddings_input_dim = unet_params["adm_in_channels"] + + config = { + "sample_size": image_size // vae_scale_factor, + "in_channels": in_channels, + "down_block_types": down_block_types, + "block_out_channels": block_out_channels, + "layers_per_block": unet_params["num_res_blocks"], + "cross_attention_dim": context_dim, + "attention_head_dim": head_dim, + "use_linear_projection": use_linear_projection, + "class_embed_type": class_embed_type, + "addition_embed_type": addition_embed_type, + "addition_time_embed_dim": addition_time_embed_dim, + "projection_class_embeddings_input_dim": projection_class_embeddings_input_dim, + "transformer_layers_per_block": transformer_layers_per_block, + } + + if upcast_attention is not None: + deprecation_message = ( + "Configuring UNet2DConditionModel with the `upcast_attention` argument to `from_single_file`" + "is deprecated and will be ignored in future versions." + ) + deprecate("image_size", "1.0.0", deprecation_message) + config["upcast_attention"] = upcast_attention + + if "disable_self_attentions" in unet_params: + config["only_cross_attention"] = unet_params["disable_self_attentions"] + + if "num_classes" in unet_params and isinstance(unet_params["num_classes"], int): + config["num_class_embeds"] = unet_params["num_classes"] + + config["out_channels"] = unet_params["out_channels"] + config["up_block_types"] = up_block_types + + return config + + +def create_controlnet_diffusers_config_from_ldm(original_config, checkpoint, image_size=None, **kwargs): + if image_size is not None: + deprecation_message = ( + "Configuring ControlNetModel with the `image_size` argument" + "is deprecated and will be ignored in future versions." + ) + deprecate("image_size", "1.0.0", deprecation_message) + + image_size = set_image_size(checkpoint, image_size=image_size) + + unet_params = original_config["model"]["params"]["control_stage_config"]["params"] + diffusers_unet_config = create_unet_diffusers_config_from_ldm(original_config, image_size=image_size) + + controlnet_config = { + "conditioning_channels": unet_params["hint_channels"], + "in_channels": diffusers_unet_config["in_channels"], + "down_block_types": diffusers_unet_config["down_block_types"], + "block_out_channels": diffusers_unet_config["block_out_channels"], + "layers_per_block": diffusers_unet_config["layers_per_block"], + "cross_attention_dim": diffusers_unet_config["cross_attention_dim"], + "attention_head_dim": diffusers_unet_config["attention_head_dim"], + "use_linear_projection": diffusers_unet_config["use_linear_projection"], + "class_embed_type": diffusers_unet_config["class_embed_type"], + "addition_embed_type": diffusers_unet_config["addition_embed_type"], + "addition_time_embed_dim": diffusers_unet_config["addition_time_embed_dim"], + "projection_class_embeddings_input_dim": diffusers_unet_config["projection_class_embeddings_input_dim"], + "transformer_layers_per_block": diffusers_unet_config["transformer_layers_per_block"], + } + + return controlnet_config + + +def create_vae_diffusers_config_from_ldm(original_config, checkpoint, image_size=None, scaling_factor=None): + """ + Creates a config for the diffusers based on the config of the LDM model. + """ + if image_size is not None: + deprecation_message = ( + "Configuring AutoencoderKL with the `image_size` argument" + "is deprecated and will be ignored in future versions." + ) + deprecate("image_size", "1.0.0", deprecation_message) + + image_size = set_image_size(checkpoint, image_size=image_size) + + if "edm_mean" in checkpoint and "edm_std" in checkpoint: + latents_mean = checkpoint["edm_mean"] + latents_std = checkpoint["edm_std"] + else: + latents_mean = None + latents_std = None + + vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"] + if (scaling_factor is None) and (latents_mean is not None) and (latents_std is not None): + scaling_factor = PLAYGROUND_VAE_SCALING_FACTOR + + elif (scaling_factor is None) and ("scale_factor" in original_config["model"]["params"]): + scaling_factor = original_config["model"]["params"]["scale_factor"] + + elif scaling_factor is None: + scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR + + block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]] + down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels) + up_block_types = ["UpDecoderBlock2D"] * len(block_out_channels) + + config = { + "sample_size": image_size, + "in_channels": vae_params["in_channels"], + "out_channels": vae_params["out_ch"], + "down_block_types": down_block_types, + "up_block_types": up_block_types, + "block_out_channels": block_out_channels, + "latent_channels": vae_params["z_channels"], + "layers_per_block": vae_params["num_res_blocks"], + "scaling_factor": scaling_factor, + } + if latents_mean is not None and latents_std is not None: + config.update({"latents_mean": latents_mean, "latents_std": latents_std}) + + return config + + +def update_unet_resnet_ldm_to_diffusers(ldm_keys, new_checkpoint, checkpoint, mapping=None): + for ldm_key in ldm_keys: + diffusers_key = ( + ldm_key.replace("in_layers.0", "norm1") + .replace("in_layers.2", "conv1") + .replace("out_layers.0", "norm2") + .replace("out_layers.3", "conv2") + .replace("emb_layers.1", "time_emb_proj") + .replace("skip_connection", "conv_shortcut") + ) + if mapping: + diffusers_key = diffusers_key.replace(mapping["old"], mapping["new"]) + new_checkpoint[diffusers_key] = checkpoint.get(ldm_key) + + +def update_unet_attention_ldm_to_diffusers(ldm_keys, new_checkpoint, checkpoint, mapping): + for ldm_key in ldm_keys: + diffusers_key = ldm_key.replace(mapping["old"], mapping["new"]) + new_checkpoint[diffusers_key] = checkpoint.get(ldm_key) + + +def update_vae_resnet_ldm_to_diffusers(keys, new_checkpoint, checkpoint, mapping): + for ldm_key in keys: + diffusers_key = ldm_key.replace(mapping["old"], mapping["new"]).replace("nin_shortcut", "conv_shortcut") + new_checkpoint[diffusers_key] = checkpoint.get(ldm_key) + + +def update_vae_attentions_ldm_to_diffusers(keys, new_checkpoint, checkpoint, mapping): + for ldm_key in keys: + diffusers_key = ( + ldm_key.replace(mapping["old"], mapping["new"]) + .replace("norm.weight", "group_norm.weight") + .replace("norm.bias", "group_norm.bias") + .replace("q.weight", "to_q.weight") + .replace("q.bias", "to_q.bias") + .replace("k.weight", "to_k.weight") + .replace("k.bias", "to_k.bias") + .replace("v.weight", "to_v.weight") + .replace("v.bias", "to_v.bias") + .replace("proj_out.weight", "to_out.0.weight") + .replace("proj_out.bias", "to_out.0.bias") + ) + new_checkpoint[diffusers_key] = checkpoint.get(ldm_key) + + # proj_attn.weight has to be converted from conv 1D to linear + shape = new_checkpoint[diffusers_key].shape + + if len(shape) == 3: + new_checkpoint[diffusers_key] = new_checkpoint[diffusers_key][:, :, 0] + elif len(shape) == 4: + new_checkpoint[diffusers_key] = new_checkpoint[diffusers_key][:, :, 0, 0] + + +def convert_stable_cascade_unet_single_file_to_diffusers(checkpoint, **kwargs): + is_stage_c = "clip_txt_mapper.weight" in checkpoint + + if is_stage_c: + state_dict = {} + for key in checkpoint.keys(): + if key.endswith("in_proj_weight"): + weights = checkpoint[key].chunk(3, 0) + state_dict[key.replace("attn.in_proj_weight", "to_q.weight")] = weights[0] + state_dict[key.replace("attn.in_proj_weight", "to_k.weight")] = weights[1] + state_dict[key.replace("attn.in_proj_weight", "to_v.weight")] = weights[2] + elif key.endswith("in_proj_bias"): + weights = checkpoint[key].chunk(3, 0) + state_dict[key.replace("attn.in_proj_bias", "to_q.bias")] = weights[0] + state_dict[key.replace("attn.in_proj_bias", "to_k.bias")] = weights[1] + state_dict[key.replace("attn.in_proj_bias", "to_v.bias")] = weights[2] + elif key.endswith("out_proj.weight"): + weights = checkpoint[key] + state_dict[key.replace("attn.out_proj.weight", "to_out.0.weight")] = weights + elif key.endswith("out_proj.bias"): + weights = checkpoint[key] + state_dict[key.replace("attn.out_proj.bias", "to_out.0.bias")] = weights + else: + state_dict[key] = checkpoint[key] + else: + state_dict = {} + for key in checkpoint.keys(): + if key.endswith("in_proj_weight"): + weights = checkpoint[key].chunk(3, 0) + state_dict[key.replace("attn.in_proj_weight", "to_q.weight")] = weights[0] + state_dict[key.replace("attn.in_proj_weight", "to_k.weight")] = weights[1] + state_dict[key.replace("attn.in_proj_weight", "to_v.weight")] = weights[2] + elif key.endswith("in_proj_bias"): + weights = checkpoint[key].chunk(3, 0) + state_dict[key.replace("attn.in_proj_bias", "to_q.bias")] = weights[0] + state_dict[key.replace("attn.in_proj_bias", "to_k.bias")] = weights[1] + state_dict[key.replace("attn.in_proj_bias", "to_v.bias")] = weights[2] + elif key.endswith("out_proj.weight"): + weights = checkpoint[key] + state_dict[key.replace("attn.out_proj.weight", "to_out.0.weight")] = weights + elif key.endswith("out_proj.bias"): + weights = checkpoint[key] + state_dict[key.replace("attn.out_proj.bias", "to_out.0.bias")] = weights + # rename clip_mapper to clip_txt_pooled_mapper + elif key.endswith("clip_mapper.weight"): + weights = checkpoint[key] + state_dict[key.replace("clip_mapper.weight", "clip_txt_pooled_mapper.weight")] = weights + elif key.endswith("clip_mapper.bias"): + weights = checkpoint[key] + state_dict[key.replace("clip_mapper.bias", "clip_txt_pooled_mapper.bias")] = weights + else: + state_dict[key] = checkpoint[key] + + return state_dict + + +def convert_ldm_unet_checkpoint(checkpoint, config, extract_ema=False, **kwargs): + """ + Takes a state dict and a config, and returns a converted checkpoint. + """ + # extract state_dict for UNet + unet_state_dict = {} + keys = list(checkpoint.keys()) + unet_key = LDM_UNET_KEY + + # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA + if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema: + logger.warning("Checkpoint has both EMA and non-EMA weights.") + logger.warning( + "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA" + " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag." + ) + for key in keys: + if key.startswith("model.diffusion_model"): + flat_ema_key = "model_ema." + "".join(key.split(".")[1:]) + unet_state_dict[key.replace(unet_key, "")] = checkpoint.get(flat_ema_key) + else: + if sum(k.startswith("model_ema") for k in keys) > 100: + logger.warning( + "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA" + " weights (usually better for inference), please make sure to add the `--extract_ema` flag." + ) + for key in keys: + if key.startswith(unet_key): + unet_state_dict[key.replace(unet_key, "")] = checkpoint.get(key) + + new_checkpoint = {} + ldm_unet_keys = DIFFUSERS_TO_LDM_MAPPING["unet"]["layers"] + for diffusers_key, ldm_key in ldm_unet_keys.items(): + if ldm_key not in unet_state_dict: + continue + new_checkpoint[diffusers_key] = unet_state_dict[ldm_key] + + if ("class_embed_type" in config) and (config["class_embed_type"] in ["timestep", "projection"]): + class_embed_keys = DIFFUSERS_TO_LDM_MAPPING["unet"]["class_embed_type"] + for diffusers_key, ldm_key in class_embed_keys.items(): + new_checkpoint[diffusers_key] = unet_state_dict[ldm_key] + + if ("addition_embed_type" in config) and (config["addition_embed_type"] == "text_time"): + addition_embed_keys = DIFFUSERS_TO_LDM_MAPPING["unet"]["addition_embed_type"] + for diffusers_key, ldm_key in addition_embed_keys.items(): + new_checkpoint[diffusers_key] = unet_state_dict[ldm_key] + + # Relevant to StableDiffusionUpscalePipeline + if "num_class_embeds" in config: + if (config["num_class_embeds"] is not None) and ("label_emb.weight" in unet_state_dict): + new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"] + + # Retrieves the keys for the input blocks only + num_input_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "input_blocks" in layer}) + input_blocks = { + layer_id: [key for key in unet_state_dict if f"input_blocks.{layer_id}" in key] + for layer_id in range(num_input_blocks) + } + + # Retrieves the keys for the middle blocks only + num_middle_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "middle_block" in layer}) + middle_blocks = { + layer_id: [key for key in unet_state_dict if f"middle_block.{layer_id}" in key] + for layer_id in range(num_middle_blocks) + } + + # Retrieves the keys for the output blocks only + num_output_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "output_blocks" in layer}) + output_blocks = { + layer_id: [key for key in unet_state_dict if f"output_blocks.{layer_id}" in key] + for layer_id in range(num_output_blocks) + } + + # Down blocks + for i in range(1, num_input_blocks): + block_id = (i - 1) // (config["layers_per_block"] + 1) + layer_in_block_id = (i - 1) % (config["layers_per_block"] + 1) + + resnets = [ + key for key in input_blocks[i] if f"input_blocks.{i}.0" in key and f"input_blocks.{i}.0.op" not in key + ] + update_unet_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + unet_state_dict, + {"old": f"input_blocks.{i}.0", "new": f"down_blocks.{block_id}.resnets.{layer_in_block_id}"}, + ) + + if f"input_blocks.{i}.0.op.weight" in unet_state_dict: + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.weight"] = unet_state_dict.get( + f"input_blocks.{i}.0.op.weight" + ) + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.bias"] = unet_state_dict.get( + f"input_blocks.{i}.0.op.bias" + ) + + attentions = [key for key in input_blocks[i] if f"input_blocks.{i}.1" in key] + if attentions: + update_unet_attention_ldm_to_diffusers( + attentions, + new_checkpoint, + unet_state_dict, + {"old": f"input_blocks.{i}.1", "new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}"}, + ) + + # Mid blocks + for key in middle_blocks.keys(): + diffusers_key = max(key - 1, 0) + if key % 2 == 0: + update_unet_resnet_ldm_to_diffusers( + middle_blocks[key], + new_checkpoint, + unet_state_dict, + mapping={"old": f"middle_block.{key}", "new": f"mid_block.resnets.{diffusers_key}"}, + ) + else: + update_unet_attention_ldm_to_diffusers( + middle_blocks[key], + new_checkpoint, + unet_state_dict, + mapping={"old": f"middle_block.{key}", "new": f"mid_block.attentions.{diffusers_key}"}, + ) + + # Up Blocks + for i in range(num_output_blocks): + block_id = i // (config["layers_per_block"] + 1) + layer_in_block_id = i % (config["layers_per_block"] + 1) + + resnets = [ + key for key in output_blocks[i] if f"output_blocks.{i}.0" in key and f"output_blocks.{i}.0.op" not in key + ] + update_unet_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + unet_state_dict, + {"old": f"output_blocks.{i}.0", "new": f"up_blocks.{block_id}.resnets.{layer_in_block_id}"}, + ) + + attentions = [ + key for key in output_blocks[i] if f"output_blocks.{i}.1" in key and f"output_blocks.{i}.1.conv" not in key + ] + if attentions: + update_unet_attention_ldm_to_diffusers( + attentions, + new_checkpoint, + unet_state_dict, + {"old": f"output_blocks.{i}.1", "new": f"up_blocks.{block_id}.attentions.{layer_in_block_id}"}, + ) + + if f"output_blocks.{i}.1.conv.weight" in unet_state_dict: + new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.weight"] = unet_state_dict[ + f"output_blocks.{i}.1.conv.weight" + ] + new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.bias"] = unet_state_dict[ + f"output_blocks.{i}.1.conv.bias" + ] + if f"output_blocks.{i}.2.conv.weight" in unet_state_dict: + new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.weight"] = unet_state_dict[ + f"output_blocks.{i}.2.conv.weight" + ] + new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.bias"] = unet_state_dict[ + f"output_blocks.{i}.2.conv.bias" + ] + + return new_checkpoint + + +def convert_controlnet_checkpoint( + checkpoint, + config, + **kwargs, +): + # Return checkpoint if it's already been converted + if "time_embedding.linear_1.weight" in checkpoint: + return checkpoint + # Some controlnet ckpt files are distributed independently from the rest of the + # model components i.e. https://huggingface.co/thibaud/controlnet-sd21/ + if "time_embed.0.weight" in checkpoint: + controlnet_state_dict = checkpoint + + else: + controlnet_state_dict = {} + keys = list(checkpoint.keys()) + controlnet_key = LDM_CONTROLNET_KEY + for key in keys: + if key.startswith(controlnet_key): + controlnet_state_dict[key.replace(controlnet_key, "")] = checkpoint.get(key) + + new_checkpoint = {} + ldm_controlnet_keys = DIFFUSERS_TO_LDM_MAPPING["controlnet"]["layers"] + for diffusers_key, ldm_key in ldm_controlnet_keys.items(): + if ldm_key not in controlnet_state_dict: + continue + new_checkpoint[diffusers_key] = controlnet_state_dict[ldm_key] + + # Retrieves the keys for the input blocks only + num_input_blocks = len( + {".".join(layer.split(".")[:2]) for layer in controlnet_state_dict if "input_blocks" in layer} + ) + input_blocks = { + layer_id: [key for key in controlnet_state_dict if f"input_blocks.{layer_id}" in key] + for layer_id in range(num_input_blocks) + } + + # Down blocks + for i in range(1, num_input_blocks): + block_id = (i - 1) // (config["layers_per_block"] + 1) + layer_in_block_id = (i - 1) % (config["layers_per_block"] + 1) + + resnets = [ + key for key in input_blocks[i] if f"input_blocks.{i}.0" in key and f"input_blocks.{i}.0.op" not in key + ] + update_unet_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + controlnet_state_dict, + {"old": f"input_blocks.{i}.0", "new": f"down_blocks.{block_id}.resnets.{layer_in_block_id}"}, + ) + + if f"input_blocks.{i}.0.op.weight" in controlnet_state_dict: + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.weight"] = controlnet_state_dict.get( + f"input_blocks.{i}.0.op.weight" + ) + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.bias"] = controlnet_state_dict.get( + f"input_blocks.{i}.0.op.bias" + ) + + attentions = [key for key in input_blocks[i] if f"input_blocks.{i}.1" in key] + if attentions: + update_unet_attention_ldm_to_diffusers( + attentions, + new_checkpoint, + controlnet_state_dict, + {"old": f"input_blocks.{i}.1", "new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}"}, + ) + + # controlnet down blocks + for i in range(num_input_blocks): + new_checkpoint[f"controlnet_down_blocks.{i}.weight"] = controlnet_state_dict.get(f"zero_convs.{i}.0.weight") + new_checkpoint[f"controlnet_down_blocks.{i}.bias"] = controlnet_state_dict.get(f"zero_convs.{i}.0.bias") + + # Retrieves the keys for the middle blocks only + num_middle_blocks = len( + {".".join(layer.split(".")[:2]) for layer in controlnet_state_dict if "middle_block" in layer} + ) + middle_blocks = { + layer_id: [key for key in controlnet_state_dict if f"middle_block.{layer_id}" in key] + for layer_id in range(num_middle_blocks) + } + + # Mid blocks + for key in middle_blocks.keys(): + diffusers_key = max(key - 1, 0) + if key % 2 == 0: + update_unet_resnet_ldm_to_diffusers( + middle_blocks[key], + new_checkpoint, + controlnet_state_dict, + mapping={"old": f"middle_block.{key}", "new": f"mid_block.resnets.{diffusers_key}"}, + ) + else: + update_unet_attention_ldm_to_diffusers( + middle_blocks[key], + new_checkpoint, + controlnet_state_dict, + mapping={"old": f"middle_block.{key}", "new": f"mid_block.attentions.{diffusers_key}"}, + ) + + # mid block + new_checkpoint["controlnet_mid_block.weight"] = controlnet_state_dict.get("middle_block_out.0.weight") + new_checkpoint["controlnet_mid_block.bias"] = controlnet_state_dict.get("middle_block_out.0.bias") + + # controlnet cond embedding blocks + cond_embedding_blocks = { + ".".join(layer.split(".")[:2]) + for layer in controlnet_state_dict + if "input_hint_block" in layer and ("input_hint_block.0" not in layer) and ("input_hint_block.14" not in layer) + } + num_cond_embedding_blocks = len(cond_embedding_blocks) + + for idx in range(1, num_cond_embedding_blocks + 1): + diffusers_idx = idx - 1 + cond_block_id = 2 * idx + + new_checkpoint[f"controlnet_cond_embedding.blocks.{diffusers_idx}.weight"] = controlnet_state_dict.get( + f"input_hint_block.{cond_block_id}.weight" + ) + new_checkpoint[f"controlnet_cond_embedding.blocks.{diffusers_idx}.bias"] = controlnet_state_dict.get( + f"input_hint_block.{cond_block_id}.bias" + ) + + return new_checkpoint + + +def convert_ldm_vae_checkpoint(checkpoint, config): + # extract state dict for VAE + # remove the LDM_VAE_KEY prefix from the ldm checkpoint keys so that it is easier to map them to diffusers keys + vae_state_dict = {} + keys = list(checkpoint.keys()) + vae_key = "" + for ldm_vae_key in LDM_VAE_KEYS: + if any(k.startswith(ldm_vae_key) for k in keys): + vae_key = ldm_vae_key + + for key in keys: + if key.startswith(vae_key): + vae_state_dict[key.replace(vae_key, "")] = checkpoint.get(key) + + new_checkpoint = {} + vae_diffusers_ldm_map = DIFFUSERS_TO_LDM_MAPPING["vae"] + for diffusers_key, ldm_key in vae_diffusers_ldm_map.items(): + if ldm_key not in vae_state_dict: + continue + new_checkpoint[diffusers_key] = vae_state_dict[ldm_key] + + # Retrieves the keys for the encoder down blocks only + num_down_blocks = len(config["down_block_types"]) + down_blocks = { + layer_id: [key for key in vae_state_dict if f"down.{layer_id}" in key] for layer_id in range(num_down_blocks) + } + + for i in range(num_down_blocks): + resnets = [key for key in down_blocks[i] if f"down.{i}" in key and f"down.{i}.downsample" not in key] + update_vae_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + vae_state_dict, + mapping={"old": f"down.{i}.block", "new": f"down_blocks.{i}.resnets"}, + ) + if f"encoder.down.{i}.downsample.conv.weight" in vae_state_dict: + new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.weight"] = vae_state_dict.get( + f"encoder.down.{i}.downsample.conv.weight" + ) + new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.bias"] = vae_state_dict.get( + f"encoder.down.{i}.downsample.conv.bias" + ) + + mid_resnets = [key for key in vae_state_dict if "encoder.mid.block" in key] + num_mid_res_blocks = 2 + for i in range(1, num_mid_res_blocks + 1): + resnets = [key for key in mid_resnets if f"encoder.mid.block_{i}" in key] + update_vae_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + vae_state_dict, + mapping={"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}, + ) + + mid_attentions = [key for key in vae_state_dict if "encoder.mid.attn" in key] + update_vae_attentions_ldm_to_diffusers( + mid_attentions, new_checkpoint, vae_state_dict, mapping={"old": "mid.attn_1", "new": "mid_block.attentions.0"} + ) + + # Retrieves the keys for the decoder up blocks only + num_up_blocks = len(config["up_block_types"]) + up_blocks = { + layer_id: [key for key in vae_state_dict if f"up.{layer_id}" in key] for layer_id in range(num_up_blocks) + } + + for i in range(num_up_blocks): + block_id = num_up_blocks - 1 - i + resnets = [ + key for key in up_blocks[block_id] if f"up.{block_id}" in key and f"up.{block_id}.upsample" not in key + ] + update_vae_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + vae_state_dict, + mapping={"old": f"up.{block_id}.block", "new": f"up_blocks.{i}.resnets"}, + ) + if f"decoder.up.{block_id}.upsample.conv.weight" in vae_state_dict: + new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.weight"] = vae_state_dict[ + f"decoder.up.{block_id}.upsample.conv.weight" + ] + new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.bias"] = vae_state_dict[ + f"decoder.up.{block_id}.upsample.conv.bias" + ] + + mid_resnets = [key for key in vae_state_dict if "decoder.mid.block" in key] + num_mid_res_blocks = 2 + for i in range(1, num_mid_res_blocks + 1): + resnets = [key for key in mid_resnets if f"decoder.mid.block_{i}" in key] + update_vae_resnet_ldm_to_diffusers( + resnets, + new_checkpoint, + vae_state_dict, + mapping={"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}, + ) + + mid_attentions = [key for key in vae_state_dict if "decoder.mid.attn" in key] + update_vae_attentions_ldm_to_diffusers( + mid_attentions, new_checkpoint, vae_state_dict, mapping={"old": "mid.attn_1", "new": "mid_block.attentions.0"} + ) + conv_attn_to_linear(new_checkpoint) + + return new_checkpoint + + +def convert_ldm_clip_checkpoint(checkpoint, remove_prefix=None): + keys = list(checkpoint.keys()) + text_model_dict = {} + + remove_prefixes = [] + remove_prefixes.extend(LDM_CLIP_PREFIX_TO_REMOVE) + if remove_prefix: + remove_prefixes.append(remove_prefix) + + for key in keys: + for prefix in remove_prefixes: + if key.startswith(prefix): + diffusers_key = key.replace(prefix, "") + text_model_dict[diffusers_key] = checkpoint.get(key) + + return text_model_dict + + +def convert_open_clip_checkpoint( + text_model, + checkpoint, + prefix="cond_stage_model.model.", +): + text_model_dict = {} + text_proj_key = prefix + "text_projection" + + if text_proj_key in checkpoint: + text_proj_dim = int(checkpoint[text_proj_key].shape[0]) + elif hasattr(text_model.config, "hidden_size"): + text_proj_dim = text_model.config.hidden_size + else: + text_proj_dim = LDM_OPEN_CLIP_TEXT_PROJECTION_DIM + + keys = list(checkpoint.keys()) + keys_to_ignore = SD_2_TEXT_ENCODER_KEYS_TO_IGNORE + + openclip_diffusers_ldm_map = DIFFUSERS_TO_LDM_MAPPING["openclip"]["layers"] + for diffusers_key, ldm_key in openclip_diffusers_ldm_map.items(): + ldm_key = prefix + ldm_key + if ldm_key not in checkpoint: + continue + if ldm_key in keys_to_ignore: + continue + if ldm_key.endswith("text_projection"): + text_model_dict[diffusers_key] = checkpoint[ldm_key].T.contiguous() + else: + text_model_dict[diffusers_key] = checkpoint[ldm_key] + + for key in keys: + if key in keys_to_ignore: + continue + + if not key.startswith(prefix + "transformer."): + continue + + diffusers_key = key.replace(prefix + "transformer.", "") + transformer_diffusers_to_ldm_map = DIFFUSERS_TO_LDM_MAPPING["openclip"]["transformer"] + for new_key, old_key in transformer_diffusers_to_ldm_map.items(): + diffusers_key = ( + diffusers_key.replace(old_key, new_key).replace(".in_proj_weight", "").replace(".in_proj_bias", "") + ) + + if key.endswith(".in_proj_weight"): + weight_value = checkpoint.get(key) + + text_model_dict[diffusers_key + ".q_proj.weight"] = weight_value[:text_proj_dim, :].clone().detach() + text_model_dict[diffusers_key + ".k_proj.weight"] = ( + weight_value[text_proj_dim : text_proj_dim * 2, :].clone().detach() + ) + text_model_dict[diffusers_key + ".v_proj.weight"] = weight_value[text_proj_dim * 2 :, :].clone().detach() + + elif key.endswith(".in_proj_bias"): + weight_value = checkpoint.get(key) + text_model_dict[diffusers_key + ".q_proj.bias"] = weight_value[:text_proj_dim].clone().detach() + text_model_dict[diffusers_key + ".k_proj.bias"] = ( + weight_value[text_proj_dim : text_proj_dim * 2].clone().detach() + ) + text_model_dict[diffusers_key + ".v_proj.bias"] = weight_value[text_proj_dim * 2 :].clone().detach() + else: + text_model_dict[diffusers_key] = checkpoint.get(key) + + return text_model_dict + + +def create_diffusers_clip_model_from_ldm( + cls, + checkpoint, + subfolder="", + config=None, + torch_dtype=None, + local_files_only=None, + is_legacy_loading=False, +): + if config: + config = {"pretrained_model_name_or_path": config} + else: + config = fetch_diffusers_config(checkpoint) + + # For backwards compatibility + # Older versions of `from_single_file` expected CLIP configs to be placed in their original transformers model repo + # in the cache_dir, rather than in a subfolder of the Diffusers model + if is_legacy_loading: + logger.warning( + ( + "Detected legacy CLIP loading behavior. Please run `from_single_file` with `local_files_only=False once to update " + "the local cache directory with the necessary CLIP model config files. " + "Attempting to load CLIP model from legacy cache directory." + ) + ) + + if is_clip_model(checkpoint) or is_clip_sdxl_model(checkpoint): + clip_config = "openai/clip-vit-large-patch14" + config["pretrained_model_name_or_path"] = clip_config + subfolder = "" + + elif is_open_clip_model(checkpoint): + clip_config = "stabilityai/stable-diffusion-2" + config["pretrained_model_name_or_path"] = clip_config + subfolder = "text_encoder" + + else: + clip_config = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" + config["pretrained_model_name_or_path"] = clip_config + subfolder = "" + + model_config = cls.config_class.from_pretrained(**config, subfolder=subfolder, local_files_only=local_files_only) + ctx = init_empty_weights if is_accelerate_available() else nullcontext + with ctx(): + model = cls(model_config) + + position_embedding_dim = model.text_model.embeddings.position_embedding.weight.shape[-1] + + if is_clip_model(checkpoint): + diffusers_format_checkpoint = convert_ldm_clip_checkpoint(checkpoint) + + elif ( + is_clip_sdxl_model(checkpoint) + and checkpoint[CHECKPOINT_KEY_NAMES["clip_sdxl"]].shape[-1] == position_embedding_dim + ): + diffusers_format_checkpoint = convert_ldm_clip_checkpoint(checkpoint) + + elif ( + is_clip_sd3_model(checkpoint) + and checkpoint[CHECKPOINT_KEY_NAMES["clip_sd3"]].shape[-1] == position_embedding_dim + ): + diffusers_format_checkpoint = convert_ldm_clip_checkpoint(checkpoint, "text_encoders.clip_l.transformer.") + diffusers_format_checkpoint["text_projection.weight"] = torch.eye(position_embedding_dim) + + elif is_open_clip_model(checkpoint): + prefix = "cond_stage_model.model." + diffusers_format_checkpoint = convert_open_clip_checkpoint(model, checkpoint, prefix=prefix) + + elif ( + is_open_clip_sdxl_model(checkpoint) + and checkpoint[CHECKPOINT_KEY_NAMES["open_clip_sdxl"]].shape[-1] == position_embedding_dim + ): + prefix = "conditioner.embedders.1.model." + diffusers_format_checkpoint = convert_open_clip_checkpoint(model, checkpoint, prefix=prefix) + + elif is_open_clip_sdxl_refiner_model(checkpoint): + prefix = "conditioner.embedders.0.model." + diffusers_format_checkpoint = convert_open_clip_checkpoint(model, checkpoint, prefix=prefix) + + elif ( + is_open_clip_sd3_model(checkpoint) + and checkpoint[CHECKPOINT_KEY_NAMES["open_clip_sd3"]].shape[-1] == position_embedding_dim + ): + diffusers_format_checkpoint = convert_ldm_clip_checkpoint(checkpoint, "text_encoders.clip_g.transformer.") + + else: + raise ValueError("The provided checkpoint does not seem to contain a valid CLIP model.") + + if is_accelerate_available(): + load_model_dict_into_meta(model, diffusers_format_checkpoint, dtype=torch_dtype) + empty_device_cache() + else: + model.load_state_dict(diffusers_format_checkpoint, strict=False) + + if torch_dtype is not None: + model.to(torch_dtype) + + model.eval() + + return model + + +def _legacy_load_scheduler( + cls, + checkpoint, + component_name, + original_config=None, + **kwargs, +): + scheduler_type = kwargs.get("scheduler_type", None) + prediction_type = kwargs.get("prediction_type", None) + + if scheduler_type is not None: + deprecation_message = ( + "Please pass an instance of a Scheduler object directly to the `scheduler` argument in `from_single_file`\n\n" + "Example:\n\n" + "from diffusers import StableDiffusionPipeline, DDIMScheduler\n\n" + "scheduler = DDIMScheduler()\n" + "pipe = StableDiffusionPipeline.from_single_file(, scheduler=scheduler)\n" + ) + deprecate("scheduler_type", "1.0.0", deprecation_message) + + if prediction_type is not None: + deprecation_message = ( + "Please configure an instance of a Scheduler with the appropriate `prediction_type` and " + "pass the object directly to the `scheduler` argument in `from_single_file`.\n\n" + "Example:\n\n" + "from diffusers import StableDiffusionPipeline, DDIMScheduler\n\n" + 'scheduler = DDIMScheduler(prediction_type="v_prediction")\n' + "pipe = StableDiffusionPipeline.from_single_file(, scheduler=scheduler)\n" + ) + deprecate("prediction_type", "1.0.0", deprecation_message) + + scheduler_config = SCHEDULER_DEFAULT_CONFIG + model_type = infer_diffusers_model_type(checkpoint=checkpoint) + + global_step = checkpoint["global_step"] if "global_step" in checkpoint else None + + if original_config: + num_train_timesteps = getattr(original_config["model"]["params"], "timesteps", 1000) + else: + num_train_timesteps = 1000 + + scheduler_config["num_train_timesteps"] = num_train_timesteps + + if model_type == "v2": + if prediction_type is None: + # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"` # as it relies on a brittle global step parameter here + prediction_type = "epsilon" if global_step == 875000 else "v_prediction" + + else: + prediction_type = prediction_type or "epsilon" + + scheduler_config["prediction_type"] = prediction_type + + if model_type in ["xl_base", "xl_refiner"]: + scheduler_type = "euler" + elif model_type == "playground": + scheduler_type = "edm_dpm_solver_multistep" + else: + if original_config: + beta_start = original_config["model"]["params"].get("linear_start") + beta_end = original_config["model"]["params"].get("linear_end") + + else: + beta_start = 0.02 + beta_end = 0.085 + + scheduler_config["beta_start"] = beta_start + scheduler_config["beta_end"] = beta_end + scheduler_config["beta_schedule"] = "scaled_linear" + scheduler_config["clip_sample"] = False + scheduler_config["set_alpha_to_one"] = False + + # to deal with an edge case StableDiffusionUpscale pipeline has two schedulers + if component_name == "low_res_scheduler": + return cls.from_config( + { + "beta_end": 0.02, + "beta_schedule": "scaled_linear", + "beta_start": 0.0001, + "clip_sample": True, + "num_train_timesteps": 1000, + "prediction_type": "epsilon", + "trained_betas": None, + "variance_type": "fixed_small", + } + ) + + if scheduler_type is None: + return cls.from_config(scheduler_config) + + elif scheduler_type == "pndm": + scheduler_config["skip_prk_steps"] = True + scheduler = PNDMScheduler.from_config(scheduler_config) + + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(scheduler_config) + + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(scheduler_config) + + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(scheduler_config) + + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config) + + elif scheduler_type == "dpm": + scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config) + + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config(scheduler_config) + + elif scheduler_type == "edm_dpm_solver_multistep": + scheduler_config = { + "algorithm_type": "dpmsolver++", + "dynamic_thresholding_ratio": 0.995, + "euler_at_final": False, + "final_sigmas_type": "zero", + "lower_order_final": True, + "num_train_timesteps": 1000, + "prediction_type": "epsilon", + "rho": 7.0, + "sample_max_value": 1.0, + "sigma_data": 0.5, + "sigma_max": 80.0, + "sigma_min": 0.002, + "solver_order": 2, + "solver_type": "midpoint", + "thresholding": False, + } + scheduler = EDMDPMSolverMultistepScheduler(**scheduler_config) + + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + + return scheduler + + +def _legacy_load_clip_tokenizer(cls, checkpoint, config=None, local_files_only=False): + if config: + config = {"pretrained_model_name_or_path": config} + else: + config = fetch_diffusers_config(checkpoint) + + if is_clip_model(checkpoint) or is_clip_sdxl_model(checkpoint): + clip_config = "openai/clip-vit-large-patch14" + config["pretrained_model_name_or_path"] = clip_config + subfolder = "" + + elif is_open_clip_model(checkpoint): + clip_config = "stabilityai/stable-diffusion-2" + config["pretrained_model_name_or_path"] = clip_config + subfolder = "tokenizer" + + else: + clip_config = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k" + config["pretrained_model_name_or_path"] = clip_config + subfolder = "" + + tokenizer = cls.from_pretrained(**config, subfolder=subfolder, local_files_only=local_files_only) + + return tokenizer + + +def _legacy_load_safety_checker(local_files_only, torch_dtype): + # Support for loading safety checker components using the deprecated + # `load_safety_checker` argument. + + from ..pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker + + feature_extractor = AutoImageProcessor.from_pretrained( + "CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only, torch_dtype=torch_dtype + ) + safety_checker = StableDiffusionSafetyChecker.from_pretrained( + "CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only, torch_dtype=torch_dtype + ) + + return {"safety_checker": safety_checker, "feature_extractor": feature_extractor} + + +# in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; +# while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation +def swap_scale_shift(weight, dim): + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + return new_weight + + +def swap_proj_gate(weight): + proj, gate = weight.chunk(2, dim=0) + new_weight = torch.cat([gate, proj], dim=0) + return new_weight + + +def get_attn2_layers(state_dict): + attn2_layers = [] + for key in state_dict.keys(): + if "attn2." in key: + # Extract the layer number from the key + layer_num = int(key.split(".")[1]) + attn2_layers.append(layer_num) + + return tuple(sorted(set(attn2_layers))) + + +def get_caption_projection_dim(state_dict): + caption_projection_dim = state_dict["context_embedder.weight"].shape[0] + return caption_projection_dim + + +def convert_sd3_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + num_layers = list(set(int(k.split(".", 2)[1]) for k in checkpoint if "joint_blocks" in k))[-1] + 1 # noqa: C401 + dual_attention_layers = get_attn2_layers(checkpoint) + + caption_projection_dim = get_caption_projection_dim(checkpoint) + has_qk_norm = any("ln_q" in key for key in checkpoint.keys()) + + # Positional and patch embeddings. + converted_state_dict["pos_embed.pos_embed"] = checkpoint.pop("pos_embed") + converted_state_dict["pos_embed.proj.weight"] = checkpoint.pop("x_embedder.proj.weight") + converted_state_dict["pos_embed.proj.bias"] = checkpoint.pop("x_embedder.proj.bias") + + # Timestep embeddings. + converted_state_dict["time_text_embed.timestep_embedder.linear_1.weight"] = checkpoint.pop( + "t_embedder.mlp.0.weight" + ) + converted_state_dict["time_text_embed.timestep_embedder.linear_1.bias"] = checkpoint.pop("t_embedder.mlp.0.bias") + converted_state_dict["time_text_embed.timestep_embedder.linear_2.weight"] = checkpoint.pop( + "t_embedder.mlp.2.weight" + ) + converted_state_dict["time_text_embed.timestep_embedder.linear_2.bias"] = checkpoint.pop("t_embedder.mlp.2.bias") + + # Context projections. + converted_state_dict["context_embedder.weight"] = checkpoint.pop("context_embedder.weight") + converted_state_dict["context_embedder.bias"] = checkpoint.pop("context_embedder.bias") + + # Pooled context projection. + converted_state_dict["time_text_embed.text_embedder.linear_1.weight"] = checkpoint.pop("y_embedder.mlp.0.weight") + converted_state_dict["time_text_embed.text_embedder.linear_1.bias"] = checkpoint.pop("y_embedder.mlp.0.bias") + converted_state_dict["time_text_embed.text_embedder.linear_2.weight"] = checkpoint.pop("y_embedder.mlp.2.weight") + converted_state_dict["time_text_embed.text_embedder.linear_2.bias"] = checkpoint.pop("y_embedder.mlp.2.bias") + + # Transformer blocks 🎸. + for i in range(num_layers): + # Q, K, V + sample_q, sample_k, sample_v = torch.chunk( + checkpoint.pop(f"joint_blocks.{i}.x_block.attn.qkv.weight"), 3, dim=0 + ) + context_q, context_k, context_v = torch.chunk( + checkpoint.pop(f"joint_blocks.{i}.context_block.attn.qkv.weight"), 3, dim=0 + ) + sample_q_bias, sample_k_bias, sample_v_bias = torch.chunk( + checkpoint.pop(f"joint_blocks.{i}.x_block.attn.qkv.bias"), 3, dim=0 + ) + context_q_bias, context_k_bias, context_v_bias = torch.chunk( + checkpoint.pop(f"joint_blocks.{i}.context_block.attn.qkv.bias"), 3, dim=0 + ) + + converted_state_dict[f"transformer_blocks.{i}.attn.to_q.weight"] = torch.cat([sample_q]) + converted_state_dict[f"transformer_blocks.{i}.attn.to_q.bias"] = torch.cat([sample_q_bias]) + converted_state_dict[f"transformer_blocks.{i}.attn.to_k.weight"] = torch.cat([sample_k]) + converted_state_dict[f"transformer_blocks.{i}.attn.to_k.bias"] = torch.cat([sample_k_bias]) + converted_state_dict[f"transformer_blocks.{i}.attn.to_v.weight"] = torch.cat([sample_v]) + converted_state_dict[f"transformer_blocks.{i}.attn.to_v.bias"] = torch.cat([sample_v_bias]) + + converted_state_dict[f"transformer_blocks.{i}.attn.add_q_proj.weight"] = torch.cat([context_q]) + converted_state_dict[f"transformer_blocks.{i}.attn.add_q_proj.bias"] = torch.cat([context_q_bias]) + converted_state_dict[f"transformer_blocks.{i}.attn.add_k_proj.weight"] = torch.cat([context_k]) + converted_state_dict[f"transformer_blocks.{i}.attn.add_k_proj.bias"] = torch.cat([context_k_bias]) + converted_state_dict[f"transformer_blocks.{i}.attn.add_v_proj.weight"] = torch.cat([context_v]) + converted_state_dict[f"transformer_blocks.{i}.attn.add_v_proj.bias"] = torch.cat([context_v_bias]) + + # qk norm + if has_qk_norm: + converted_state_dict[f"transformer_blocks.{i}.attn.norm_q.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn.ln_q.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn.norm_k.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn.ln_k.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn.norm_added_q.weight"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.attn.ln_q.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn.norm_added_k.weight"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.attn.ln_k.weight" + ) + + # output projections. + converted_state_dict[f"transformer_blocks.{i}.attn.to_out.0.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn.proj.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn.to_out.0.bias"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn.proj.bias" + ) + if not (i == num_layers - 1): + converted_state_dict[f"transformer_blocks.{i}.attn.to_add_out.weight"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.attn.proj.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn.to_add_out.bias"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.attn.proj.bias" + ) + + if i in dual_attention_layers: + # Q, K, V + sample_q2, sample_k2, sample_v2 = torch.chunk( + checkpoint.pop(f"joint_blocks.{i}.x_block.attn2.qkv.weight"), 3, dim=0 + ) + sample_q2_bias, sample_k2_bias, sample_v2_bias = torch.chunk( + checkpoint.pop(f"joint_blocks.{i}.x_block.attn2.qkv.bias"), 3, dim=0 + ) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_q.weight"] = torch.cat([sample_q2]) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_q.bias"] = torch.cat([sample_q2_bias]) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_k.weight"] = torch.cat([sample_k2]) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_k.bias"] = torch.cat([sample_k2_bias]) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_v.weight"] = torch.cat([sample_v2]) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_v.bias"] = torch.cat([sample_v2_bias]) + + # qk norm + if has_qk_norm: + converted_state_dict[f"transformer_blocks.{i}.attn2.norm_q.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn2.ln_q.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn2.norm_k.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn2.ln_k.weight" + ) + + # output projections. + converted_state_dict[f"transformer_blocks.{i}.attn2.to_out.0.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn2.proj.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_out.0.bias"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.attn2.proj.bias" + ) + + # norms. + converted_state_dict[f"transformer_blocks.{i}.norm1.linear.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.adaLN_modulation.1.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.norm1.linear.bias"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.adaLN_modulation.1.bias" + ) + if not (i == num_layers - 1): + converted_state_dict[f"transformer_blocks.{i}.norm1_context.linear.weight"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.adaLN_modulation.1.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.norm1_context.linear.bias"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.adaLN_modulation.1.bias" + ) + else: + converted_state_dict[f"transformer_blocks.{i}.norm1_context.linear.weight"] = swap_scale_shift( + checkpoint.pop(f"joint_blocks.{i}.context_block.adaLN_modulation.1.weight"), + dim=caption_projection_dim, + ) + converted_state_dict[f"transformer_blocks.{i}.norm1_context.linear.bias"] = swap_scale_shift( + checkpoint.pop(f"joint_blocks.{i}.context_block.adaLN_modulation.1.bias"), + dim=caption_projection_dim, + ) + + # ffs. + converted_state_dict[f"transformer_blocks.{i}.ff.net.0.proj.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.mlp.fc1.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.net.0.proj.bias"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.mlp.fc1.bias" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.net.2.weight"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.mlp.fc2.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.net.2.bias"] = checkpoint.pop( + f"joint_blocks.{i}.x_block.mlp.fc2.bias" + ) + if not (i == num_layers - 1): + converted_state_dict[f"transformer_blocks.{i}.ff_context.net.0.proj.weight"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.mlp.fc1.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.ff_context.net.0.proj.bias"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.mlp.fc1.bias" + ) + converted_state_dict[f"transformer_blocks.{i}.ff_context.net.2.weight"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.mlp.fc2.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.ff_context.net.2.bias"] = checkpoint.pop( + f"joint_blocks.{i}.context_block.mlp.fc2.bias" + ) + + # Final blocks. + converted_state_dict["proj_out.weight"] = checkpoint.pop("final_layer.linear.weight") + converted_state_dict["proj_out.bias"] = checkpoint.pop("final_layer.linear.bias") + converted_state_dict["norm_out.linear.weight"] = swap_scale_shift( + checkpoint.pop("final_layer.adaLN_modulation.1.weight"), dim=caption_projection_dim + ) + converted_state_dict["norm_out.linear.bias"] = swap_scale_shift( + checkpoint.pop("final_layer.adaLN_modulation.1.bias"), dim=caption_projection_dim + ) + + return converted_state_dict + + +def is_t5_in_single_file(checkpoint): + if "text_encoders.t5xxl.transformer.shared.weight" in checkpoint: + return True + + return False + + +def convert_sd3_t5_checkpoint_to_diffusers(checkpoint): + keys = list(checkpoint.keys()) + text_model_dict = {} + + remove_prefixes = ["text_encoders.t5xxl.transformer."] + + for key in keys: + for prefix in remove_prefixes: + if key.startswith(prefix): + diffusers_key = key.replace(prefix, "") + text_model_dict[diffusers_key] = checkpoint.get(key) + + return text_model_dict + + +def create_diffusers_t5_model_from_checkpoint( + cls, + checkpoint, + subfolder="", + config=None, + torch_dtype=None, + local_files_only=None, +): + if config: + config = {"pretrained_model_name_or_path": config} + else: + config = fetch_diffusers_config(checkpoint) + + model_config = cls.config_class.from_pretrained(**config, subfolder=subfolder, local_files_only=local_files_only) + ctx = init_empty_weights if is_accelerate_available() else nullcontext + with ctx(): + model = cls(model_config) + + diffusers_format_checkpoint = convert_sd3_t5_checkpoint_to_diffusers(checkpoint) + + if is_accelerate_available(): + load_model_dict_into_meta(model, diffusers_format_checkpoint, dtype=torch_dtype) + empty_device_cache() + else: + model.load_state_dict(diffusers_format_checkpoint) + + use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and (torch_dtype == torch.float16) + if use_keep_in_fp32_modules: + keep_in_fp32_modules = model._keep_in_fp32_modules + else: + keep_in_fp32_modules = [] + + if keep_in_fp32_modules is not None: + for name, param in model.named_parameters(): + if any(module_to_keep_in_fp32 in name.split(".") for module_to_keep_in_fp32 in keep_in_fp32_modules): + # param = param.to(torch.float32) does not work here as only in the local scope. + param.data = param.data.to(torch.float32) + + return model + + +def convert_animatediff_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + for k, v in checkpoint.items(): + if "pos_encoder" in k: + continue + + else: + converted_state_dict[ + k.replace(".norms.0", ".norm1") + .replace(".norms.1", ".norm2") + .replace(".ff_norm", ".norm3") + .replace(".attention_blocks.0", ".attn1") + .replace(".attention_blocks.1", ".attn2") + .replace(".temporal_transformer", "") + ] = v + + return converted_state_dict + + +def convert_flux_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + keys = list(checkpoint.keys()) + + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + num_layers = list(set(int(k.split(".", 2)[1]) for k in checkpoint if "double_blocks." in k))[-1] + 1 # noqa: C401 + num_single_layers = list(set(int(k.split(".", 2)[1]) for k in checkpoint if "single_blocks." in k))[-1] + 1 # noqa: C401 + mlp_ratio = 4.0 + inner_dim = 3072 + + # in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; + # while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation + def swap_scale_shift(weight): + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + return new_weight + + ## time_text_embed.timestep_embedder <- time_in + converted_state_dict["time_text_embed.timestep_embedder.linear_1.weight"] = checkpoint.pop( + "time_in.in_layer.weight" + ) + converted_state_dict["time_text_embed.timestep_embedder.linear_1.bias"] = checkpoint.pop("time_in.in_layer.bias") + converted_state_dict["time_text_embed.timestep_embedder.linear_2.weight"] = checkpoint.pop( + "time_in.out_layer.weight" + ) + converted_state_dict["time_text_embed.timestep_embedder.linear_2.bias"] = checkpoint.pop("time_in.out_layer.bias") + + ## time_text_embed.text_embedder <- vector_in + converted_state_dict["time_text_embed.text_embedder.linear_1.weight"] = checkpoint.pop("vector_in.in_layer.weight") + converted_state_dict["time_text_embed.text_embedder.linear_1.bias"] = checkpoint.pop("vector_in.in_layer.bias") + converted_state_dict["time_text_embed.text_embedder.linear_2.weight"] = checkpoint.pop( + "vector_in.out_layer.weight" + ) + converted_state_dict["time_text_embed.text_embedder.linear_2.bias"] = checkpoint.pop("vector_in.out_layer.bias") + + # guidance + has_guidance = any("guidance" in k for k in checkpoint) + if has_guidance: + converted_state_dict["time_text_embed.guidance_embedder.linear_1.weight"] = checkpoint.pop( + "guidance_in.in_layer.weight" + ) + converted_state_dict["time_text_embed.guidance_embedder.linear_1.bias"] = checkpoint.pop( + "guidance_in.in_layer.bias" + ) + converted_state_dict["time_text_embed.guidance_embedder.linear_2.weight"] = checkpoint.pop( + "guidance_in.out_layer.weight" + ) + converted_state_dict["time_text_embed.guidance_embedder.linear_2.bias"] = checkpoint.pop( + "guidance_in.out_layer.bias" + ) + + # context_embedder + converted_state_dict["context_embedder.weight"] = checkpoint.pop("txt_in.weight") + converted_state_dict["context_embedder.bias"] = checkpoint.pop("txt_in.bias") + + # x_embedder + converted_state_dict["x_embedder.weight"] = checkpoint.pop("img_in.weight") + converted_state_dict["x_embedder.bias"] = checkpoint.pop("img_in.bias") + + # double transformer blocks + for i in range(num_layers): + block_prefix = f"transformer_blocks.{i}." + # norms. + ## norm1 + converted_state_dict[f"{block_prefix}norm1.linear.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_mod.lin.weight" + ) + converted_state_dict[f"{block_prefix}norm1.linear.bias"] = checkpoint.pop( + f"double_blocks.{i}.img_mod.lin.bias" + ) + ## norm1_context + converted_state_dict[f"{block_prefix}norm1_context.linear.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_mod.lin.weight" + ) + converted_state_dict[f"{block_prefix}norm1_context.linear.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_mod.lin.bias" + ) + # Q, K, V + sample_q, sample_k, sample_v = torch.chunk(checkpoint.pop(f"double_blocks.{i}.img_attn.qkv.weight"), 3, dim=0) + context_q, context_k, context_v = torch.chunk( + checkpoint.pop(f"double_blocks.{i}.txt_attn.qkv.weight"), 3, dim=0 + ) + sample_q_bias, sample_k_bias, sample_v_bias = torch.chunk( + checkpoint.pop(f"double_blocks.{i}.img_attn.qkv.bias"), 3, dim=0 + ) + context_q_bias, context_k_bias, context_v_bias = torch.chunk( + checkpoint.pop(f"double_blocks.{i}.txt_attn.qkv.bias"), 3, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.weight"] = torch.cat([sample_q]) + converted_state_dict[f"{block_prefix}attn.to_q.bias"] = torch.cat([sample_q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.weight"] = torch.cat([sample_k]) + converted_state_dict[f"{block_prefix}attn.to_k.bias"] = torch.cat([sample_k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.weight"] = torch.cat([sample_v]) + converted_state_dict[f"{block_prefix}attn.to_v.bias"] = torch.cat([sample_v_bias]) + converted_state_dict[f"{block_prefix}attn.add_q_proj.weight"] = torch.cat([context_q]) + converted_state_dict[f"{block_prefix}attn.add_q_proj.bias"] = torch.cat([context_q_bias]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.weight"] = torch.cat([context_k]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.bias"] = torch.cat([context_k_bias]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.weight"] = torch.cat([context_v]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.bias"] = torch.cat([context_v_bias]) + # qk_norm + converted_state_dict[f"{block_prefix}attn.norm_q.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_k.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.norm.key_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_added_q.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_added_k.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.norm.key_norm.scale" + ) + # ff img_mlp + converted_state_dict[f"{block_prefix}ff.net.0.proj.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_mlp.0.weight" + ) + converted_state_dict[f"{block_prefix}ff.net.0.proj.bias"] = checkpoint.pop(f"double_blocks.{i}.img_mlp.0.bias") + converted_state_dict[f"{block_prefix}ff.net.2.weight"] = checkpoint.pop(f"double_blocks.{i}.img_mlp.2.weight") + converted_state_dict[f"{block_prefix}ff.net.2.bias"] = checkpoint.pop(f"double_blocks.{i}.img_mlp.2.bias") + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.0.weight" + ) + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.0.bias" + ) + converted_state_dict[f"{block_prefix}ff_context.net.2.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.2.weight" + ) + converted_state_dict[f"{block_prefix}ff_context.net.2.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.2.bias" + ) + # output projections. + converted_state_dict[f"{block_prefix}attn.to_out.0.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.proj.weight" + ) + converted_state_dict[f"{block_prefix}attn.to_out.0.bias"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.proj.bias" + ) + converted_state_dict[f"{block_prefix}attn.to_add_out.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.proj.weight" + ) + converted_state_dict[f"{block_prefix}attn.to_add_out.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.proj.bias" + ) + + # single transformer blocks + for i in range(num_single_layers): + block_prefix = f"single_transformer_blocks.{i}." + # norm.linear <- single_blocks.0.modulation.lin + converted_state_dict[f"{block_prefix}norm.linear.weight"] = checkpoint.pop( + f"single_blocks.{i}.modulation.lin.weight" + ) + converted_state_dict[f"{block_prefix}norm.linear.bias"] = checkpoint.pop( + f"single_blocks.{i}.modulation.lin.bias" + ) + # Q, K, V, mlp + mlp_hidden_dim = int(inner_dim * mlp_ratio) + split_size = (inner_dim, inner_dim, inner_dim, mlp_hidden_dim) + q, k, v, mlp = torch.split(checkpoint.pop(f"single_blocks.{i}.linear1.weight"), split_size, dim=0) + q_bias, k_bias, v_bias, mlp_bias = torch.split( + checkpoint.pop(f"single_blocks.{i}.linear1.bias"), split_size, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.weight"] = torch.cat([q]) + converted_state_dict[f"{block_prefix}attn.to_q.bias"] = torch.cat([q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.weight"] = torch.cat([k]) + converted_state_dict[f"{block_prefix}attn.to_k.bias"] = torch.cat([k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.weight"] = torch.cat([v]) + converted_state_dict[f"{block_prefix}attn.to_v.bias"] = torch.cat([v_bias]) + converted_state_dict[f"{block_prefix}proj_mlp.weight"] = torch.cat([mlp]) + converted_state_dict[f"{block_prefix}proj_mlp.bias"] = torch.cat([mlp_bias]) + # qk norm + converted_state_dict[f"{block_prefix}attn.norm_q.weight"] = checkpoint.pop( + f"single_blocks.{i}.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_k.weight"] = checkpoint.pop( + f"single_blocks.{i}.norm.key_norm.scale" + ) + # output projections. + converted_state_dict[f"{block_prefix}proj_out.weight"] = checkpoint.pop(f"single_blocks.{i}.linear2.weight") + converted_state_dict[f"{block_prefix}proj_out.bias"] = checkpoint.pop(f"single_blocks.{i}.linear2.bias") + + converted_state_dict["proj_out.weight"] = checkpoint.pop("final_layer.linear.weight") + converted_state_dict["proj_out.bias"] = checkpoint.pop("final_layer.linear.bias") + converted_state_dict["norm_out.linear.weight"] = swap_scale_shift( + checkpoint.pop("final_layer.adaLN_modulation.1.weight") + ) + converted_state_dict["norm_out.linear.bias"] = swap_scale_shift( + checkpoint.pop("final_layer.adaLN_modulation.1.bias") + ) + + return converted_state_dict + + +def convert_ltx_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys()) if "vae" not in key} + + TRANSFORMER_KEYS_RENAME_DICT = { + "model.diffusion_model.": "", + "patchify_proj": "proj_in", + "adaln_single": "time_embed", + "q_norm": "norm_q", + "k_norm": "norm_k", + } + + TRANSFORMER_SPECIAL_KEYS_REMAP = {} + + for key in list(converted_state_dict.keys()): + new_key = key + for replace_key, rename_key in TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = converted_state_dict.pop(key) + + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_ltx_vae_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys()) if "vae." in key} + + def remove_keys_(key: str, state_dict): + state_dict.pop(key) + + VAE_KEYS_RENAME_DICT = { + # common + "vae.": "", + # decoder + "up_blocks.0": "mid_block", + "up_blocks.1": "up_blocks.0", + "up_blocks.2": "up_blocks.1.upsamplers.0", + "up_blocks.3": "up_blocks.1", + "up_blocks.4": "up_blocks.2.conv_in", + "up_blocks.5": "up_blocks.2.upsamplers.0", + "up_blocks.6": "up_blocks.2", + "up_blocks.7": "up_blocks.3.conv_in", + "up_blocks.8": "up_blocks.3.upsamplers.0", + "up_blocks.9": "up_blocks.3", + # encoder + "down_blocks.0": "down_blocks.0", + "down_blocks.1": "down_blocks.0.downsamplers.0", + "down_blocks.2": "down_blocks.0.conv_out", + "down_blocks.3": "down_blocks.1", + "down_blocks.4": "down_blocks.1.downsamplers.0", + "down_blocks.5": "down_blocks.1.conv_out", + "down_blocks.6": "down_blocks.2", + "down_blocks.7": "down_blocks.2.downsamplers.0", + "down_blocks.8": "down_blocks.3", + "down_blocks.9": "mid_block", + # common + "conv_shortcut": "conv_shortcut.conv", + "res_blocks": "resnets", + "norm3.norm": "norm3", + "per_channel_statistics.mean-of-means": "latents_mean", + "per_channel_statistics.std-of-means": "latents_std", + } + + VAE_091_RENAME_DICT = { + # decoder + "up_blocks.0": "mid_block", + "up_blocks.1": "up_blocks.0.upsamplers.0", + "up_blocks.2": "up_blocks.0", + "up_blocks.3": "up_blocks.1.upsamplers.0", + "up_blocks.4": "up_blocks.1", + "up_blocks.5": "up_blocks.2.upsamplers.0", + "up_blocks.6": "up_blocks.2", + "up_blocks.7": "up_blocks.3.upsamplers.0", + "up_blocks.8": "up_blocks.3", + # common + "last_time_embedder": "time_embedder", + "last_scale_shift_table": "scale_shift_table", + } + + VAE_095_RENAME_DICT = { + # decoder + "up_blocks.0": "mid_block", + "up_blocks.1": "up_blocks.0.upsamplers.0", + "up_blocks.2": "up_blocks.0", + "up_blocks.3": "up_blocks.1.upsamplers.0", + "up_blocks.4": "up_blocks.1", + "up_blocks.5": "up_blocks.2.upsamplers.0", + "up_blocks.6": "up_blocks.2", + "up_blocks.7": "up_blocks.3.upsamplers.0", + "up_blocks.8": "up_blocks.3", + # encoder + "down_blocks.0": "down_blocks.0", + "down_blocks.1": "down_blocks.0.downsamplers.0", + "down_blocks.2": "down_blocks.1", + "down_blocks.3": "down_blocks.1.downsamplers.0", + "down_blocks.4": "down_blocks.2", + "down_blocks.5": "down_blocks.2.downsamplers.0", + "down_blocks.6": "down_blocks.3", + "down_blocks.7": "down_blocks.3.downsamplers.0", + "down_blocks.8": "mid_block", + # common + "last_time_embedder": "time_embedder", + "last_scale_shift_table": "scale_shift_table", + } + + VAE_SPECIAL_KEYS_REMAP = { + "per_channel_statistics.channel": remove_keys_, + "per_channel_statistics.mean-of-means": remove_keys_, + "per_channel_statistics.mean-of-stds": remove_keys_, + } + + if converted_state_dict["vae.encoder.conv_out.conv.weight"].shape[1] == 2048: + VAE_KEYS_RENAME_DICT.update(VAE_095_RENAME_DICT) + elif "vae.decoder.last_time_embedder.timestep_embedder.linear_1.weight" in converted_state_dict: + VAE_KEYS_RENAME_DICT.update(VAE_091_RENAME_DICT) + + for key in list(converted_state_dict.keys()): + new_key = key + for replace_key, rename_key in VAE_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = converted_state_dict.pop(key) + + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in VAE_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_autoencoder_dc_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + def remap_qkv_(key: str, state_dict): + qkv = state_dict.pop(key) + q, k, v = torch.chunk(qkv, 3, dim=0) + parent_module, _, _ = key.rpartition(".qkv.conv.weight") + state_dict[f"{parent_module}.to_q.weight"] = q.squeeze() + state_dict[f"{parent_module}.to_k.weight"] = k.squeeze() + state_dict[f"{parent_module}.to_v.weight"] = v.squeeze() + + def remap_proj_conv_(key: str, state_dict): + parent_module, _, _ = key.rpartition(".proj.conv.weight") + state_dict[f"{parent_module}.to_out.weight"] = state_dict.pop(key).squeeze() + + AE_KEYS_RENAME_DICT = { + # common + "main.": "", + "op_list.": "", + "context_module": "attn", + "local_module": "conv_out", + # NOTE: The below two lines work because scales in the available configs only have a tuple length of 1 + # If there were more scales, there would be more layers, so a loop would be better to handle this + "aggreg.0.0": "to_qkv_multiscale.0.proj_in", + "aggreg.0.1": "to_qkv_multiscale.0.proj_out", + "depth_conv.conv": "conv_depth", + "inverted_conv.conv": "conv_inverted", + "point_conv.conv": "conv_point", + "point_conv.norm": "norm", + "conv.conv.": "conv.", + "conv1.conv": "conv1", + "conv2.conv": "conv2", + "conv2.norm": "norm", + "proj.norm": "norm_out", + # encoder + "encoder.project_in.conv": "encoder.conv_in", + "encoder.project_out.0.conv": "encoder.conv_out", + "encoder.stages": "encoder.down_blocks", + # decoder + "decoder.project_in.conv": "decoder.conv_in", + "decoder.project_out.0": "decoder.norm_out", + "decoder.project_out.2.conv": "decoder.conv_out", + "decoder.stages": "decoder.up_blocks", + } + + AE_F32C32_F64C128_F128C512_KEYS = { + "encoder.project_in.conv": "encoder.conv_in.conv", + "decoder.project_out.2.conv": "decoder.conv_out.conv", + } + + AE_SPECIAL_KEYS_REMAP = { + "qkv.conv.weight": remap_qkv_, + "proj.conv.weight": remap_proj_conv_, + } + if "encoder.project_in.conv.bias" not in converted_state_dict: + AE_KEYS_RENAME_DICT.update(AE_F32C32_F64C128_F128C512_KEYS) + + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in AE_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = converted_state_dict.pop(key) + + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in AE_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_mochi_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + + # Comfy checkpoints add this prefix + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + # Convert patch_embed + converted_state_dict["patch_embed.proj.weight"] = checkpoint.pop("x_embedder.proj.weight") + converted_state_dict["patch_embed.proj.bias"] = checkpoint.pop("x_embedder.proj.bias") + + # Convert time_embed + converted_state_dict["time_embed.timestep_embedder.linear_1.weight"] = checkpoint.pop("t_embedder.mlp.0.weight") + converted_state_dict["time_embed.timestep_embedder.linear_1.bias"] = checkpoint.pop("t_embedder.mlp.0.bias") + converted_state_dict["time_embed.timestep_embedder.linear_2.weight"] = checkpoint.pop("t_embedder.mlp.2.weight") + converted_state_dict["time_embed.timestep_embedder.linear_2.bias"] = checkpoint.pop("t_embedder.mlp.2.bias") + converted_state_dict["time_embed.pooler.to_kv.weight"] = checkpoint.pop("t5_y_embedder.to_kv.weight") + converted_state_dict["time_embed.pooler.to_kv.bias"] = checkpoint.pop("t5_y_embedder.to_kv.bias") + converted_state_dict["time_embed.pooler.to_q.weight"] = checkpoint.pop("t5_y_embedder.to_q.weight") + converted_state_dict["time_embed.pooler.to_q.bias"] = checkpoint.pop("t5_y_embedder.to_q.bias") + converted_state_dict["time_embed.pooler.to_out.weight"] = checkpoint.pop("t5_y_embedder.to_out.weight") + converted_state_dict["time_embed.pooler.to_out.bias"] = checkpoint.pop("t5_y_embedder.to_out.bias") + converted_state_dict["time_embed.caption_proj.weight"] = checkpoint.pop("t5_yproj.weight") + converted_state_dict["time_embed.caption_proj.bias"] = checkpoint.pop("t5_yproj.bias") + + # Convert transformer blocks + num_layers = 48 + for i in range(num_layers): + block_prefix = f"transformer_blocks.{i}." + old_prefix = f"blocks.{i}." + + # norm1 + converted_state_dict[block_prefix + "norm1.linear.weight"] = checkpoint.pop(old_prefix + "mod_x.weight") + converted_state_dict[block_prefix + "norm1.linear.bias"] = checkpoint.pop(old_prefix + "mod_x.bias") + if i < num_layers - 1: + converted_state_dict[block_prefix + "norm1_context.linear.weight"] = checkpoint.pop( + old_prefix + "mod_y.weight" + ) + converted_state_dict[block_prefix + "norm1_context.linear.bias"] = checkpoint.pop( + old_prefix + "mod_y.bias" + ) + else: + converted_state_dict[block_prefix + "norm1_context.linear_1.weight"] = checkpoint.pop( + old_prefix + "mod_y.weight" + ) + converted_state_dict[block_prefix + "norm1_context.linear_1.bias"] = checkpoint.pop( + old_prefix + "mod_y.bias" + ) + + # Visual attention + qkv_weight = checkpoint.pop(old_prefix + "attn.qkv_x.weight") + q, k, v = qkv_weight.chunk(3, dim=0) + + converted_state_dict[block_prefix + "attn1.to_q.weight"] = q + converted_state_dict[block_prefix + "attn1.to_k.weight"] = k + converted_state_dict[block_prefix + "attn1.to_v.weight"] = v + converted_state_dict[block_prefix + "attn1.norm_q.weight"] = checkpoint.pop( + old_prefix + "attn.q_norm_x.weight" + ) + converted_state_dict[block_prefix + "attn1.norm_k.weight"] = checkpoint.pop( + old_prefix + "attn.k_norm_x.weight" + ) + converted_state_dict[block_prefix + "attn1.to_out.0.weight"] = checkpoint.pop( + old_prefix + "attn.proj_x.weight" + ) + converted_state_dict[block_prefix + "attn1.to_out.0.bias"] = checkpoint.pop(old_prefix + "attn.proj_x.bias") + + # Context attention + qkv_weight = checkpoint.pop(old_prefix + "attn.qkv_y.weight") + q, k, v = qkv_weight.chunk(3, dim=0) + + converted_state_dict[block_prefix + "attn1.add_q_proj.weight"] = q + converted_state_dict[block_prefix + "attn1.add_k_proj.weight"] = k + converted_state_dict[block_prefix + "attn1.add_v_proj.weight"] = v + converted_state_dict[block_prefix + "attn1.norm_added_q.weight"] = checkpoint.pop( + old_prefix + "attn.q_norm_y.weight" + ) + converted_state_dict[block_prefix + "attn1.norm_added_k.weight"] = checkpoint.pop( + old_prefix + "attn.k_norm_y.weight" + ) + if i < num_layers - 1: + converted_state_dict[block_prefix + "attn1.to_add_out.weight"] = checkpoint.pop( + old_prefix + "attn.proj_y.weight" + ) + converted_state_dict[block_prefix + "attn1.to_add_out.bias"] = checkpoint.pop( + old_prefix + "attn.proj_y.bias" + ) + + # MLP + converted_state_dict[block_prefix + "ff.net.0.proj.weight"] = swap_proj_gate( + checkpoint.pop(old_prefix + "mlp_x.w1.weight") + ) + converted_state_dict[block_prefix + "ff.net.2.weight"] = checkpoint.pop(old_prefix + "mlp_x.w2.weight") + if i < num_layers - 1: + converted_state_dict[block_prefix + "ff_context.net.0.proj.weight"] = swap_proj_gate( + checkpoint.pop(old_prefix + "mlp_y.w1.weight") + ) + converted_state_dict[block_prefix + "ff_context.net.2.weight"] = checkpoint.pop( + old_prefix + "mlp_y.w2.weight" + ) + + # Output layers + converted_state_dict["norm_out.linear.weight"] = swap_scale_shift(checkpoint.pop("final_layer.mod.weight"), dim=0) + converted_state_dict["norm_out.linear.bias"] = swap_scale_shift(checkpoint.pop("final_layer.mod.bias"), dim=0) + converted_state_dict["proj_out.weight"] = checkpoint.pop("final_layer.linear.weight") + converted_state_dict["proj_out.bias"] = checkpoint.pop("final_layer.linear.bias") + + converted_state_dict["pos_frequencies"] = checkpoint.pop("pos_frequencies") + + return converted_state_dict + + +def convert_hunyuan_video_transformer_to_diffusers(checkpoint, **kwargs): + def remap_norm_scale_shift_(key, state_dict): + weight = state_dict.pop(key) + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + state_dict[key.replace("final_layer.adaLN_modulation.1", "norm_out.linear")] = new_weight + + def remap_txt_in_(key, state_dict): + def rename_key(key): + new_key = key.replace("individual_token_refiner.blocks", "token_refiner.refiner_blocks") + new_key = new_key.replace("adaLN_modulation.1", "norm_out.linear") + new_key = new_key.replace("txt_in", "context_embedder") + new_key = new_key.replace("t_embedder.mlp.0", "time_text_embed.timestep_embedder.linear_1") + new_key = new_key.replace("t_embedder.mlp.2", "time_text_embed.timestep_embedder.linear_2") + new_key = new_key.replace("c_embedder", "time_text_embed.text_embedder") + new_key = new_key.replace("mlp", "ff") + return new_key + + if "self_attn_qkv" in key: + weight = state_dict.pop(key) + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_q"))] = to_q + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_k"))] = to_k + state_dict[rename_key(key.replace("self_attn_qkv", "attn.to_v"))] = to_v + else: + state_dict[rename_key(key)] = state_dict.pop(key) + + def remap_img_attn_qkv_(key, state_dict): + weight = state_dict.pop(key) + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[key.replace("img_attn_qkv", "attn.to_q")] = to_q + state_dict[key.replace("img_attn_qkv", "attn.to_k")] = to_k + state_dict[key.replace("img_attn_qkv", "attn.to_v")] = to_v + + def remap_txt_attn_qkv_(key, state_dict): + weight = state_dict.pop(key) + to_q, to_k, to_v = weight.chunk(3, dim=0) + state_dict[key.replace("txt_attn_qkv", "attn.add_q_proj")] = to_q + state_dict[key.replace("txt_attn_qkv", "attn.add_k_proj")] = to_k + state_dict[key.replace("txt_attn_qkv", "attn.add_v_proj")] = to_v + + def remap_single_transformer_blocks_(key, state_dict): + hidden_size = 3072 + + if "linear1.weight" in key: + linear1_weight = state_dict.pop(key) + split_size = (hidden_size, hidden_size, hidden_size, linear1_weight.size(0) - 3 * hidden_size) + q, k, v, mlp = torch.split(linear1_weight, split_size, dim=0) + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix(".linear1.weight") + state_dict[f"{new_key}.attn.to_q.weight"] = q + state_dict[f"{new_key}.attn.to_k.weight"] = k + state_dict[f"{new_key}.attn.to_v.weight"] = v + state_dict[f"{new_key}.proj_mlp.weight"] = mlp + + elif "linear1.bias" in key: + linear1_bias = state_dict.pop(key) + split_size = (hidden_size, hidden_size, hidden_size, linear1_bias.size(0) - 3 * hidden_size) + q_bias, k_bias, v_bias, mlp_bias = torch.split(linear1_bias, split_size, dim=0) + new_key = key.replace("single_blocks", "single_transformer_blocks").removesuffix(".linear1.bias") + state_dict[f"{new_key}.attn.to_q.bias"] = q_bias + state_dict[f"{new_key}.attn.to_k.bias"] = k_bias + state_dict[f"{new_key}.attn.to_v.bias"] = v_bias + state_dict[f"{new_key}.proj_mlp.bias"] = mlp_bias + + else: + new_key = key.replace("single_blocks", "single_transformer_blocks") + new_key = new_key.replace("linear2", "proj_out") + new_key = new_key.replace("q_norm", "attn.norm_q") + new_key = new_key.replace("k_norm", "attn.norm_k") + state_dict[new_key] = state_dict.pop(key) + + TRANSFORMER_KEYS_RENAME_DICT = { + "img_in": "x_embedder", + "time_in.mlp.0": "time_text_embed.timestep_embedder.linear_1", + "time_in.mlp.2": "time_text_embed.timestep_embedder.linear_2", + "guidance_in.mlp.0": "time_text_embed.guidance_embedder.linear_1", + "guidance_in.mlp.2": "time_text_embed.guidance_embedder.linear_2", + "vector_in.in_layer": "time_text_embed.text_embedder.linear_1", + "vector_in.out_layer": "time_text_embed.text_embedder.linear_2", + "double_blocks": "transformer_blocks", + "img_attn_q_norm": "attn.norm_q", + "img_attn_k_norm": "attn.norm_k", + "img_attn_proj": "attn.to_out.0", + "txt_attn_q_norm": "attn.norm_added_q", + "txt_attn_k_norm": "attn.norm_added_k", + "txt_attn_proj": "attn.to_add_out", + "img_mod.linear": "norm1.linear", + "img_norm1": "norm1.norm", + "img_norm2": "norm2", + "img_mlp": "ff", + "txt_mod.linear": "norm1_context.linear", + "txt_norm1": "norm1.norm", + "txt_norm2": "norm2_context", + "txt_mlp": "ff_context", + "self_attn_proj": "attn.to_out.0", + "modulation.linear": "norm.linear", + "pre_norm": "norm.norm", + "final_layer.norm_final": "norm_out.norm", + "final_layer.linear": "proj_out", + "fc1": "net.0.proj", + "fc2": "net.2", + "input_embedder": "proj_in", + } + + TRANSFORMER_SPECIAL_KEYS_REMAP = { + "txt_in": remap_txt_in_, + "img_attn_qkv": remap_img_attn_qkv_, + "txt_attn_qkv": remap_txt_attn_qkv_, + "single_blocks": remap_single_transformer_blocks_, + "final_layer.adaLN_modulation.1": remap_norm_scale_shift_, + } + + def update_state_dict_(state_dict, old_key, new_key): + state_dict[new_key] = state_dict.pop(old_key) + + for key in list(checkpoint.keys()): + new_key = key[:] + for replace_key, rename_key in TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + update_state_dict_(checkpoint, key, new_key) + + for key in list(checkpoint.keys()): + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, checkpoint) + + return checkpoint + + +def convert_auraflow_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + state_dict_keys = list(checkpoint.keys()) + + # Handle register tokens and positional embeddings + converted_state_dict["register_tokens"] = checkpoint.pop("register_tokens", None) + + # Handle time step projection + converted_state_dict["time_step_proj.linear_1.weight"] = checkpoint.pop("t_embedder.mlp.0.weight", None) + converted_state_dict["time_step_proj.linear_1.bias"] = checkpoint.pop("t_embedder.mlp.0.bias", None) + converted_state_dict["time_step_proj.linear_2.weight"] = checkpoint.pop("t_embedder.mlp.2.weight", None) + converted_state_dict["time_step_proj.linear_2.bias"] = checkpoint.pop("t_embedder.mlp.2.bias", None) + + # Handle context embedder + converted_state_dict["context_embedder.weight"] = checkpoint.pop("cond_seq_linear.weight", None) + + # Calculate the number of layers + def calculate_layers(keys, key_prefix): + layers = set() + for k in keys: + if key_prefix in k: + layer_num = int(k.split(".")[1]) # get the layer number + layers.add(layer_num) + return len(layers) + + mmdit_layers = calculate_layers(state_dict_keys, key_prefix="double_layers") + single_dit_layers = calculate_layers(state_dict_keys, key_prefix="single_layers") + + # MMDiT blocks + for i in range(mmdit_layers): + # Feed-forward + path_mapping = {"mlpX": "ff", "mlpC": "ff_context"} + weight_mapping = {"c_fc1": "linear_1", "c_fc2": "linear_2", "c_proj": "out_projection"} + for orig_k, diffuser_k in path_mapping.items(): + for k, v in weight_mapping.items(): + converted_state_dict[f"joint_transformer_blocks.{i}.{diffuser_k}.{v}.weight"] = checkpoint.pop( + f"double_layers.{i}.{orig_k}.{k}.weight", None + ) + + # Norms + path_mapping = {"modX": "norm1", "modC": "norm1_context"} + for orig_k, diffuser_k in path_mapping.items(): + converted_state_dict[f"joint_transformer_blocks.{i}.{diffuser_k}.linear.weight"] = checkpoint.pop( + f"double_layers.{i}.{orig_k}.1.weight", None + ) + + # Attentions + x_attn_mapping = {"w2q": "to_q", "w2k": "to_k", "w2v": "to_v", "w2o": "to_out.0"} + context_attn_mapping = {"w1q": "add_q_proj", "w1k": "add_k_proj", "w1v": "add_v_proj", "w1o": "to_add_out"} + for attn_mapping in [x_attn_mapping, context_attn_mapping]: + for k, v in attn_mapping.items(): + converted_state_dict[f"joint_transformer_blocks.{i}.attn.{v}.weight"] = checkpoint.pop( + f"double_layers.{i}.attn.{k}.weight", None + ) + + # Single-DiT blocks + for i in range(single_dit_layers): + # Feed-forward + mapping = {"c_fc1": "linear_1", "c_fc2": "linear_2", "c_proj": "out_projection"} + for k, v in mapping.items(): + converted_state_dict[f"single_transformer_blocks.{i}.ff.{v}.weight"] = checkpoint.pop( + f"single_layers.{i}.mlp.{k}.weight", None + ) + + # Norms + converted_state_dict[f"single_transformer_blocks.{i}.norm1.linear.weight"] = checkpoint.pop( + f"single_layers.{i}.modCX.1.weight", None + ) + + # Attentions + x_attn_mapping = {"w1q": "to_q", "w1k": "to_k", "w1v": "to_v", "w1o": "to_out.0"} + for k, v in x_attn_mapping.items(): + converted_state_dict[f"single_transformer_blocks.{i}.attn.{v}.weight"] = checkpoint.pop( + f"single_layers.{i}.attn.{k}.weight", None + ) + # Final blocks + converted_state_dict["proj_out.weight"] = checkpoint.pop("final_linear.weight", None) + + # Handle the final norm layer + norm_weight = checkpoint.pop("modF.1.weight", None) + if norm_weight is not None: + converted_state_dict["norm_out.linear.weight"] = swap_scale_shift(norm_weight, dim=None) + else: + converted_state_dict["norm_out.linear.weight"] = None + + converted_state_dict["pos_embed.pos_embed"] = checkpoint.pop("positional_encoding") + converted_state_dict["pos_embed.proj.weight"] = checkpoint.pop("init_x_linear.weight") + converted_state_dict["pos_embed.proj.bias"] = checkpoint.pop("init_x_linear.bias") + + return converted_state_dict + + +def convert_lumina2_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + + # Original Lumina-Image-2 has an extra norm parameter that is unused + # We just remove it here + checkpoint.pop("norm_final.weight", None) + + # Comfy checkpoints add this prefix + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + LUMINA_KEY_MAP = { + "cap_embedder": "time_caption_embed.caption_embedder", + "t_embedder.mlp.0": "time_caption_embed.timestep_embedder.linear_1", + "t_embedder.mlp.2": "time_caption_embed.timestep_embedder.linear_2", + "attention": "attn", + ".out.": ".to_out.0.", + "k_norm": "norm_k", + "q_norm": "norm_q", + "w1": "linear_1", + "w2": "linear_2", + "w3": "linear_3", + "adaLN_modulation.1": "norm1.linear", + } + ATTENTION_NORM_MAP = { + "attention_norm1": "norm1.norm", + "attention_norm2": "norm2", + } + CONTEXT_REFINER_MAP = { + "context_refiner.0.attention_norm1": "context_refiner.0.norm1", + "context_refiner.0.attention_norm2": "context_refiner.0.norm2", + "context_refiner.1.attention_norm1": "context_refiner.1.norm1", + "context_refiner.1.attention_norm2": "context_refiner.1.norm2", + } + FINAL_LAYER_MAP = { + "final_layer.adaLN_modulation.1": "norm_out.linear_1", + "final_layer.linear": "norm_out.linear_2", + } + + def convert_lumina_attn_to_diffusers(tensor, diffusers_key): + q_dim = 2304 + k_dim = v_dim = 768 + + to_q, to_k, to_v = torch.split(tensor, [q_dim, k_dim, v_dim], dim=0) + + return { + diffusers_key.replace("qkv", "to_q"): to_q, + diffusers_key.replace("qkv", "to_k"): to_k, + diffusers_key.replace("qkv", "to_v"): to_v, + } + + for key in keys: + diffusers_key = key + for k, v in CONTEXT_REFINER_MAP.items(): + diffusers_key = diffusers_key.replace(k, v) + for k, v in FINAL_LAYER_MAP.items(): + diffusers_key = diffusers_key.replace(k, v) + for k, v in ATTENTION_NORM_MAP.items(): + diffusers_key = diffusers_key.replace(k, v) + for k, v in LUMINA_KEY_MAP.items(): + diffusers_key = diffusers_key.replace(k, v) + + if "qkv" in diffusers_key: + converted_state_dict.update(convert_lumina_attn_to_diffusers(checkpoint.pop(key), diffusers_key)) + else: + converted_state_dict[diffusers_key] = checkpoint.pop(key) + + return converted_state_dict + + +def convert_sana_transformer_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + num_layers = list(set(int(k.split(".", 2)[1]) for k in checkpoint if "blocks" in k))[-1] + 1 # noqa: C401 + + # Positional and patch embeddings. + checkpoint.pop("pos_embed") + converted_state_dict["patch_embed.proj.weight"] = checkpoint.pop("x_embedder.proj.weight") + converted_state_dict["patch_embed.proj.bias"] = checkpoint.pop("x_embedder.proj.bias") + + # Timestep embeddings. + converted_state_dict["time_embed.emb.timestep_embedder.linear_1.weight"] = checkpoint.pop( + "t_embedder.mlp.0.weight" + ) + converted_state_dict["time_embed.emb.timestep_embedder.linear_1.bias"] = checkpoint.pop("t_embedder.mlp.0.bias") + converted_state_dict["time_embed.emb.timestep_embedder.linear_2.weight"] = checkpoint.pop( + "t_embedder.mlp.2.weight" + ) + converted_state_dict["time_embed.emb.timestep_embedder.linear_2.bias"] = checkpoint.pop("t_embedder.mlp.2.bias") + converted_state_dict["time_embed.linear.weight"] = checkpoint.pop("t_block.1.weight") + converted_state_dict["time_embed.linear.bias"] = checkpoint.pop("t_block.1.bias") + + # Caption Projection. + checkpoint.pop("y_embedder.y_embedding") + converted_state_dict["caption_projection.linear_1.weight"] = checkpoint.pop("y_embedder.y_proj.fc1.weight") + converted_state_dict["caption_projection.linear_1.bias"] = checkpoint.pop("y_embedder.y_proj.fc1.bias") + converted_state_dict["caption_projection.linear_2.weight"] = checkpoint.pop("y_embedder.y_proj.fc2.weight") + converted_state_dict["caption_projection.linear_2.bias"] = checkpoint.pop("y_embedder.y_proj.fc2.bias") + converted_state_dict["caption_norm.weight"] = checkpoint.pop("attention_y_norm.weight") + + for i in range(num_layers): + converted_state_dict[f"transformer_blocks.{i}.scale_shift_table"] = checkpoint.pop( + f"blocks.{i}.scale_shift_table" + ) + + # Self-Attention + sample_q, sample_k, sample_v = torch.chunk(checkpoint.pop(f"blocks.{i}.attn.qkv.weight"), 3, dim=0) + converted_state_dict[f"transformer_blocks.{i}.attn1.to_q.weight"] = torch.cat([sample_q]) + converted_state_dict[f"transformer_blocks.{i}.attn1.to_k.weight"] = torch.cat([sample_k]) + converted_state_dict[f"transformer_blocks.{i}.attn1.to_v.weight"] = torch.cat([sample_v]) + + # Output Projections + converted_state_dict[f"transformer_blocks.{i}.attn1.to_out.0.weight"] = checkpoint.pop( + f"blocks.{i}.attn.proj.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn1.to_out.0.bias"] = checkpoint.pop( + f"blocks.{i}.attn.proj.bias" + ) + + # Cross-Attention + converted_state_dict[f"transformer_blocks.{i}.attn2.to_q.weight"] = checkpoint.pop( + f"blocks.{i}.cross_attn.q_linear.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_q.bias"] = checkpoint.pop( + f"blocks.{i}.cross_attn.q_linear.bias" + ) + + linear_sample_k, linear_sample_v = torch.chunk( + checkpoint.pop(f"blocks.{i}.cross_attn.kv_linear.weight"), 2, dim=0 + ) + linear_sample_k_bias, linear_sample_v_bias = torch.chunk( + checkpoint.pop(f"blocks.{i}.cross_attn.kv_linear.bias"), 2, dim=0 + ) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_k.weight"] = linear_sample_k + converted_state_dict[f"transformer_blocks.{i}.attn2.to_v.weight"] = linear_sample_v + converted_state_dict[f"transformer_blocks.{i}.attn2.to_k.bias"] = linear_sample_k_bias + converted_state_dict[f"transformer_blocks.{i}.attn2.to_v.bias"] = linear_sample_v_bias + + # Output Projections + converted_state_dict[f"transformer_blocks.{i}.attn2.to_out.0.weight"] = checkpoint.pop( + f"blocks.{i}.cross_attn.proj.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.attn2.to_out.0.bias"] = checkpoint.pop( + f"blocks.{i}.cross_attn.proj.bias" + ) + + # MLP + converted_state_dict[f"transformer_blocks.{i}.ff.conv_inverted.weight"] = checkpoint.pop( + f"blocks.{i}.mlp.inverted_conv.conv.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.conv_inverted.bias"] = checkpoint.pop( + f"blocks.{i}.mlp.inverted_conv.conv.bias" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.conv_depth.weight"] = checkpoint.pop( + f"blocks.{i}.mlp.depth_conv.conv.weight" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.conv_depth.bias"] = checkpoint.pop( + f"blocks.{i}.mlp.depth_conv.conv.bias" + ) + converted_state_dict[f"transformer_blocks.{i}.ff.conv_point.weight"] = checkpoint.pop( + f"blocks.{i}.mlp.point_conv.conv.weight" + ) + + # Final layer + converted_state_dict["proj_out.weight"] = checkpoint.pop("final_layer.linear.weight") + converted_state_dict["proj_out.bias"] = checkpoint.pop("final_layer.linear.bias") + converted_state_dict["scale_shift_table"] = checkpoint.pop("final_layer.scale_shift_table") + + return converted_state_dict + + +def convert_wan_transformer_to_diffusers(checkpoint, **kwargs): + def generate_motion_encoder_mappings(): + mappings = { + "motion_encoder.dec.direction.weight": "motion_encoder.motion_synthesis_weight", + "motion_encoder.enc.net_app.convs.0.0.weight": "motion_encoder.conv_in.weight", + "motion_encoder.enc.net_app.convs.0.1.bias": "motion_encoder.conv_in.act_fn.bias", + "motion_encoder.enc.net_app.convs.8.weight": "motion_encoder.conv_out.weight", + "motion_encoder.enc.fc": "motion_encoder.motion_network", + } + + for i in range(7): + conv_idx = i + 1 + mappings.update( + { + f"motion_encoder.enc.net_app.convs.{conv_idx}.conv1.0.weight": f"motion_encoder.res_blocks.{i}.conv1.weight", + f"motion_encoder.enc.net_app.convs.{conv_idx}.conv1.1.bias": f"motion_encoder.res_blocks.{i}.conv1.act_fn.bias", + f"motion_encoder.enc.net_app.convs.{conv_idx}.conv2.1.weight": f"motion_encoder.res_blocks.{i}.conv2.weight", + f"motion_encoder.enc.net_app.convs.{conv_idx}.conv2.2.bias": f"motion_encoder.res_blocks.{i}.conv2.act_fn.bias", + f"motion_encoder.enc.net_app.convs.{conv_idx}.skip.1.weight": f"motion_encoder.res_blocks.{i}.conv_skip.weight", + } + ) + + return mappings + + def generate_face_adapter_mappings(): + return { + "face_adapter.fuser_blocks": "face_adapter", + ".k_norm.": ".norm_k.", + ".q_norm.": ".norm_q.", + ".linear1_q.": ".to_q.", + ".linear2.": ".to_out.", + "conv1_local.conv": "conv1_local", + "conv2.conv": "conv2", + "conv3.conv": "conv3", + } + + def split_tensor_handler(key, state_dict, split_pattern, target_keys): + tensor = state_dict.pop(key) + split_idx = tensor.shape[0] // 2 + + new_key_1 = key.replace(split_pattern, target_keys[0]) + new_key_2 = key.replace(split_pattern, target_keys[1]) + + state_dict[new_key_1] = tensor[:split_idx] + state_dict[new_key_2] = tensor[split_idx:] + + def reshape_bias_handler(key, state_dict): + if "motion_encoder.enc.net_app.convs." in key and ".bias" in key: + state_dict[key] = state_dict[key][0, :, 0, 0] + + converted_state_dict = {} + + # Strip model.diffusion_model prefix + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + # Base transformer mappings + TRANSFORMER_KEYS_RENAME_DICT = { + "time_embedding.0": "condition_embedder.time_embedder.linear_1", + "time_embedding.2": "condition_embedder.time_embedder.linear_2", + "text_embedding.0": "condition_embedder.text_embedder.linear_1", + "text_embedding.2": "condition_embedder.text_embedder.linear_2", + "time_projection.1": "condition_embedder.time_proj", + "cross_attn": "attn2", + "self_attn": "attn1", + ".o.": ".to_out.0.", + ".q.": ".to_q.", + ".k.": ".to_k.", + ".v.": ".to_v.", + ".k_img.": ".add_k_proj.", + ".v_img.": ".add_v_proj.", + ".norm_k_img.": ".norm_added_k.", + "head.modulation": "scale_shift_table", + "head.head": "proj_out", + "modulation": "scale_shift_table", + "ffn.0": "ffn.net.0.proj", + "ffn.2": "ffn.net.2", + # Hack to swap the layer names + "norm2": "norm__placeholder", + "norm3": "norm2", + "norm__placeholder": "norm3", + # I2V model + "img_emb.proj.0": "condition_embedder.image_embedder.norm1", + "img_emb.proj.1": "condition_embedder.image_embedder.ff.net.0.proj", + "img_emb.proj.3": "condition_embedder.image_embedder.ff.net.2", + "img_emb.proj.4": "condition_embedder.image_embedder.norm2", + # VACE model + "before_proj": "proj_in", + "after_proj": "proj_out", + } + + SPECIAL_KEYS_HANDLERS = {} + if any("face_adapter" in k for k in checkpoint.keys()): + TRANSFORMER_KEYS_RENAME_DICT.update(generate_face_adapter_mappings()) + SPECIAL_KEYS_HANDLERS[".linear1_kv."] = (split_tensor_handler, [".to_k.", ".to_v."]) + + if any("motion_encoder" in k for k in checkpoint.keys()): + TRANSFORMER_KEYS_RENAME_DICT.update(generate_motion_encoder_mappings()) + + for key in list(checkpoint.keys()): + reshape_bias_handler(key, checkpoint) + + for key in list(checkpoint.keys()): + new_key = key + for replace_key, rename_key in TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = checkpoint.pop(key) + + for key in list(converted_state_dict.keys()): + for pattern, (handler_fn, target_keys) in SPECIAL_KEYS_HANDLERS.items(): + if pattern not in key: + continue + handler_fn(key, converted_state_dict, pattern, target_keys) + break + + return converted_state_dict + + +def convert_wan_vae_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + + # Create mappings for specific components + middle_key_mapping = { + # Encoder middle block + "encoder.middle.0.residual.0.gamma": "encoder.mid_block.resnets.0.norm1.gamma", + "encoder.middle.0.residual.2.bias": "encoder.mid_block.resnets.0.conv1.bias", + "encoder.middle.0.residual.2.weight": "encoder.mid_block.resnets.0.conv1.weight", + "encoder.middle.0.residual.3.gamma": "encoder.mid_block.resnets.0.norm2.gamma", + "encoder.middle.0.residual.6.bias": "encoder.mid_block.resnets.0.conv2.bias", + "encoder.middle.0.residual.6.weight": "encoder.mid_block.resnets.0.conv2.weight", + "encoder.middle.2.residual.0.gamma": "encoder.mid_block.resnets.1.norm1.gamma", + "encoder.middle.2.residual.2.bias": "encoder.mid_block.resnets.1.conv1.bias", + "encoder.middle.2.residual.2.weight": "encoder.mid_block.resnets.1.conv1.weight", + "encoder.middle.2.residual.3.gamma": "encoder.mid_block.resnets.1.norm2.gamma", + "encoder.middle.2.residual.6.bias": "encoder.mid_block.resnets.1.conv2.bias", + "encoder.middle.2.residual.6.weight": "encoder.mid_block.resnets.1.conv2.weight", + # Decoder middle block + "decoder.middle.0.residual.0.gamma": "decoder.mid_block.resnets.0.norm1.gamma", + "decoder.middle.0.residual.2.bias": "decoder.mid_block.resnets.0.conv1.bias", + "decoder.middle.0.residual.2.weight": "decoder.mid_block.resnets.0.conv1.weight", + "decoder.middle.0.residual.3.gamma": "decoder.mid_block.resnets.0.norm2.gamma", + "decoder.middle.0.residual.6.bias": "decoder.mid_block.resnets.0.conv2.bias", + "decoder.middle.0.residual.6.weight": "decoder.mid_block.resnets.0.conv2.weight", + "decoder.middle.2.residual.0.gamma": "decoder.mid_block.resnets.1.norm1.gamma", + "decoder.middle.2.residual.2.bias": "decoder.mid_block.resnets.1.conv1.bias", + "decoder.middle.2.residual.2.weight": "decoder.mid_block.resnets.1.conv1.weight", + "decoder.middle.2.residual.3.gamma": "decoder.mid_block.resnets.1.norm2.gamma", + "decoder.middle.2.residual.6.bias": "decoder.mid_block.resnets.1.conv2.bias", + "decoder.middle.2.residual.6.weight": "decoder.mid_block.resnets.1.conv2.weight", + } + + # Create a mapping for attention blocks + attention_mapping = { + # Encoder middle attention + "encoder.middle.1.norm.gamma": "encoder.mid_block.attentions.0.norm.gamma", + "encoder.middle.1.to_qkv.weight": "encoder.mid_block.attentions.0.to_qkv.weight", + "encoder.middle.1.to_qkv.bias": "encoder.mid_block.attentions.0.to_qkv.bias", + "encoder.middle.1.proj.weight": "encoder.mid_block.attentions.0.proj.weight", + "encoder.middle.1.proj.bias": "encoder.mid_block.attentions.0.proj.bias", + # Decoder middle attention + "decoder.middle.1.norm.gamma": "decoder.mid_block.attentions.0.norm.gamma", + "decoder.middle.1.to_qkv.weight": "decoder.mid_block.attentions.0.to_qkv.weight", + "decoder.middle.1.to_qkv.bias": "decoder.mid_block.attentions.0.to_qkv.bias", + "decoder.middle.1.proj.weight": "decoder.mid_block.attentions.0.proj.weight", + "decoder.middle.1.proj.bias": "decoder.mid_block.attentions.0.proj.bias", + } + + # Create a mapping for the head components + head_mapping = { + # Encoder head + "encoder.head.0.gamma": "encoder.norm_out.gamma", + "encoder.head.2.bias": "encoder.conv_out.bias", + "encoder.head.2.weight": "encoder.conv_out.weight", + # Decoder head + "decoder.head.0.gamma": "decoder.norm_out.gamma", + "decoder.head.2.bias": "decoder.conv_out.bias", + "decoder.head.2.weight": "decoder.conv_out.weight", + } + + # Create a mapping for the quant components + quant_mapping = { + "conv1.weight": "quant_conv.weight", + "conv1.bias": "quant_conv.bias", + "conv2.weight": "post_quant_conv.weight", + "conv2.bias": "post_quant_conv.bias", + } + + # Process each key in the state dict + for key, value in checkpoint.items(): + # Handle middle block keys using the mapping + if key in middle_key_mapping: + new_key = middle_key_mapping[key] + converted_state_dict[new_key] = value + # Handle attention blocks using the mapping + elif key in attention_mapping: + new_key = attention_mapping[key] + converted_state_dict[new_key] = value + # Handle head keys using the mapping + elif key in head_mapping: + new_key = head_mapping[key] + converted_state_dict[new_key] = value + # Handle quant keys using the mapping + elif key in quant_mapping: + new_key = quant_mapping[key] + converted_state_dict[new_key] = value + # Handle encoder conv1 + elif key == "encoder.conv1.weight": + converted_state_dict["encoder.conv_in.weight"] = value + elif key == "encoder.conv1.bias": + converted_state_dict["encoder.conv_in.bias"] = value + # Handle decoder conv1 + elif key == "decoder.conv1.weight": + converted_state_dict["decoder.conv_in.weight"] = value + elif key == "decoder.conv1.bias": + converted_state_dict["decoder.conv_in.bias"] = value + # Handle encoder downsamples + elif key.startswith("encoder.downsamples."): + # Convert to down_blocks + new_key = key.replace("encoder.downsamples.", "encoder.down_blocks.") + + # Convert residual block naming but keep the original structure + if ".residual.0.gamma" in new_key: + new_key = new_key.replace(".residual.0.gamma", ".norm1.gamma") + elif ".residual.2.bias" in new_key: + new_key = new_key.replace(".residual.2.bias", ".conv1.bias") + elif ".residual.2.weight" in new_key: + new_key = new_key.replace(".residual.2.weight", ".conv1.weight") + elif ".residual.3.gamma" in new_key: + new_key = new_key.replace(".residual.3.gamma", ".norm2.gamma") + elif ".residual.6.bias" in new_key: + new_key = new_key.replace(".residual.6.bias", ".conv2.bias") + elif ".residual.6.weight" in new_key: + new_key = new_key.replace(".residual.6.weight", ".conv2.weight") + elif ".shortcut.bias" in new_key: + new_key = new_key.replace(".shortcut.bias", ".conv_shortcut.bias") + elif ".shortcut.weight" in new_key: + new_key = new_key.replace(".shortcut.weight", ".conv_shortcut.weight") + + converted_state_dict[new_key] = value + + # Handle decoder upsamples + elif key.startswith("decoder.upsamples."): + # Convert to up_blocks + parts = key.split(".") + block_idx = int(parts[2]) + + # Group residual blocks + if "residual" in key: + if block_idx in [0, 1, 2]: + new_block_idx = 0 + resnet_idx = block_idx + elif block_idx in [4, 5, 6]: + new_block_idx = 1 + resnet_idx = block_idx - 4 + elif block_idx in [8, 9, 10]: + new_block_idx = 2 + resnet_idx = block_idx - 8 + elif block_idx in [12, 13, 14]: + new_block_idx = 3 + resnet_idx = block_idx - 12 + else: + # Keep as is for other blocks + converted_state_dict[key] = value + continue + + # Convert residual block naming + if ".residual.0.gamma" in key: + new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.norm1.gamma" + elif ".residual.2.bias" in key: + new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.conv1.bias" + elif ".residual.2.weight" in key: + new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.conv1.weight" + elif ".residual.3.gamma" in key: + new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.norm2.gamma" + elif ".residual.6.bias" in key: + new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.conv2.bias" + elif ".residual.6.weight" in key: + new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.conv2.weight" + else: + new_key = key + + converted_state_dict[new_key] = value + + # Handle shortcut connections + elif ".shortcut." in key: + if block_idx == 4: + new_key = key.replace(".shortcut.", ".resnets.0.conv_shortcut.") + new_key = new_key.replace("decoder.upsamples.4", "decoder.up_blocks.1") + else: + new_key = key.replace("decoder.upsamples.", "decoder.up_blocks.") + new_key = new_key.replace(".shortcut.", ".conv_shortcut.") + + converted_state_dict[new_key] = value + + # Handle upsamplers + elif ".resample." in key or ".time_conv." in key: + if block_idx == 3: + new_key = key.replace(f"decoder.upsamples.{block_idx}", "decoder.up_blocks.0.upsamplers.0") + elif block_idx == 7: + new_key = key.replace(f"decoder.upsamples.{block_idx}", "decoder.up_blocks.1.upsamplers.0") + elif block_idx == 11: + new_key = key.replace(f"decoder.upsamples.{block_idx}", "decoder.up_blocks.2.upsamplers.0") + else: + new_key = key.replace("decoder.upsamples.", "decoder.up_blocks.") + + converted_state_dict[new_key] = value + else: + new_key = key.replace("decoder.upsamples.", "decoder.up_blocks.") + converted_state_dict[new_key] = value + else: + # Keep other keys unchanged + converted_state_dict[key] = value + + return converted_state_dict + + +def convert_hidream_transformer_to_diffusers(checkpoint, **kwargs): + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + return checkpoint + + +def convert_chroma_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {} + keys = list(checkpoint.keys()) + + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + num_layers = list(set(int(k.split(".", 2)[1]) for k in checkpoint if "double_blocks." in k))[-1] + 1 # noqa: C401 + num_single_layers = list(set(int(k.split(".", 2)[1]) for k in checkpoint if "single_blocks." in k))[-1] + 1 # noqa: C401 + num_guidance_layers = ( + list(set(int(k.split(".", 3)[2]) for k in checkpoint if "distilled_guidance_layer.layers." in k))[-1] + 1 # noqa: C401 + ) + mlp_ratio = 4.0 + inner_dim = 3072 + + # in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; + # while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation + def swap_scale_shift(weight): + shift, scale = weight.chunk(2, dim=0) + new_weight = torch.cat([scale, shift], dim=0) + return new_weight + + # guidance + converted_state_dict["distilled_guidance_layer.in_proj.bias"] = checkpoint.pop( + "distilled_guidance_layer.in_proj.bias" + ) + converted_state_dict["distilled_guidance_layer.in_proj.weight"] = checkpoint.pop( + "distilled_guidance_layer.in_proj.weight" + ) + converted_state_dict["distilled_guidance_layer.out_proj.bias"] = checkpoint.pop( + "distilled_guidance_layer.out_proj.bias" + ) + converted_state_dict["distilled_guidance_layer.out_proj.weight"] = checkpoint.pop( + "distilled_guidance_layer.out_proj.weight" + ) + for i in range(num_guidance_layers): + block_prefix = f"distilled_guidance_layer.layers.{i}." + converted_state_dict[f"{block_prefix}linear_1.bias"] = checkpoint.pop( + f"distilled_guidance_layer.layers.{i}.in_layer.bias" + ) + converted_state_dict[f"{block_prefix}linear_1.weight"] = checkpoint.pop( + f"distilled_guidance_layer.layers.{i}.in_layer.weight" + ) + converted_state_dict[f"{block_prefix}linear_2.bias"] = checkpoint.pop( + f"distilled_guidance_layer.layers.{i}.out_layer.bias" + ) + converted_state_dict[f"{block_prefix}linear_2.weight"] = checkpoint.pop( + f"distilled_guidance_layer.layers.{i}.out_layer.weight" + ) + converted_state_dict[f"distilled_guidance_layer.norms.{i}.weight"] = checkpoint.pop( + f"distilled_guidance_layer.norms.{i}.scale" + ) + + # context_embedder + converted_state_dict["context_embedder.weight"] = checkpoint.pop("txt_in.weight") + converted_state_dict["context_embedder.bias"] = checkpoint.pop("txt_in.bias") + + # x_embedder + converted_state_dict["x_embedder.weight"] = checkpoint.pop("img_in.weight") + converted_state_dict["x_embedder.bias"] = checkpoint.pop("img_in.bias") + + # double transformer blocks + for i in range(num_layers): + block_prefix = f"transformer_blocks.{i}." + # Q, K, V + sample_q, sample_k, sample_v = torch.chunk(checkpoint.pop(f"double_blocks.{i}.img_attn.qkv.weight"), 3, dim=0) + context_q, context_k, context_v = torch.chunk( + checkpoint.pop(f"double_blocks.{i}.txt_attn.qkv.weight"), 3, dim=0 + ) + sample_q_bias, sample_k_bias, sample_v_bias = torch.chunk( + checkpoint.pop(f"double_blocks.{i}.img_attn.qkv.bias"), 3, dim=0 + ) + context_q_bias, context_k_bias, context_v_bias = torch.chunk( + checkpoint.pop(f"double_blocks.{i}.txt_attn.qkv.bias"), 3, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.weight"] = torch.cat([sample_q]) + converted_state_dict[f"{block_prefix}attn.to_q.bias"] = torch.cat([sample_q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.weight"] = torch.cat([sample_k]) + converted_state_dict[f"{block_prefix}attn.to_k.bias"] = torch.cat([sample_k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.weight"] = torch.cat([sample_v]) + converted_state_dict[f"{block_prefix}attn.to_v.bias"] = torch.cat([sample_v_bias]) + converted_state_dict[f"{block_prefix}attn.add_q_proj.weight"] = torch.cat([context_q]) + converted_state_dict[f"{block_prefix}attn.add_q_proj.bias"] = torch.cat([context_q_bias]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.weight"] = torch.cat([context_k]) + converted_state_dict[f"{block_prefix}attn.add_k_proj.bias"] = torch.cat([context_k_bias]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.weight"] = torch.cat([context_v]) + converted_state_dict[f"{block_prefix}attn.add_v_proj.bias"] = torch.cat([context_v_bias]) + # qk_norm + converted_state_dict[f"{block_prefix}attn.norm_q.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_k.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.norm.key_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_added_q.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_added_k.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.norm.key_norm.scale" + ) + # ff img_mlp + converted_state_dict[f"{block_prefix}ff.net.0.proj.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_mlp.0.weight" + ) + converted_state_dict[f"{block_prefix}ff.net.0.proj.bias"] = checkpoint.pop(f"double_blocks.{i}.img_mlp.0.bias") + converted_state_dict[f"{block_prefix}ff.net.2.weight"] = checkpoint.pop(f"double_blocks.{i}.img_mlp.2.weight") + converted_state_dict[f"{block_prefix}ff.net.2.bias"] = checkpoint.pop(f"double_blocks.{i}.img_mlp.2.bias") + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.0.weight" + ) + converted_state_dict[f"{block_prefix}ff_context.net.0.proj.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.0.bias" + ) + converted_state_dict[f"{block_prefix}ff_context.net.2.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.2.weight" + ) + converted_state_dict[f"{block_prefix}ff_context.net.2.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_mlp.2.bias" + ) + # output projections. + converted_state_dict[f"{block_prefix}attn.to_out.0.weight"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.proj.weight" + ) + converted_state_dict[f"{block_prefix}attn.to_out.0.bias"] = checkpoint.pop( + f"double_blocks.{i}.img_attn.proj.bias" + ) + converted_state_dict[f"{block_prefix}attn.to_add_out.weight"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.proj.weight" + ) + converted_state_dict[f"{block_prefix}attn.to_add_out.bias"] = checkpoint.pop( + f"double_blocks.{i}.txt_attn.proj.bias" + ) + + # single transformer blocks + for i in range(num_single_layers): + block_prefix = f"single_transformer_blocks.{i}." + # Q, K, V, mlp + mlp_hidden_dim = int(inner_dim * mlp_ratio) + split_size = (inner_dim, inner_dim, inner_dim, mlp_hidden_dim) + q, k, v, mlp = torch.split(checkpoint.pop(f"single_blocks.{i}.linear1.weight"), split_size, dim=0) + q_bias, k_bias, v_bias, mlp_bias = torch.split( + checkpoint.pop(f"single_blocks.{i}.linear1.bias"), split_size, dim=0 + ) + converted_state_dict[f"{block_prefix}attn.to_q.weight"] = torch.cat([q]) + converted_state_dict[f"{block_prefix}attn.to_q.bias"] = torch.cat([q_bias]) + converted_state_dict[f"{block_prefix}attn.to_k.weight"] = torch.cat([k]) + converted_state_dict[f"{block_prefix}attn.to_k.bias"] = torch.cat([k_bias]) + converted_state_dict[f"{block_prefix}attn.to_v.weight"] = torch.cat([v]) + converted_state_dict[f"{block_prefix}attn.to_v.bias"] = torch.cat([v_bias]) + converted_state_dict[f"{block_prefix}proj_mlp.weight"] = torch.cat([mlp]) + converted_state_dict[f"{block_prefix}proj_mlp.bias"] = torch.cat([mlp_bias]) + # qk norm + converted_state_dict[f"{block_prefix}attn.norm_q.weight"] = checkpoint.pop( + f"single_blocks.{i}.norm.query_norm.scale" + ) + converted_state_dict[f"{block_prefix}attn.norm_k.weight"] = checkpoint.pop( + f"single_blocks.{i}.norm.key_norm.scale" + ) + # output projections. + converted_state_dict[f"{block_prefix}proj_out.weight"] = checkpoint.pop(f"single_blocks.{i}.linear2.weight") + converted_state_dict[f"{block_prefix}proj_out.bias"] = checkpoint.pop(f"single_blocks.{i}.linear2.bias") + + converted_state_dict["proj_out.weight"] = checkpoint.pop("final_layer.linear.weight") + converted_state_dict["proj_out.bias"] = checkpoint.pop("final_layer.linear.bias") + + return converted_state_dict + + +def convert_cosmos_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + def remove_keys_(key: str, state_dict): + state_dict.pop(key) + + def rename_transformer_blocks_(key: str, state_dict): + block_index = int(key.split(".")[1].removeprefix("block")) + new_key = key + old_prefix = f"blocks.block{block_index}" + new_prefix = f"transformer_blocks.{block_index}" + new_key = new_prefix + new_key.removeprefix(old_prefix) + state_dict[new_key] = state_dict.pop(key) + + TRANSFORMER_KEYS_RENAME_DICT_COSMOS_1_0 = { + "t_embedder.1": "time_embed.t_embedder", + "affline_norm": "time_embed.norm", + ".blocks.0.block.attn": ".attn1", + ".blocks.1.block.attn": ".attn2", + ".blocks.2.block": ".ff", + ".blocks.0.adaLN_modulation.1": ".norm1.linear_1", + ".blocks.0.adaLN_modulation.2": ".norm1.linear_2", + ".blocks.1.adaLN_modulation.1": ".norm2.linear_1", + ".blocks.1.adaLN_modulation.2": ".norm2.linear_2", + ".blocks.2.adaLN_modulation.1": ".norm3.linear_1", + ".blocks.2.adaLN_modulation.2": ".norm3.linear_2", + "to_q.0": "to_q", + "to_q.1": "norm_q", + "to_k.0": "to_k", + "to_k.1": "norm_k", + "to_v.0": "to_v", + "layer1": "net.0.proj", + "layer2": "net.2", + "proj.1": "proj", + "x_embedder": "patch_embed", + "extra_pos_embedder": "learnable_pos_embed", + "final_layer.adaLN_modulation.1": "norm_out.linear_1", + "final_layer.adaLN_modulation.2": "norm_out.linear_2", + "final_layer.linear": "proj_out", + } + + TRANSFORMER_SPECIAL_KEYS_REMAP_COSMOS_1_0 = { + "blocks.block": rename_transformer_blocks_, + "logvar.0.freqs": remove_keys_, + "logvar.0.phases": remove_keys_, + "logvar.1.weight": remove_keys_, + "pos_embedder.seq": remove_keys_, + } + + TRANSFORMER_KEYS_RENAME_DICT_COSMOS_2_0 = { + "t_embedder.1": "time_embed.t_embedder", + "t_embedding_norm": "time_embed.norm", + "blocks": "transformer_blocks", + "adaln_modulation_self_attn.1": "norm1.linear_1", + "adaln_modulation_self_attn.2": "norm1.linear_2", + "adaln_modulation_cross_attn.1": "norm2.linear_1", + "adaln_modulation_cross_attn.2": "norm2.linear_2", + "adaln_modulation_mlp.1": "norm3.linear_1", + "adaln_modulation_mlp.2": "norm3.linear_2", + "self_attn": "attn1", + "cross_attn": "attn2", + "q_proj": "to_q", + "k_proj": "to_k", + "v_proj": "to_v", + "output_proj": "to_out.0", + "q_norm": "norm_q", + "k_norm": "norm_k", + "mlp.layer1": "ff.net.0.proj", + "mlp.layer2": "ff.net.2", + "x_embedder.proj.1": "patch_embed.proj", + "final_layer.adaln_modulation.1": "norm_out.linear_1", + "final_layer.adaln_modulation.2": "norm_out.linear_2", + "final_layer.linear": "proj_out", + } + + TRANSFORMER_SPECIAL_KEYS_REMAP_COSMOS_2_0 = { + "accum_video_sample_counter": remove_keys_, + "accum_image_sample_counter": remove_keys_, + "accum_iteration": remove_keys_, + "accum_train_in_hours": remove_keys_, + "pos_embedder.seq": remove_keys_, + "pos_embedder.dim_spatial_range": remove_keys_, + "pos_embedder.dim_temporal_range": remove_keys_, + "_extra_state": remove_keys_, + } + + PREFIX_KEY = "net." + if "net.blocks.block1.blocks.0.block.attn.to_q.0.weight" in checkpoint: + TRANSFORMER_KEYS_RENAME_DICT = TRANSFORMER_KEYS_RENAME_DICT_COSMOS_1_0 + TRANSFORMER_SPECIAL_KEYS_REMAP = TRANSFORMER_SPECIAL_KEYS_REMAP_COSMOS_1_0 + else: + TRANSFORMER_KEYS_RENAME_DICT = TRANSFORMER_KEYS_RENAME_DICT_COSMOS_2_0 + TRANSFORMER_SPECIAL_KEYS_REMAP = TRANSFORMER_SPECIAL_KEYS_REMAP_COSMOS_2_0 + + state_dict_keys = list(converted_state_dict.keys()) + for key in state_dict_keys: + new_key = key[:] + if new_key.startswith(PREFIX_KEY): + new_key = new_key.removeprefix(PREFIX_KEY) + for replace_key, rename_key in TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + converted_state_dict[new_key] = converted_state_dict.pop(key) + + state_dict_keys = list(converted_state_dict.keys()) + for key in state_dict_keys: + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_flux2_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + FLUX2_TRANSFORMER_KEYS_RENAME_DICT = { + # Image and text input projections + "img_in": "x_embedder", + "txt_in": "context_embedder", + # Timestep and guidance embeddings + "time_in.in_layer": "time_guidance_embed.timestep_embedder.linear_1", + "time_in.out_layer": "time_guidance_embed.timestep_embedder.linear_2", + "guidance_in.in_layer": "time_guidance_embed.guidance_embedder.linear_1", + "guidance_in.out_layer": "time_guidance_embed.guidance_embedder.linear_2", + # Modulation parameters + "double_stream_modulation_img.lin": "double_stream_modulation_img.linear", + "double_stream_modulation_txt.lin": "double_stream_modulation_txt.linear", + "single_stream_modulation.lin": "single_stream_modulation.linear", + # Final output layer + # "final_layer.adaLN_modulation.1": "norm_out.linear", # Handle separately since we need to swap mod params + "final_layer.linear": "proj_out", + } + + FLUX2_TRANSFORMER_ADA_LAYER_NORM_KEY_MAP = { + "final_layer.adaLN_modulation.1": "norm_out.linear", + } + + FLUX2_TRANSFORMER_DOUBLE_BLOCK_KEY_MAP = { + # Handle fused QKV projections separately as we need to break into Q, K, V projections + "img_attn.norm.query_norm": "attn.norm_q", + "img_attn.norm.key_norm": "attn.norm_k", + "img_attn.proj": "attn.to_out.0", + "img_mlp.0": "ff.linear_in", + "img_mlp.2": "ff.linear_out", + "txt_attn.norm.query_norm": "attn.norm_added_q", + "txt_attn.norm.key_norm": "attn.norm_added_k", + "txt_attn.proj": "attn.to_add_out", + "txt_mlp.0": "ff_context.linear_in", + "txt_mlp.2": "ff_context.linear_out", + } + + FLUX2_TRANSFORMER_SINGLE_BLOCK_KEY_MAP = { + "linear1": "attn.to_qkv_mlp_proj", + "linear2": "attn.to_out", + "norm.query_norm": "attn.norm_q", + "norm.key_norm": "attn.norm_k", + } + + def convert_flux2_single_stream_blocks(key: str, state_dict: dict[str, object]) -> None: + # Skip if not a weight, bias, or scale + if ".weight" not in key and ".bias" not in key and ".scale" not in key: + return + + # Mapping: + # - single_blocks.{N}.linear1 --> single_transformer_blocks.{N}.attn.to_qkv_mlp_proj + # - single_blocks.{N}.linear2 --> single_transformer_blocks.{N}.attn.to_out + # - single_blocks.{N}.norm.query_norm.scale --> single_transformer_blocks.{N}.attn.norm_q.weight + # - single_blocks.{N}.norm.key_norm.scale --> single_transformer_blocks.{N}.attn.norm_k.weight + new_prefix = "single_transformer_blocks" + if "single_blocks." in key: + parts = key.split(".") + block_idx = parts[1] + within_block_name = ".".join(parts[2:-1]) + param_type = parts[-1] + + if param_type == "scale": + param_type = "weight" + + new_within_block_name = FLUX2_TRANSFORMER_SINGLE_BLOCK_KEY_MAP[within_block_name] + new_key = ".".join([new_prefix, block_idx, new_within_block_name, param_type]) + + param = state_dict.pop(key) + state_dict[new_key] = param + + return + + def convert_ada_layer_norm_weights(key: str, state_dict: dict[str, object]) -> None: + # Skip if not a weight + if ".weight" not in key: + return + + # If adaLN_modulation is in the key, swap scale and shift parameters + # Original implementation is (shift, scale); diffusers implementation is (scale, shift) + if "adaLN_modulation" in key: + key_without_param_type, param_type = key.rsplit(".", maxsplit=1) + # Assume all such keys are in the AdaLayerNorm key map + new_key_without_param_type = FLUX2_TRANSFORMER_ADA_LAYER_NORM_KEY_MAP[key_without_param_type] + new_key = ".".join([new_key_without_param_type, param_type]) + + swapped_weight = swap_scale_shift(state_dict.pop(key), 0) + state_dict[new_key] = swapped_weight + + return + + def convert_flux2_double_stream_blocks(key: str, state_dict: dict[str, object]) -> None: + # Skip if not a weight, bias, or scale + if ".weight" not in key and ".bias" not in key and ".scale" not in key: + return + + new_prefix = "transformer_blocks" + if "double_blocks." in key: + parts = key.split(".") + block_idx = parts[1] + modality_block_name = parts[2] # img_attn, img_mlp, txt_attn, txt_mlp + within_block_name = ".".join(parts[2:-1]) + param_type = parts[-1] + + if param_type == "scale": + param_type = "weight" + + if "qkv" in within_block_name: + fused_qkv_weight = state_dict.pop(key) + to_q_weight, to_k_weight, to_v_weight = torch.chunk(fused_qkv_weight, 3, dim=0) + if "img" in modality_block_name: + # double_blocks.{N}.img_attn.qkv --> transformer_blocks.{N}.attn.{to_q|to_k|to_v} + to_q_weight, to_k_weight, to_v_weight = torch.chunk(fused_qkv_weight, 3, dim=0) + new_q_name = "attn.to_q" + new_k_name = "attn.to_k" + new_v_name = "attn.to_v" + elif "txt" in modality_block_name: + # double_blocks.{N}.txt_attn.qkv --> transformer_blocks.{N}.attn.{add_q_proj|add_k_proj|add_v_proj} + to_q_weight, to_k_weight, to_v_weight = torch.chunk(fused_qkv_weight, 3, dim=0) + new_q_name = "attn.add_q_proj" + new_k_name = "attn.add_k_proj" + new_v_name = "attn.add_v_proj" + new_q_key = ".".join([new_prefix, block_idx, new_q_name, param_type]) + new_k_key = ".".join([new_prefix, block_idx, new_k_name, param_type]) + new_v_key = ".".join([new_prefix, block_idx, new_v_name, param_type]) + state_dict[new_q_key] = to_q_weight + state_dict[new_k_key] = to_k_weight + state_dict[new_v_key] = to_v_weight + else: + new_within_block_name = FLUX2_TRANSFORMER_DOUBLE_BLOCK_KEY_MAP[within_block_name] + new_key = ".".join([new_prefix, block_idx, new_within_block_name, param_type]) + + param = state_dict.pop(key) + state_dict[new_key] = param + return + + def update_state_dict(state_dict: dict[str, object], old_key: str, new_key: str) -> None: + state_dict[new_key] = state_dict.pop(old_key) + + TRANSFORMER_SPECIAL_KEYS_REMAP = { + "adaLN_modulation": convert_ada_layer_norm_weights, + "double_blocks": convert_flux2_double_stream_blocks, + "single_blocks": convert_flux2_single_stream_blocks, + } + + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + # Handle official code --> diffusers key remapping via the remap dict + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in FLUX2_TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + + update_state_dict(converted_state_dict, key, new_key) + + # Handle any special logic which can't be expressed by a simple 1:1 remapping with the handlers in + # special_keys_remap + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_z_image_transformer_checkpoint_to_diffusers(checkpoint, **kwargs): + Z_IMAGE_KEYS_RENAME_DICT = { + "final_layer.": "all_final_layer.2-1.", + "x_embedder.": "all_x_embedder.2-1.", + ".attention.out.bias": ".attention.to_out.0.bias", + ".attention.k_norm.weight": ".attention.norm_k.weight", + ".attention.q_norm.weight": ".attention.norm_q.weight", + ".attention.out.weight": ".attention.to_out.0.weight", + "model.diffusion_model.": "", + } + + def convert_z_image_fused_attention(key: str, state_dict: dict[str, object]) -> None: + if ".attention.qkv.weight" not in key: + return + + fused_qkv_weight = state_dict.pop(key) + to_q_weight, to_k_weight, to_v_weight = torch.chunk(fused_qkv_weight, 3, dim=0) + new_q_name = key.replace(".attention.qkv.weight", ".attention.to_q.weight") + new_k_name = key.replace(".attention.qkv.weight", ".attention.to_k.weight") + new_v_name = key.replace(".attention.qkv.weight", ".attention.to_v.weight") + + state_dict[new_q_name] = to_q_weight + state_dict[new_k_name] = to_k_weight + state_dict[new_v_name] = to_v_weight + return + + TRANSFORMER_SPECIAL_KEYS_REMAP = { + ".attention.qkv.weight": convert_z_image_fused_attention, + } + + def update_state_dict(state_dict: dict[str, object], old_key: str, new_key: str) -> None: + state_dict[new_key] = state_dict.pop(old_key) + + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + # Handle single file --> diffusers key remapping via the remap dict + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in Z_IMAGE_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + + update_state_dict(converted_state_dict, key, new_key) + + if "norm_final.weight" in converted_state_dict.keys(): + _ = converted_state_dict.pop("norm_final.weight") + + # Handle any special logic which can't be expressed by a simple 1:1 remapping with the handlers in + # special_keys_remap + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_z_image_controlnet_checkpoint_to_diffusers(checkpoint, config, **kwargs): + if config["add_control_noise_refiner"] is None: + return checkpoint + elif config["add_control_noise_refiner"] == "control_noise_refiner": + return checkpoint + elif config["add_control_noise_refiner"] == "control_layers": + converted_state_dict = { + key: checkpoint.pop(key) for key in list(checkpoint.keys()) if not key.startswith("control_noise_refiner.") + } + return converted_state_dict + else: + raise ValueError("Unknown Z-Image Turbo ControlNet type.") + + +def convert_ltx2_transformer_to_diffusers(checkpoint, **kwargs): + LTX_2_0_TRANSFORMER_KEYS_RENAME_DICT = { + # Transformer prefix + "model.diffusion_model.": "", + # Input Patchify Projections + "patchify_proj": "proj_in", + "audio_patchify_proj": "audio_proj_in", + # Modulation Parameters + # Handle adaln_single --> time_embed, audioln_single --> audio_time_embed separately as the original keys are + # substrings of the other modulation parameters below + "av_ca_video_scale_shift_adaln_single": "av_cross_attn_video_scale_shift", + "av_ca_a2v_gate_adaln_single": "av_cross_attn_video_a2v_gate", + "av_ca_audio_scale_shift_adaln_single": "av_cross_attn_audio_scale_shift", + "av_ca_v2a_gate_adaln_single": "av_cross_attn_audio_v2a_gate", + # Transformer Blocks + # Per-Block Cross Attention Modulation Parameters + "scale_shift_table_a2v_ca_video": "video_a2v_cross_attn_scale_shift_table", + "scale_shift_table_a2v_ca_audio": "audio_a2v_cross_attn_scale_shift_table", + # Attention QK Norms + "q_norm": "norm_q", + "k_norm": "norm_k", + } + + def update_state_dict_inplace(state_dict, old_key: str, new_key: str) -> None: + state_dict[new_key] = state_dict.pop(old_key) + + def remove_keys_inplace(key: str, state_dict) -> None: + state_dict.pop(key) + + def convert_ltx2_transformer_adaln_single(key: str, state_dict) -> None: + # Skip if not a weight, bias + if ".weight" not in key and ".bias" not in key: + return + + if key.startswith("adaln_single."): + new_key = key.replace("adaln_single.", "time_embed.") + param = state_dict.pop(key) + state_dict[new_key] = param + + if key.startswith("audio_adaln_single."): + new_key = key.replace("audio_adaln_single.", "audio_time_embed.") + param = state_dict.pop(key) + state_dict[new_key] = param + + return + + LTX_2_0_TRANSFORMER_SPECIAL_KEYS_REMAP = { + "video_embeddings_connector": remove_keys_inplace, + "audio_embeddings_connector": remove_keys_inplace, + "adaln_single": convert_ltx2_transformer_adaln_single, + } + + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + # Handle official code --> diffusers key remapping via the remap dict + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in LTX_2_0_TRANSFORMER_KEYS_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + + update_state_dict_inplace(converted_state_dict, key, new_key) + + # Handle any special logic which can't be expressed by a simple 1:1 remapping with the handlers in + # special_keys_remap + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in LTX_2_0_TRANSFORMER_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_ltx2_vae_to_diffusers(checkpoint, **kwargs): + LTX_2_0_VIDEO_VAE_RENAME_DICT = { + # Video VAE prefix + "vae.": "", + # Encoder + "down_blocks.0": "down_blocks.0", + "down_blocks.1": "down_blocks.0.downsamplers.0", + "down_blocks.2": "down_blocks.1", + "down_blocks.3": "down_blocks.1.downsamplers.0", + "down_blocks.4": "down_blocks.2", + "down_blocks.5": "down_blocks.2.downsamplers.0", + "down_blocks.6": "down_blocks.3", + "down_blocks.7": "down_blocks.3.downsamplers.0", + "down_blocks.8": "mid_block", + # Decoder + "up_blocks.0": "mid_block", + "up_blocks.1": "up_blocks.0.upsamplers.0", + "up_blocks.2": "up_blocks.0", + "up_blocks.3": "up_blocks.1.upsamplers.0", + "up_blocks.4": "up_blocks.1", + "up_blocks.5": "up_blocks.2.upsamplers.0", + "up_blocks.6": "up_blocks.2", + # Common + # For all 3D ResNets + "res_blocks": "resnets", + "per_channel_statistics.mean-of-means": "latents_mean", + "per_channel_statistics.std-of-means": "latents_std", + } + + def update_state_dict_inplace(state_dict, old_key: str, new_key: str) -> None: + state_dict[new_key] = state_dict.pop(old_key) + + def remove_keys_inplace(key: str, state_dict) -> None: + state_dict.pop(key) + + LTX_2_0_VAE_SPECIAL_KEYS_REMAP = { + "per_channel_statistics.channel": remove_keys_inplace, + "per_channel_statistics.mean-of-stds": remove_keys_inplace, + } + + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + # Handle official code --> diffusers key remapping via the remap dict + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in LTX_2_0_VIDEO_VAE_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + + update_state_dict_inplace(converted_state_dict, key, new_key) + + # Handle any special logic which can't be expressed by a simple 1:1 remapping with the handlers in + # special_keys_remap + for key in list(converted_state_dict.keys()): + for special_key, handler_fn_inplace in LTX_2_0_VAE_SPECIAL_KEYS_REMAP.items(): + if special_key not in key: + continue + handler_fn_inplace(key, converted_state_dict) + + return converted_state_dict + + +def convert_ltx2_audio_vae_to_diffusers(checkpoint, **kwargs): + LTX_2_0_AUDIO_VAE_RENAME_DICT = { + # Audio VAE prefix + "audio_vae.": "", + "per_channel_statistics.mean-of-means": "latents_mean", + "per_channel_statistics.std-of-means": "latents_std", + } + + def update_state_dict_inplace(state_dict, old_key: str, new_key: str) -> None: + state_dict[new_key] = state_dict.pop(old_key) + + converted_state_dict = {key: checkpoint.pop(key) for key in list(checkpoint.keys())} + + # Handle official code --> diffusers key remapping via the remap dict + for key in list(converted_state_dict.keys()): + new_key = key[:] + for replace_key, rename_key in LTX_2_0_AUDIO_VAE_RENAME_DICT.items(): + new_key = new_key.replace(replace_key, rename_key) + + update_state_dict_inplace(converted_state_dict, key, new_key) + + return converted_state_dict diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/textual_inversion.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/textual_inversion.py new file mode 100644 index 0000000000000000000000000000000000000000..711de5f81343a47a525a05dcc657f4d5ad3aa6d2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/textual_inversion.py @@ -0,0 +1,605 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import json + +import safetensors +import torch +from huggingface_hub.utils import validate_hf_hub_args +from tokenizers import Tokenizer as TokenizerFast +from torch import nn + +from ..models.modeling_utils import load_state_dict +from ..utils import ( + _get_model_file, + is_accelerate_available, + is_transformers_available, + logging, +) + + +if is_transformers_available(): + from transformers import PreTrainedModel, PreTrainedTokenizer + +if is_accelerate_available(): + from accelerate.hooks import AlignDevicesHook, CpuOffload, remove_hook_from_module + +logger = logging.get_logger(__name__) + +TEXT_INVERSION_NAME = "learned_embeds.bin" +TEXT_INVERSION_NAME_SAFE = "learned_embeds.safetensors" + + +@validate_hf_hub_args +def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs): + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + hf_token = kwargs.pop("hf_token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = { + "file_type": "text_inversion", + "framework": "pytorch", + } + state_dicts = [] + for pretrained_model_name_or_path in pretrained_model_name_or_paths: + if not isinstance(pretrained_model_name_or_path, (dict, torch.Tensor)): + # 3.1. Load textual inversion file + model_file = None + + # Let's first try to load .safetensors weights + if (use_safetensors and weight_name is None) or ( + weight_name is not None and weight_name.endswith(".safetensors") + ): + try: + model_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=weight_name or TEXT_INVERSION_NAME_SAFE, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=hf_token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + state_dict = safetensors.torch.load_file(model_file, device="cpu") + except Exception as e: + if not allow_pickle: + raise e + + model_file = None + + if model_file is None: + model_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=weight_name or TEXT_INVERSION_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=hf_token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + state_dict = load_state_dict(model_file) + else: + state_dict = pretrained_model_name_or_path + + state_dicts.append(state_dict) + + return state_dicts + + +class TextualInversionLoaderMixin: + r""" + Load Textual Inversion tokens and embeddings to the tokenizer and text encoder. + """ + + def maybe_convert_prompt(self, prompt: str | list[str], tokenizer: "PreTrainedTokenizer"): # noqa: F821 + r""" + Processes prompts that include a special token corresponding to a multi-vector textual inversion embedding to + be replaced with multiple special tokens each corresponding to one of the vectors. If the prompt has no textual + inversion token or if the textual inversion token is a single vector, the input prompt is returned. + + Parameters: + prompt (`str` or list of `str`): + The prompt or prompts to guide the image generation. + tokenizer (`PreTrainedTokenizer`): + The tokenizer responsible for encoding the prompt into input tokens. + + Returns: + `str` or list of `str`: The converted prompt + """ + if not isinstance(prompt, list): + prompts = [prompt] + else: + prompts = prompt + + prompts = [self._maybe_convert_prompt(p, tokenizer) for p in prompts] + + if not isinstance(prompt, list): + return prompts[0] + + return prompts + + def _maybe_convert_prompt(self, prompt: str, tokenizer: "PreTrainedTokenizer"): # noqa: F821 + r""" + Maybe convert a prompt into a "multi vector"-compatible prompt. If the prompt includes a token that corresponds + to a multi-vector textual inversion embedding, this function will process the prompt so that the special token + is replaced with multiple special tokens each corresponding to one of the vectors. If the prompt has no textual + inversion token or a textual inversion token that is a single vector, the input prompt is simply returned. + + Parameters: + prompt (`str`): + The prompt to guide the image generation. + tokenizer (`PreTrainedTokenizer`): + The tokenizer responsible for encoding the prompt into input tokens. + + Returns: + `str`: The converted prompt + """ + tokens = tokenizer.tokenize(prompt) + unique_tokens = set(tokens) + for token in unique_tokens: + if token in tokenizer.added_tokens_encoder: + replacement = token + i = 1 + while f"{token}_{i}" in tokenizer.added_tokens_encoder: + replacement += f" {token}_{i}" + i += 1 + + prompt = prompt.replace(token, replacement) + + return prompt + + def _check_text_inv_inputs(self, tokenizer, text_encoder, pretrained_model_name_or_paths, tokens): + if tokenizer is None: + raise ValueError( + f"{self.__class__.__name__} requires `self.tokenizer` or passing a `tokenizer` of type `PreTrainedTokenizer` for calling" + f" `{self.load_textual_inversion.__name__}`" + ) + + if text_encoder is None: + raise ValueError( + f"{self.__class__.__name__} requires `self.text_encoder` or passing a `text_encoder` of type `PreTrainedModel` for calling" + f" `{self.load_textual_inversion.__name__}`" + ) + + if len(pretrained_model_name_or_paths) > 1 and len(pretrained_model_name_or_paths) != len(tokens): + raise ValueError( + f"You have passed a list of models of length {len(pretrained_model_name_or_paths)}, and list of tokens of length {len(tokens)} " + f"Make sure both lists have the same length." + ) + + valid_tokens = [t for t in tokens if t is not None] + if len(set(valid_tokens)) < len(valid_tokens): + raise ValueError(f"You have passed a list of tokens that contains duplicates: {tokens}") + + @staticmethod + def _retrieve_tokens_and_embeddings(tokens, state_dicts, tokenizer): + all_tokens = [] + all_embeddings = [] + for state_dict, token in zip(state_dicts, tokens): + if isinstance(state_dict, torch.Tensor): + if token is None: + raise ValueError( + "You are trying to load a textual inversion embedding that has been saved as a PyTorch tensor. Make sure to pass the name of the corresponding token in this case: `token=...`." + ) + loaded_token = token + embedding = state_dict + elif len(state_dict) == 1: + # diffusers + loaded_token, embedding = next(iter(state_dict.items())) + elif "string_to_param" in state_dict: + # A1111 + loaded_token = state_dict["name"] + embedding = state_dict["string_to_param"]["*"] + else: + raise ValueError( + f"Loaded state dictionary is incorrect: {state_dict}. \n\n" + "Please verify that the loaded state dictionary of the textual embedding either only has a single key or includes the `string_to_param`" + " input key." + ) + + if token is not None and loaded_token != token: + logger.info(f"The loaded token: {loaded_token} is overwritten by the passed token {token}.") + else: + token = loaded_token + + if token in tokenizer.get_vocab(): + raise ValueError( + f"Token {token} already in tokenizer vocabulary. Please choose a different token name or remove {token} and embedding from the tokenizer and text encoder." + ) + + all_tokens.append(token) + all_embeddings.append(embedding) + + return all_tokens, all_embeddings + + @staticmethod + def _extend_tokens_and_embeddings(tokens, embeddings, tokenizer): + all_tokens = [] + all_embeddings = [] + + for embedding, token in zip(embeddings, tokens): + if f"{token}_1" in tokenizer.get_vocab(): + multi_vector_tokens = [token] + i = 1 + while f"{token}_{i}" in tokenizer.added_tokens_encoder: + multi_vector_tokens.append(f"{token}_{i}") + i += 1 + + raise ValueError( + f"Multi-vector Token {multi_vector_tokens} already in tokenizer vocabulary. Please choose a different token name or remove the {multi_vector_tokens} and embedding from the tokenizer and text encoder." + ) + + is_multi_vector = len(embedding.shape) > 1 and embedding.shape[0] > 1 + if is_multi_vector: + all_tokens += [token] + [f"{token}_{i}" for i in range(1, embedding.shape[0])] + all_embeddings += [e for e in embedding] # noqa: C416 + else: + all_tokens += [token] + all_embeddings += [embedding[0]] if len(embedding.shape) > 1 else [embedding] + + return all_tokens, all_embeddings + + @validate_hf_hub_args + def load_textual_inversion( + self, + pretrained_model_name_or_path: str | list[str] | dict[str, torch.Tensor] | list[dict[str, torch.Tensor]], + token: str | list[str] | None = None, + tokenizer: "PreTrainedTokenizer" | None = None, # noqa: F821 + text_encoder: "PreTrainedModel" | None = None, # noqa: F821 + **kwargs, + ): + r""" + Load Textual Inversion embeddings into the text encoder of [`StableDiffusionPipeline`] (both 🤗 Diffusers and + Automatic1111 formats are supported). + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike` or `list[str or os.PathLike]` or `Dict` or `list[Dict]`): + Can be either one of the following or a list of them: + + - A string, the *model id* (for example `sd-concepts-library/low-poly-hd-logos-icons`) of a + pretrained model hosted on the Hub. + - A path to a *directory* (for example `./my_text_inversion_directory/`) containing the textual + inversion weights. + - A path to a *file* (for example `./my_text_inversions.pt`) containing textual inversion weights. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + + token (`str` or `list[str]`, *optional*): + Override the token to use for the textual inversion weights. If `pretrained_model_name_or_path` is a + list, then `token` must also be a list of equal length. + text_encoder ([`~transformers.CLIPTextModel`], *optional*): + Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)). + If not specified, function will take self.tokenizer. + tokenizer ([`~transformers.CLIPTokenizer`], *optional*): + A `CLIPTokenizer` to tokenize text. If not specified, function will take self.tokenizer. + weight_name (`str`, *optional*): + Name of a custom weight file. This should be used when: + + - The saved textual inversion file is in 🤗 Diffusers format, but was saved under a specific weight + name such as `text_inv.bin`. + - The saved textual inversion file is in the Automatic1111 format. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + hf_token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you're downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + + Example: + + To load a Textual Inversion embedding vector in 🤗 Diffusers format: + + ```py + from diffusers import StableDiffusionPipeline + import torch + + model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") + + pipe.load_textual_inversion("sd-concepts-library/cat-toy") + + prompt = "A backpack" + + image = pipe(prompt, num_inference_steps=50).images[0] + image.save("cat-backpack.png") + ``` + + To load a Textual Inversion embedding vector in Automatic1111 format, make sure to download the vector first + (for example from [civitAI](https://civitai.com/models/3036?modelVersionId=9857)) and then load the vector + locally: + + ```py + from diffusers import StableDiffusionPipeline + import torch + + model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") + + pipe.load_textual_inversion("./charturnerv2.pt", token="charturnerv2") + + prompt = "charturnerv2, multiple views of the same character in the same outfit, a character turnaround of a woman wearing a black jacket and red shirt, best quality, intricate details." + + image = pipe(prompt, num_inference_steps=50).images[0] + image.save("character.png") + ``` + + """ + # 1. Set correct tokenizer and text encoder + tokenizer = tokenizer or getattr(self, "tokenizer", None) + text_encoder = text_encoder or getattr(self, "text_encoder", None) + + # 2. Normalize inputs + pretrained_model_name_or_paths = ( + [pretrained_model_name_or_path] + if not isinstance(pretrained_model_name_or_path, list) + else pretrained_model_name_or_path + ) + tokens = [token] if not isinstance(token, list) else token + if tokens[0] is None: + tokens = tokens * len(pretrained_model_name_or_paths) + + # 3. Check inputs + self._check_text_inv_inputs(tokenizer, text_encoder, pretrained_model_name_or_paths, tokens) + + # 4. Load state dicts of textual embeddings + state_dicts = load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs) + + # 4.1 Handle the special case when state_dict is a tensor that contains n embeddings for n tokens + if len(tokens) > 1 and len(state_dicts) == 1: + if isinstance(state_dicts[0], torch.Tensor): + state_dicts = list(state_dicts[0]) + if len(tokens) != len(state_dicts): + raise ValueError( + f"You have passed a state_dict contains {len(state_dicts)} embeddings, and list of tokens of length {len(tokens)} " + f"Make sure both have the same length." + ) + + # 4. Retrieve tokens and embeddings + tokens, embeddings = self._retrieve_tokens_and_embeddings(tokens, state_dicts, tokenizer) + + # 5. Extend tokens and embeddings for multi vector + tokens, embeddings = self._extend_tokens_and_embeddings(tokens, embeddings, tokenizer) + + # 6. Make sure all embeddings have the correct size + expected_emb_dim = text_encoder.get_input_embeddings().weight.shape[-1] + if any(expected_emb_dim != emb.shape[-1] for emb in embeddings): + raise ValueError( + "Loaded embeddings are of incorrect shape. Expected each textual inversion embedding " + "to be of shape {input_embeddings.shape[-1]}, but are {embeddings.shape[-1]} " + ) + + # 7. Now we can be sure that loading the embedding matrix works + # < Unsafe code: + + # 7.1 Offload all hooks in case the pipeline was cpu offloaded before make sure, we offload and onload again + is_model_cpu_offload = False + is_sequential_cpu_offload = False + if self.hf_device_map is None: + for _, component in self.components.items(): + if isinstance(component, nn.Module): + if hasattr(component, "_hf_hook"): + is_model_cpu_offload = isinstance(getattr(component, "_hf_hook"), CpuOffload) + is_sequential_cpu_offload = ( + isinstance(getattr(component, "_hf_hook"), AlignDevicesHook) + or hasattr(component._hf_hook, "hooks") + and isinstance(component._hf_hook.hooks[0], AlignDevicesHook) + ) + logger.info( + "Accelerate hooks detected. Since you have called `load_textual_inversion()`, the previous hooks will be first removed. Then the textual inversion parameters will be loaded and the hooks will be applied again." + ) + if is_sequential_cpu_offload or is_model_cpu_offload: + remove_hook_from_module(component, recurse=is_sequential_cpu_offload) + + # 7.2 save expected device and dtype + device = text_encoder.device + dtype = text_encoder.dtype + + # 7.3 Increase token embedding matrix + text_encoder.resize_token_embeddings(len(tokenizer) + len(tokens)) + input_embeddings = text_encoder.get_input_embeddings().weight + + # 7.4 Load token and embedding + for token, embedding in zip(tokens, embeddings): + # add tokens and get ids + tokenizer.add_tokens(token) + token_id = tokenizer.convert_tokens_to_ids(token) + input_embeddings.data[token_id] = embedding + logger.info(f"Loaded textual inversion embedding for {token}.") + + input_embeddings.to(dtype=dtype, device=device) + + # 7.5 Offload the model again + if is_model_cpu_offload: + self.enable_model_cpu_offload(device=device) + elif is_sequential_cpu_offload: + self.enable_sequential_cpu_offload(device=device) + + # / Unsafe Code > + + def unload_textual_inversion( + self, + tokens: str | list[str] | None = None, + tokenizer: "PreTrainedTokenizer" | None = None, + text_encoder: "PreTrainedModel" | None = None, + ): + r""" + Unload Textual Inversion embeddings from the text encoder of [`StableDiffusionPipeline`] + + Example: + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + + # Example 1 + pipeline.load_textual_inversion("sd-concepts-library/gta5-artwork") + pipeline.load_textual_inversion("sd-concepts-library/moeb-style") + + # Remove all token embeddings + pipeline.unload_textual_inversion() + + # Example 2 + pipeline.load_textual_inversion("sd-concepts-library/moeb-style") + pipeline.load_textual_inversion("sd-concepts-library/gta5-artwork") + + # Remove just one token + pipeline.unload_textual_inversion("") + + # Example 3: unload from SDXL + pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + embedding_path = hf_hub_download( + repo_id="linoyts/web_y2k", filename="web_y2k_emb.safetensors", repo_type="model" + ) + + # load embeddings to the text encoders + state_dict = load_file(embedding_path) + + # load embeddings of text_encoder 1 (CLIP ViT-L/14) + pipeline.load_textual_inversion( + state_dict["clip_l"], + tokens=["", ""], + text_encoder=pipeline.text_encoder, + tokenizer=pipeline.tokenizer, + ) + # load embeddings of text_encoder 2 (CLIP ViT-G/14) + pipeline.load_textual_inversion( + state_dict["clip_g"], + tokens=["", ""], + text_encoder=pipeline.text_encoder_2, + tokenizer=pipeline.tokenizer_2, + ) + + # Unload explicitly from both text encoders and tokenizers + pipeline.unload_textual_inversion( + tokens=["", ""], text_encoder=pipeline.text_encoder, tokenizer=pipeline.tokenizer + ) + pipeline.unload_textual_inversion( + tokens=["", ""], text_encoder=pipeline.text_encoder_2, tokenizer=pipeline.tokenizer_2 + ) + ``` + """ + + tokenizer = tokenizer or getattr(self, "tokenizer", None) + text_encoder = text_encoder or getattr(self, "text_encoder", None) + + # Get textual inversion tokens and ids + token_ids = [] + last_special_token_id = None + + if tokens: + if isinstance(tokens, str): + tokens = [tokens] + for added_token_id, added_token in tokenizer.added_tokens_decoder.items(): + if not added_token.special: + if added_token.content in tokens: + token_ids.append(added_token_id) + else: + last_special_token_id = added_token_id + if len(token_ids) == 0: + raise ValueError("No tokens to remove found") + else: + tokens = [] + for added_token_id, added_token in tokenizer.added_tokens_decoder.items(): + if not added_token.special: + token_ids.append(added_token_id) + tokens.append(added_token.content) + else: + last_special_token_id = added_token_id + + # Fast tokenizers (v5+) + if hasattr(tokenizer, "_tokenizer"): + # Fast tokenizers: serialize, filter tokens, reload + tokenizer_json = json.loads(tokenizer._tokenizer.to_str()) + new_id = last_special_token_id + 1 + filtered = [] + for tok in tokenizer_json.get("added_tokens", []): + if tok.get("content") in set(tokens): + continue + if not tok.get("special", False): + tok["id"] = new_id + new_id += 1 + filtered.append(tok) + tokenizer_json["added_tokens"] = filtered + tokenizer._tokenizer = TokenizerFast.from_str(json.dumps(tokenizer_json)) + else: + # Slow tokenizers + for token_id, token_to_remove in zip(token_ids, tokens): + del tokenizer._added_tokens_decoder[token_id] + del tokenizer._added_tokens_encoder[token_to_remove] + + key_id = 1 + for token_id in list(tokenizer.added_tokens_decoder.keys()): + if token_id > last_special_token_id and token_id > last_special_token_id + key_id: + token = tokenizer._added_tokens_decoder[token_id] + tokenizer._added_tokens_decoder[last_special_token_id + key_id] = token + del tokenizer._added_tokens_decoder[token_id] + tokenizer._added_tokens_encoder[token.content] = last_special_token_id + key_id + key_id += 1 + if hasattr(tokenizer, "_update_trie"): + tokenizer._update_trie() + if hasattr(tokenizer, "_update_total_vocab_size"): + tokenizer._update_total_vocab_size() + + # Delete from text encoder + text_embedding_dim = text_encoder.get_input_embeddings().embedding_dim + temp_text_embedding_weights = text_encoder.get_input_embeddings().weight + text_embedding_weights = temp_text_embedding_weights[: last_special_token_id + 1] + to_append = [] + for i in range(last_special_token_id + 1, temp_text_embedding_weights.shape[0]): + if i not in token_ids: + to_append.append(temp_text_embedding_weights[i].unsqueeze(0)) + if len(to_append) > 0: + to_append = torch.cat(to_append, dim=0) + text_embedding_weights = torch.cat([text_embedding_weights, to_append], dim=0) + text_embeddings_filtered = nn.Embedding(text_embedding_weights.shape[0], text_embedding_dim) + text_embeddings_filtered.weight.data = text_embedding_weights + text_encoder.set_input_embeddings(text_embeddings_filtered) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/transformer_flux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/transformer_flux.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7b921b7ddfdb8f375889fffaeaa001069c9a68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/transformer_flux.py @@ -0,0 +1,179 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from contextlib import nullcontext + +from ..models.embeddings import ( + ImageProjection, + MultiIPAdapterImageProjection, +) +from ..models.model_loading_utils import load_model_dict_into_meta +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT +from ..utils import is_accelerate_available, is_torch_version, logging +from ..utils.torch_utils import empty_device_cache + + +if is_accelerate_available(): + pass + +logger = logging.get_logger(__name__) + + +class FluxTransformer2DLoadersMixin: + """ + Load layers into a [`FluxTransformer2DModel`]. + """ + + def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT): + if low_cpu_mem_usage: + if is_accelerate_available(): + from accelerate import init_empty_weights + + else: + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + updated_state_dict = {} + image_projection = None + init_context = init_empty_weights if low_cpu_mem_usage else nullcontext + + if "proj.weight" in state_dict: + # IP-Adapter + num_image_text_embeds = 4 + if state_dict["proj.weight"].shape[0] == 65536: + num_image_text_embeds = 16 + clip_embeddings_dim = state_dict["proj.weight"].shape[-1] + cross_attention_dim = state_dict["proj.weight"].shape[0] // num_image_text_embeds + + with init_context(): + image_projection = ImageProjection( + cross_attention_dim=cross_attention_dim, + image_embed_dim=clip_embeddings_dim, + num_image_text_embeds=num_image_text_embeds, + ) + + for key, value in state_dict.items(): + diffusers_name = key.replace("proj", "image_embeds") + updated_state_dict[diffusers_name] = value + + if not low_cpu_mem_usage: + image_projection.load_state_dict(updated_state_dict, strict=True) + else: + device_map = {"": self.device} + load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype) + empty_device_cache() + + return image_projection + + def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT): + from ..models.transformers.transformer_flux import FluxIPAdapterAttnProcessor + + if low_cpu_mem_usage: + if is_accelerate_available(): + from accelerate import init_empty_weights + + else: + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + # set ip-adapter cross-attention processors & load state_dict + attn_procs = {} + key_id = 0 + init_context = init_empty_weights if low_cpu_mem_usage else nullcontext + for name in self.attn_processors.keys(): + if name.startswith("single_transformer_blocks"): + attn_processor_class = self.attn_processors[name].__class__ + attn_procs[name] = attn_processor_class() + else: + cross_attention_dim = self.config.joint_attention_dim + hidden_size = self.inner_dim + attn_processor_class = FluxIPAdapterAttnProcessor + num_image_text_embeds = [] + for state_dict in state_dicts: + if "proj.weight" in state_dict["image_proj"]: + num_image_text_embed = 4 + if state_dict["image_proj"]["proj.weight"].shape[0] == 65536: + num_image_text_embed = 16 + # IP-Adapter + num_image_text_embeds += [num_image_text_embed] + + with init_context(): + attn_procs[name] = attn_processor_class( + hidden_size=hidden_size, + cross_attention_dim=cross_attention_dim, + scale=1.0, + num_tokens=num_image_text_embeds, + dtype=self.dtype, + device=self.device, + ) + + value_dict = {} + for i, state_dict in enumerate(state_dicts): + value_dict.update({f"to_k_ip.{i}.weight": state_dict["ip_adapter"][f"{key_id}.to_k_ip.weight"]}) + value_dict.update({f"to_v_ip.{i}.weight": state_dict["ip_adapter"][f"{key_id}.to_v_ip.weight"]}) + value_dict.update({f"to_k_ip.{i}.bias": state_dict["ip_adapter"][f"{key_id}.to_k_ip.bias"]}) + value_dict.update({f"to_v_ip.{i}.bias": state_dict["ip_adapter"][f"{key_id}.to_v_ip.bias"]}) + + if not low_cpu_mem_usage: + attn_procs[name].load_state_dict(value_dict) + else: + device_map = {"": self.device} + dtype = self.dtype + load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype) + + key_id += 1 + + empty_device_cache() + + return attn_procs + + def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT): + if not isinstance(state_dicts, list): + state_dicts = [state_dicts] + + self.encoder_hid_proj = None + + attn_procs = self._convert_ip_adapter_attn_to_diffusers(state_dicts, low_cpu_mem_usage=low_cpu_mem_usage) + self.set_attn_processor(attn_procs) + + image_projection_layers = [] + for state_dict in state_dicts: + image_projection_layer = self._convert_ip_adapter_image_proj_to_diffusers( + state_dict["image_proj"], low_cpu_mem_usage=low_cpu_mem_usage + ) + image_projection_layers.append(image_projection_layer) + + self.encoder_hid_proj = MultiIPAdapterImageProjection(image_projection_layers) + self.config.encoder_hid_dim_type = "ip_image_proj" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/transformer_sd3.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/transformer_sd3.py new file mode 100644 index 0000000000000000000000000000000000000000..d1c3fff14e603bdced8fdd740ca11a36ad7ba9b4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/transformer_sd3.py @@ -0,0 +1,174 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from contextlib import nullcontext + +from ..models.attention_processor import SD3IPAdapterJointAttnProcessor2_0 +from ..models.embeddings import IPAdapterTimeImageProjection +from ..models.model_loading_utils import load_model_dict_into_meta +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT +from ..utils import is_accelerate_available, is_torch_version, logging +from ..utils.torch_utils import empty_device_cache + + +logger = logging.get_logger(__name__) + + +class SD3Transformer2DLoadersMixin: + """Load IP-Adapters and LoRA layers into a `[SD3Transformer2DModel]`.""" + + def _convert_ip_adapter_attn_to_diffusers( + self, state_dict: dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT + ) -> dict: + if low_cpu_mem_usage: + if is_accelerate_available(): + from accelerate import init_empty_weights + + else: + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + # IP-Adapter cross attention parameters + hidden_size = self.config.attention_head_dim * self.config.num_attention_heads + ip_hidden_states_dim = self.config.attention_head_dim * self.config.num_attention_heads + timesteps_emb_dim = state_dict["0.norm_ip.linear.weight"].shape[1] + + # Dict where key is transformer layer index, value is attention processor's state dict + # ip_adapter state dict keys example: "0.norm_ip.linear.weight" + layer_state_dict = {idx: {} for idx in range(len(self.attn_processors))} + for key, weights in state_dict.items(): + idx, name = key.split(".", maxsplit=1) + layer_state_dict[int(idx)][name] = weights + + # Create IP-Adapter attention processor & load state_dict + attn_procs = {} + init_context = init_empty_weights if low_cpu_mem_usage else nullcontext + for idx, name in enumerate(self.attn_processors.keys()): + with init_context(): + attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0( + hidden_size=hidden_size, + ip_hidden_states_dim=ip_hidden_states_dim, + head_dim=self.config.attention_head_dim, + timesteps_emb_dim=timesteps_emb_dim, + ) + + if not low_cpu_mem_usage: + attn_procs[name].load_state_dict(layer_state_dict[idx], strict=True) + else: + device_map = {"": self.device} + load_model_dict_into_meta( + attn_procs[name], layer_state_dict[idx], device_map=device_map, dtype=self.dtype + ) + + empty_device_cache() + + return attn_procs + + def _convert_ip_adapter_image_proj_to_diffusers( + self, state_dict: dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT + ) -> IPAdapterTimeImageProjection: + if low_cpu_mem_usage: + if is_accelerate_available(): + from accelerate import init_empty_weights + + else: + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + init_context = init_empty_weights if low_cpu_mem_usage else nullcontext + + # Convert to diffusers + updated_state_dict = {} + for key, value in state_dict.items(): + # InstantX/SD3.5-Large-IP-Adapter + if key.startswith("layers."): + idx = key.split(".")[1] + key = key.replace(f"layers.{idx}.0.norm1", f"layers.{idx}.ln0") + key = key.replace(f"layers.{idx}.0.norm2", f"layers.{idx}.ln1") + key = key.replace(f"layers.{idx}.0.to_q", f"layers.{idx}.attn.to_q") + key = key.replace(f"layers.{idx}.0.to_kv", f"layers.{idx}.attn.to_kv") + key = key.replace(f"layers.{idx}.0.to_out", f"layers.{idx}.attn.to_out.0") + key = key.replace(f"layers.{idx}.1.0", f"layers.{idx}.adaln_norm") + key = key.replace(f"layers.{idx}.1.1", f"layers.{idx}.ff.net.0.proj") + key = key.replace(f"layers.{idx}.1.3", f"layers.{idx}.ff.net.2") + key = key.replace(f"layers.{idx}.2.1", f"layers.{idx}.adaln_proj") + updated_state_dict[key] = value + + # Image projection parameters + embed_dim = updated_state_dict["proj_in.weight"].shape[1] + output_dim = updated_state_dict["proj_out.weight"].shape[0] + hidden_dim = updated_state_dict["proj_in.weight"].shape[0] + heads = updated_state_dict["layers.0.attn.to_q.weight"].shape[0] // 64 + num_queries = updated_state_dict["latents"].shape[1] + timestep_in_dim = updated_state_dict["time_embedding.linear_1.weight"].shape[1] + + # Image projection + with init_context(): + image_proj = IPAdapterTimeImageProjection( + embed_dim=embed_dim, + output_dim=output_dim, + hidden_dim=hidden_dim, + heads=heads, + num_queries=num_queries, + timestep_in_dim=timestep_in_dim, + ) + + if not low_cpu_mem_usage: + image_proj.load_state_dict(updated_state_dict, strict=True) + else: + device_map = {"": self.device} + load_model_dict_into_meta(image_proj, updated_state_dict, device_map=device_map, dtype=self.dtype) + empty_device_cache() + + return image_proj + + def _load_ip_adapter_weights(self, state_dict: dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None: + """Sets IP-Adapter attention processors, image projection, and loads state_dict. + + Args: + state_dict (`Dict`): + State dict with keys "ip_adapter", which contains parameters for attention processors, and + "image_proj", which contains parameters for image projection net. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + """ + + attn_procs = self._convert_ip_adapter_attn_to_diffusers(state_dict["ip_adapter"], low_cpu_mem_usage) + self.set_attn_processor(attn_procs) + + self.image_proj = self._convert_ip_adapter_image_proj_to_diffusers(state_dict["image_proj"], low_cpu_mem_usage) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/unet.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..9dab3bc667ea9c87f08bcf3e0dd484a9ee8e1cb6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/unet.py @@ -0,0 +1,943 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from collections import defaultdict +from contextlib import nullcontext +from pathlib import Path +from typing import Callable + +import safetensors +import torch +import torch.nn.functional as F +from huggingface_hub.utils import validate_hf_hub_args + +from ..models.embeddings import ( + ImageProjection, + IPAdapterFaceIDImageProjection, + IPAdapterFaceIDPlusImageProjection, + IPAdapterFullImageProjection, + IPAdapterPlusImageProjection, + MultiIPAdapterImageProjection, +) +from ..models.model_loading_utils import load_model_dict_into_meta +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_state_dict +from ..utils import ( + USE_PEFT_BACKEND, + _get_model_file, + convert_unet_state_dict_to_peft, + deprecate, + get_adapter_name, + get_peft_kwargs, + is_accelerate_available, + is_peft_version, + is_torch_version, + logging, +) +from ..utils.torch_utils import empty_device_cache +from .lora_base import _func_optionally_disable_offloading +from .lora_pipeline import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME +from .utils import AttnProcsLayers + + +logger = logging.get_logger(__name__) + + +CUSTOM_DIFFUSION_WEIGHT_NAME = "pytorch_custom_diffusion_weights.bin" +CUSTOM_DIFFUSION_WEIGHT_NAME_SAFE = "pytorch_custom_diffusion_weights.safetensors" + + +class UNet2DConditionLoadersMixin: + """ + Load LoRA layers into a [`UNet2DCondtionModel`]. + """ + + text_encoder_name = TEXT_ENCODER_NAME + unet_name = UNET_NAME + + @validate_hf_hub_args + def load_attn_procs(self, pretrained_model_name_or_path_or_dict: str | dict[str, torch.Tensor], **kwargs): + r""" + Load pretrained attention processor layers into [`UNet2DConditionModel`]. Attention processor layers have to be + defined in + [`attention_processor.py`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py) + and be a `torch.nn.Module` class. Currently supported: LoRA, Custom Diffusion. For LoRA, one must install + `peft`: `pip install -U peft`. + + Parameters: + pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`): + Can be either: + + - A string, the model id (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a directory (for example `./my_model_directory`) containing the model weights saved + with [`ModelMixin.save_pretrained`]. + - A [torch state + dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict). + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + network_alphas (`dict[str, float]`): + The value of the network alpha used for stable learning and preventing underflow. This value has the + same meaning as the `--network_alpha` option in the kohya-ss trainer script. Refer to [this + link](https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning). + adapter_name (`str`, *optional*, defaults to None): + Adapter name to be used for referencing the loaded adapter model. If not specified, it will use + `default_{i}` where i is the total number of adapters being loaded. + weight_name (`str`, *optional*, defaults to None): + Name of the serialized state dict file. + low_cpu_mem_usage (`bool`, *optional*): + Speed up model loading by only loading the pretrained LoRA weights and not initializing the random + weights. + + Example: + + ```py + from diffusers import AutoPipelineForText2Image + import torch + + pipeline = AutoPipelineForText2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16 + ).to("cuda") + pipeline.unet.load_attn_procs( + "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic" + ) + ``` + """ + from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading + + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + weight_name = kwargs.pop("weight_name", None) + use_safetensors = kwargs.pop("use_safetensors", None) + adapter_name = kwargs.pop("adapter_name", None) + _pipeline = kwargs.pop("_pipeline", None) + network_alphas = kwargs.pop("network_alphas", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + allow_pickle = False + + if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"): + raise ValueError( + "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`." + ) + + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"} + + model_file = None + if not isinstance(pretrained_model_name_or_path_or_dict, dict): + # Let's first try to load .safetensors weights + if (use_safetensors and weight_name is None) or ( + weight_name is not None and weight_name.endswith(".safetensors") + ): + try: + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name or LORA_WEIGHT_NAME_SAFE, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + state_dict = safetensors.torch.load_file(model_file, device="cpu") + except IOError as e: + if not allow_pickle: + raise e + # try loading non-safetensors weights + pass + if model_file is None: + model_file = _get_model_file( + pretrained_model_name_or_path_or_dict, + weights_name=weight_name or LORA_WEIGHT_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + ) + state_dict = load_state_dict(model_file) + else: + state_dict = pretrained_model_name_or_path_or_dict + + is_custom_diffusion = any("custom_diffusion" in k for k in state_dict.keys()) + is_lora = all(("lora" in k or k.endswith(".alpha")) for k in state_dict.keys()) + is_model_cpu_offload = False + is_sequential_cpu_offload = False + is_group_offload = False + + if is_lora: + deprecation_message = "Using the `load_attn_procs()` method has been deprecated and will be removed in a future version. Please use `load_lora_adapter()`." + deprecate("load_attn_procs", "0.40.0", deprecation_message) + + if is_custom_diffusion: + attn_processors = self._process_custom_diffusion(state_dict=state_dict) + elif is_lora: + is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload = self._process_lora( + state_dict=state_dict, + unet_identifier_key=self.unet_name, + network_alphas=network_alphas, + adapter_name=adapter_name, + _pipeline=_pipeline, + low_cpu_mem_usage=low_cpu_mem_usage, + ) + else: + raise ValueError( + f"{model_file} does not seem to be in the correct format expected by Custom Diffusion training." + ) + + # + + def _process_custom_diffusion(self, state_dict): + from ..models.attention_processor import CustomDiffusionAttnProcessor + + attn_processors = {} + custom_diffusion_grouped_dict = defaultdict(dict) + for key, value in state_dict.items(): + if len(value) == 0: + custom_diffusion_grouped_dict[key] = {} + else: + if "to_out" in key: + attn_processor_key, sub_key = ".".join(key.split(".")[:-3]), ".".join(key.split(".")[-3:]) + else: + attn_processor_key, sub_key = ".".join(key.split(".")[:-2]), ".".join(key.split(".")[-2:]) + custom_diffusion_grouped_dict[attn_processor_key][sub_key] = value + + for key, value_dict in custom_diffusion_grouped_dict.items(): + if len(value_dict) == 0: + attn_processors[key] = CustomDiffusionAttnProcessor( + train_kv=False, train_q_out=False, hidden_size=None, cross_attention_dim=None + ) + else: + cross_attention_dim = value_dict["to_k_custom_diffusion.weight"].shape[1] + hidden_size = value_dict["to_k_custom_diffusion.weight"].shape[0] + train_q_out = True if "to_q_custom_diffusion.weight" in value_dict else False + attn_processors[key] = CustomDiffusionAttnProcessor( + train_kv=True, + train_q_out=train_q_out, + hidden_size=hidden_size, + cross_attention_dim=cross_attention_dim, + ) + attn_processors[key].load_state_dict(value_dict) + + return attn_processors + + def _process_lora( + self, state_dict, unet_identifier_key, network_alphas, adapter_name, _pipeline, low_cpu_mem_usage + ): + # This method does the following things: + # 1. Filters the `state_dict` with keys matching `unet_identifier_key` when using the non-legacy + # format. For legacy format no filtering is applied. + # 2. Converts the `state_dict` to the `peft` compatible format. + # 3. Creates a `LoraConfig` and then injects the converted `state_dict` into the UNet per the + # `LoraConfig` specs. + # 4. It also reports if the underlying `_pipeline` has any kind of offloading inside of it. + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for this method.") + + from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict + + keys = list(state_dict.keys()) + + unet_keys = [k for k in keys if k.startswith(unet_identifier_key)] + unet_state_dict = { + k.replace(f"{unet_identifier_key}.", ""): v for k, v in state_dict.items() if k in unet_keys + } + + if network_alphas is not None: + alpha_keys = [k for k in network_alphas.keys() if k.startswith(unet_identifier_key)] + network_alphas = { + k.replace(f"{unet_identifier_key}.", ""): v for k, v in network_alphas.items() if k in alpha_keys + } + + is_model_cpu_offload = False + is_sequential_cpu_offload = False + is_group_offload = False + state_dict_to_be_used = unet_state_dict if len(unet_state_dict) > 0 else state_dict + + if len(state_dict_to_be_used) > 0: + if adapter_name in getattr(self, "peft_config", {}): + raise ValueError( + f"Adapter name {adapter_name} already in use in the Unet - please select a new adapter name." + ) + + state_dict = convert_unet_state_dict_to_peft(state_dict_to_be_used) + + if network_alphas is not None: + # The alphas state dict have the same structure as Unet, thus we convert it to peft format using + # `convert_unet_state_dict_to_peft` method. + network_alphas = convert_unet_state_dict_to_peft(network_alphas) + + rank = {} + for key, val in state_dict.items(): + if "lora_B" in key: + rank[key] = val.shape[1] + + lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict, is_unet=True) + if "use_dora" in lora_config_kwargs: + if lora_config_kwargs["use_dora"]: + if is_peft_version("<", "0.9.0"): + raise ValueError( + "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`." + ) + else: + if is_peft_version("<", "0.9.0"): + lora_config_kwargs.pop("use_dora") + + if "lora_bias" in lora_config_kwargs: + if lora_config_kwargs["lora_bias"]: + if is_peft_version("<=", "0.13.2"): + raise ValueError( + "You need `peft` 0.14.0 at least to use `bias` in LoRAs. Please upgrade your installation of `peft`." + ) + else: + if is_peft_version("<=", "0.13.2"): + lora_config_kwargs.pop("lora_bias") + + lora_config = LoraConfig(**lora_config_kwargs) + + # adapter_name + if adapter_name is None: + adapter_name = get_adapter_name(self) + + # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks + # otherwise loading LoRA weights will lead to an error + is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload = self._optionally_disable_offloading( + _pipeline + ) + peft_kwargs = {} + if is_peft_version(">=", "0.13.1"): + peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + + inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs) + incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name, **peft_kwargs) + + warn_msg = "" + if incompatible_keys is not None: + # Check only for unexpected keys. + unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None) + if unexpected_keys: + lora_unexpected_keys = [k for k in unexpected_keys if "lora_" in k and adapter_name in k] + if lora_unexpected_keys: + warn_msg = ( + f"Loading adapter weights from state_dict led to unexpected keys found in the model:" + f" {', '.join(lora_unexpected_keys)}. " + ) + + # Filter missing keys specific to the current adapter. + missing_keys = getattr(incompatible_keys, "missing_keys", None) + if missing_keys: + lora_missing_keys = [k for k in missing_keys if "lora_" in k and adapter_name in k] + if lora_missing_keys: + warn_msg += ( + f"Loading adapter weights from state_dict led to missing keys in the model:" + f" {', '.join(lora_missing_keys)}." + ) + + if warn_msg: + logger.warning(warn_msg) + + return is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload + + @classmethod + # Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading + def _optionally_disable_offloading(cls, _pipeline): + return _func_optionally_disable_offloading(_pipeline=_pipeline) + + def save_attn_procs( + self, + save_directory: str | os.PathLike, + is_main_process: bool = True, + weight_name: str = None, + save_function: Callable = None, + safe_serialization: bool = True, + **kwargs, + ): + r""" + Save attention processor layers to a directory so that it can be reloaded with the + [`~loaders.UNet2DConditionLoadersMixin.load_attn_procs`] method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save an attention processor to (will be created if it doesn't exist). + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful during distributed training and you + need to call this function on all processes. In this case, set `is_main_process=True` only on the main + process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful during distributed training when you need to + replace `torch.save` with another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or with `pickle`. + + Example: + + ```py + import torch + from diffusers import DiffusionPipeline + + pipeline = DiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + torch_dtype=torch.float16, + ).to("cuda") + pipeline.unet.load_attn_procs("path-to-save-model", weight_name="pytorch_custom_diffusion_weights.bin") + pipeline.unet.save_attn_procs("path-to-save-model", weight_name="pytorch_custom_diffusion_weights.bin") + ``` + """ + from ..models.attention_processor import ( + CustomDiffusionAttnProcessor, + CustomDiffusionAttnProcessor2_0, + CustomDiffusionXFormersAttnProcessor, + ) + + if os.path.isfile(save_directory): + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") + return + + is_custom_diffusion = any( + isinstance( + x, + (CustomDiffusionAttnProcessor, CustomDiffusionAttnProcessor2_0, CustomDiffusionXFormersAttnProcessor), + ) + for (_, x) in self.attn_processors.items() + ) + if is_custom_diffusion: + state_dict = self._get_custom_diffusion_state_dict() + if save_function is None and safe_serialization: + # safetensors does not support saving dicts with non-tensor values + empty_state_dict = {k: v for k, v in state_dict.items() if not isinstance(v, torch.Tensor)} + if len(empty_state_dict) > 0: + logger.warning( + f"Safetensors does not support saving dicts with non-tensor values. " + f"The following keys will be ignored: {empty_state_dict.keys()}" + ) + state_dict = {k: v for k, v in state_dict.items() if isinstance(v, torch.Tensor)} + else: + deprecation_message = "Using the `save_attn_procs()` method has been deprecated and will be removed in a future version. Please use `save_lora_adapter()`." + deprecate("save_attn_procs", "0.40.0", deprecation_message) + + if not USE_PEFT_BACKEND: + raise ValueError("PEFT backend is required for saving LoRAs using the `save_attn_procs()` method.") + + from peft.utils import get_peft_model_state_dict + + state_dict = get_peft_model_state_dict(self) + + if save_function is None: + if safe_serialization: + + def save_function(weights, filename): + return safetensors.torch.save_file(weights, filename, metadata={"format": "pt"}) + + else: + save_function = torch.save + + os.makedirs(save_directory, exist_ok=True) + + if weight_name is None: + if safe_serialization: + weight_name = CUSTOM_DIFFUSION_WEIGHT_NAME_SAFE if is_custom_diffusion else LORA_WEIGHT_NAME_SAFE + else: + weight_name = CUSTOM_DIFFUSION_WEIGHT_NAME if is_custom_diffusion else LORA_WEIGHT_NAME + + # Save the model + save_path = Path(save_directory, weight_name).as_posix() + save_function(state_dict, save_path) + logger.info(f"Model weights saved in {save_path}") + + def _get_custom_diffusion_state_dict(self): + from ..models.attention_processor import ( + CustomDiffusionAttnProcessor, + CustomDiffusionAttnProcessor2_0, + CustomDiffusionXFormersAttnProcessor, + ) + + model_to_save = AttnProcsLayers( + { + y: x + for (y, x) in self.attn_processors.items() + if isinstance( + x, + ( + CustomDiffusionAttnProcessor, + CustomDiffusionAttnProcessor2_0, + CustomDiffusionXFormersAttnProcessor, + ), + ) + } + ) + state_dict = model_to_save.state_dict() + for name, attn in self.attn_processors.items(): + if len(attn.state_dict()) == 0: + state_dict[name] = {} + + return state_dict + + def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT): + if low_cpu_mem_usage: + if is_accelerate_available(): + from accelerate import init_empty_weights + + else: + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + updated_state_dict = {} + image_projection = None + init_context = init_empty_weights if low_cpu_mem_usage else nullcontext + + if "proj.weight" in state_dict: + # IP-Adapter + num_image_text_embeds = 4 + clip_embeddings_dim = state_dict["proj.weight"].shape[-1] + cross_attention_dim = state_dict["proj.weight"].shape[0] // 4 + + with init_context(): + image_projection = ImageProjection( + cross_attention_dim=cross_attention_dim, + image_embed_dim=clip_embeddings_dim, + num_image_text_embeds=num_image_text_embeds, + ) + + for key, value in state_dict.items(): + diffusers_name = key.replace("proj", "image_embeds") + updated_state_dict[diffusers_name] = value + + elif "proj.3.weight" in state_dict: + # IP-Adapter Full + clip_embeddings_dim = state_dict["proj.0.weight"].shape[0] + cross_attention_dim = state_dict["proj.3.weight"].shape[0] + + with init_context(): + image_projection = IPAdapterFullImageProjection( + cross_attention_dim=cross_attention_dim, image_embed_dim=clip_embeddings_dim + ) + + for key, value in state_dict.items(): + diffusers_name = key.replace("proj.0", "ff.net.0.proj") + diffusers_name = diffusers_name.replace("proj.2", "ff.net.2") + diffusers_name = diffusers_name.replace("proj.3", "norm") + updated_state_dict[diffusers_name] = value + + elif "perceiver_resampler.proj_in.weight" in state_dict: + # IP-Adapter Face ID Plus + id_embeddings_dim = state_dict["proj.0.weight"].shape[1] + embed_dims = state_dict["perceiver_resampler.proj_in.weight"].shape[0] + hidden_dims = state_dict["perceiver_resampler.proj_in.weight"].shape[1] + output_dims = state_dict["perceiver_resampler.proj_out.weight"].shape[0] + heads = state_dict["perceiver_resampler.layers.0.0.to_q.weight"].shape[0] // 64 + + with init_context(): + image_projection = IPAdapterFaceIDPlusImageProjection( + embed_dims=embed_dims, + output_dims=output_dims, + hidden_dims=hidden_dims, + heads=heads, + id_embeddings_dim=id_embeddings_dim, + ) + + for key, value in state_dict.items(): + diffusers_name = key.replace("perceiver_resampler.", "") + diffusers_name = diffusers_name.replace("0.to", "attn.to") + diffusers_name = diffusers_name.replace("0.1.0.", "0.ff.0.") + diffusers_name = diffusers_name.replace("0.1.1.weight", "0.ff.1.net.0.proj.weight") + diffusers_name = diffusers_name.replace("0.1.3.weight", "0.ff.1.net.2.weight") + diffusers_name = diffusers_name.replace("1.1.0.", "1.ff.0.") + diffusers_name = diffusers_name.replace("1.1.1.weight", "1.ff.1.net.0.proj.weight") + diffusers_name = diffusers_name.replace("1.1.3.weight", "1.ff.1.net.2.weight") + diffusers_name = diffusers_name.replace("2.1.0.", "2.ff.0.") + diffusers_name = diffusers_name.replace("2.1.1.weight", "2.ff.1.net.0.proj.weight") + diffusers_name = diffusers_name.replace("2.1.3.weight", "2.ff.1.net.2.weight") + diffusers_name = diffusers_name.replace("3.1.0.", "3.ff.0.") + diffusers_name = diffusers_name.replace("3.1.1.weight", "3.ff.1.net.0.proj.weight") + diffusers_name = diffusers_name.replace("3.1.3.weight", "3.ff.1.net.2.weight") + diffusers_name = diffusers_name.replace("layers.0.0", "layers.0.ln0") + diffusers_name = diffusers_name.replace("layers.0.1", "layers.0.ln1") + diffusers_name = diffusers_name.replace("layers.1.0", "layers.1.ln0") + diffusers_name = diffusers_name.replace("layers.1.1", "layers.1.ln1") + diffusers_name = diffusers_name.replace("layers.2.0", "layers.2.ln0") + diffusers_name = diffusers_name.replace("layers.2.1", "layers.2.ln1") + diffusers_name = diffusers_name.replace("layers.3.0", "layers.3.ln0") + diffusers_name = diffusers_name.replace("layers.3.1", "layers.3.ln1") + + if "norm1" in diffusers_name: + updated_state_dict[diffusers_name.replace("0.norm1", "0")] = value + elif "norm2" in diffusers_name: + updated_state_dict[diffusers_name.replace("0.norm2", "1")] = value + elif "to_kv" in diffusers_name: + v_chunk = value.chunk(2, dim=0) + updated_state_dict[diffusers_name.replace("to_kv", "to_k")] = v_chunk[0] + updated_state_dict[diffusers_name.replace("to_kv", "to_v")] = v_chunk[1] + elif "to_out" in diffusers_name: + updated_state_dict[diffusers_name.replace("to_out", "to_out.0")] = value + elif "proj.0.weight" == diffusers_name: + updated_state_dict["proj.net.0.proj.weight"] = value + elif "proj.0.bias" == diffusers_name: + updated_state_dict["proj.net.0.proj.bias"] = value + elif "proj.2.weight" == diffusers_name: + updated_state_dict["proj.net.2.weight"] = value + elif "proj.2.bias" == diffusers_name: + updated_state_dict["proj.net.2.bias"] = value + else: + updated_state_dict[diffusers_name] = value + + elif "norm.weight" in state_dict: + # IP-Adapter Face ID + id_embeddings_dim_in = state_dict["proj.0.weight"].shape[1] + id_embeddings_dim_out = state_dict["proj.0.weight"].shape[0] + multiplier = id_embeddings_dim_out // id_embeddings_dim_in + norm_layer = "norm.weight" + cross_attention_dim = state_dict[norm_layer].shape[0] + num_tokens = state_dict["proj.2.weight"].shape[0] // cross_attention_dim + + with init_context(): + image_projection = IPAdapterFaceIDImageProjection( + cross_attention_dim=cross_attention_dim, + image_embed_dim=id_embeddings_dim_in, + mult=multiplier, + num_tokens=num_tokens, + ) + + for key, value in state_dict.items(): + diffusers_name = key.replace("proj.0", "ff.net.0.proj") + diffusers_name = diffusers_name.replace("proj.2", "ff.net.2") + updated_state_dict[diffusers_name] = value + + else: + # IP-Adapter Plus + num_image_text_embeds = state_dict["latents"].shape[1] + embed_dims = state_dict["proj_in.weight"].shape[1] + output_dims = state_dict["proj_out.weight"].shape[0] + hidden_dims = state_dict["latents"].shape[2] + attn_key_present = any("attn" in k for k in state_dict) + heads = ( + state_dict["layers.0.attn.to_q.weight"].shape[0] // 64 + if attn_key_present + else state_dict["layers.0.0.to_q.weight"].shape[0] // 64 + ) + + with init_context(): + image_projection = IPAdapterPlusImageProjection( + embed_dims=embed_dims, + output_dims=output_dims, + hidden_dims=hidden_dims, + heads=heads, + num_queries=num_image_text_embeds, + ) + + for key, value in state_dict.items(): + diffusers_name = key.replace("0.to", "2.to") + + diffusers_name = diffusers_name.replace("0.0.norm1", "0.ln0") + diffusers_name = diffusers_name.replace("0.0.norm2", "0.ln1") + diffusers_name = diffusers_name.replace("1.0.norm1", "1.ln0") + diffusers_name = diffusers_name.replace("1.0.norm2", "1.ln1") + diffusers_name = diffusers_name.replace("2.0.norm1", "2.ln0") + diffusers_name = diffusers_name.replace("2.0.norm2", "2.ln1") + diffusers_name = diffusers_name.replace("3.0.norm1", "3.ln0") + diffusers_name = diffusers_name.replace("3.0.norm2", "3.ln1") + + if "to_kv" in diffusers_name: + parts = diffusers_name.split(".") + parts[2] = "attn" + diffusers_name = ".".join(parts) + v_chunk = value.chunk(2, dim=0) + updated_state_dict[diffusers_name.replace("to_kv", "to_k")] = v_chunk[0] + updated_state_dict[diffusers_name.replace("to_kv", "to_v")] = v_chunk[1] + elif "to_q" in diffusers_name: + parts = diffusers_name.split(".") + parts[2] = "attn" + diffusers_name = ".".join(parts) + updated_state_dict[diffusers_name] = value + elif "to_out" in diffusers_name: + parts = diffusers_name.split(".") + parts[2] = "attn" + diffusers_name = ".".join(parts) + updated_state_dict[diffusers_name.replace("to_out", "to_out.0")] = value + else: + diffusers_name = diffusers_name.replace("0.1.0", "0.ff.0") + diffusers_name = diffusers_name.replace("0.1.1", "0.ff.1.net.0.proj") + diffusers_name = diffusers_name.replace("0.1.3", "0.ff.1.net.2") + + diffusers_name = diffusers_name.replace("1.1.0", "1.ff.0") + diffusers_name = diffusers_name.replace("1.1.1", "1.ff.1.net.0.proj") + diffusers_name = diffusers_name.replace("1.1.3", "1.ff.1.net.2") + + diffusers_name = diffusers_name.replace("2.1.0", "2.ff.0") + diffusers_name = diffusers_name.replace("2.1.1", "2.ff.1.net.0.proj") + diffusers_name = diffusers_name.replace("2.1.3", "2.ff.1.net.2") + + diffusers_name = diffusers_name.replace("3.1.0", "3.ff.0") + diffusers_name = diffusers_name.replace("3.1.1", "3.ff.1.net.0.proj") + diffusers_name = diffusers_name.replace("3.1.3", "3.ff.1.net.2") + updated_state_dict[diffusers_name] = value + + if not low_cpu_mem_usage: + image_projection.load_state_dict(updated_state_dict, strict=True) + else: + device_map = {"": self.device} + load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype) + empty_device_cache() + + return image_projection + + def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT): + from ..models.attention_processor import ( + IPAdapterAttnProcessor, + IPAdapterAttnProcessor2_0, + IPAdapterXFormersAttnProcessor, + ) + + if low_cpu_mem_usage: + if is_accelerate_available(): + from accelerate import init_empty_weights + + else: + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + # set ip-adapter cross-attention processors & load state_dict + attn_procs = {} + key_id = 1 + init_context = init_empty_weights if low_cpu_mem_usage else nullcontext + for name in self.attn_processors.keys(): + cross_attention_dim = None if name.endswith("attn1.processor") else self.config.cross_attention_dim + if name.startswith("mid_block"): + hidden_size = self.config.block_out_channels[-1] + elif name.startswith("up_blocks"): + block_id = int(name[len("up_blocks.")]) + hidden_size = list(reversed(self.config.block_out_channels))[block_id] + elif name.startswith("down_blocks"): + block_id = int(name[len("down_blocks.")]) + hidden_size = self.config.block_out_channels[block_id] + + if cross_attention_dim is None or "motion_modules" in name: + attn_processor_class = self.attn_processors[name].__class__ + attn_procs[name] = attn_processor_class() + else: + if "XFormers" in str(self.attn_processors[name].__class__): + attn_processor_class = IPAdapterXFormersAttnProcessor + else: + attn_processor_class = ( + IPAdapterAttnProcessor2_0 + if hasattr(F, "scaled_dot_product_attention") + else IPAdapterAttnProcessor + ) + num_image_text_embeds = [] + for state_dict in state_dicts: + if "proj.weight" in state_dict["image_proj"]: + # IP-Adapter + num_image_text_embeds += [4] + elif "proj.3.weight" in state_dict["image_proj"]: + # IP-Adapter Full Face + num_image_text_embeds += [257] # 256 CLIP tokens + 1 CLS token + elif "perceiver_resampler.proj_in.weight" in state_dict["image_proj"]: + # IP-Adapter Face ID Plus + num_image_text_embeds += [4] + elif "norm.weight" in state_dict["image_proj"]: + # IP-Adapter Face ID + num_image_text_embeds += [4] + else: + # IP-Adapter Plus + num_image_text_embeds += [state_dict["image_proj"]["latents"].shape[1]] + + with init_context(): + attn_procs[name] = attn_processor_class( + hidden_size=hidden_size, + cross_attention_dim=cross_attention_dim, + scale=1.0, + num_tokens=num_image_text_embeds, + ) + + value_dict = {} + for i, state_dict in enumerate(state_dicts): + value_dict.update({f"to_k_ip.{i}.weight": state_dict["ip_adapter"][f"{key_id}.to_k_ip.weight"]}) + value_dict.update({f"to_v_ip.{i}.weight": state_dict["ip_adapter"][f"{key_id}.to_v_ip.weight"]}) + + if not low_cpu_mem_usage: + attn_procs[name].load_state_dict(value_dict) + else: + device = next(iter(value_dict.values())).device + dtype = next(iter(value_dict.values())).dtype + device_map = {"": device} + load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype) + + key_id += 2 + + empty_device_cache() + + return attn_procs + + def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT): + if not isinstance(state_dicts, list): + state_dicts = [state_dicts] + + # Kolors Unet already has a `encoder_hid_proj` + if ( + self.encoder_hid_proj is not None + and self.config.encoder_hid_dim_type == "text_proj" + and not hasattr(self, "text_encoder_hid_proj") + ): + self.text_encoder_hid_proj = self.encoder_hid_proj + + # Set encoder_hid_proj after loading ip_adapter weights, + # because `IPAdapterPlusImageProjection` also has `attn_processors`. + self.encoder_hid_proj = None + + attn_procs = self._convert_ip_adapter_attn_to_diffusers(state_dicts, low_cpu_mem_usage=low_cpu_mem_usage) + self.set_attn_processor(attn_procs) + + # convert IP-Adapter Image Projection layers to diffusers + image_projection_layers = [] + for state_dict in state_dicts: + image_projection_layer = self._convert_ip_adapter_image_proj_to_diffusers( + state_dict["image_proj"], low_cpu_mem_usage=low_cpu_mem_usage + ) + image_projection_layers.append(image_projection_layer) + + self.encoder_hid_proj = MultiIPAdapterImageProjection(image_projection_layers) + self.config.encoder_hid_dim_type = "ip_image_proj" + + self.to(dtype=self.dtype, device=self.device) + + def _load_ip_adapter_loras(self, state_dicts): + lora_dicts = {} + for key_id, name in enumerate(self.attn_processors.keys()): + for i, state_dict in enumerate(state_dicts): + if f"{key_id}.to_k_lora.down.weight" in state_dict["ip_adapter"]: + if i not in lora_dicts: + lora_dicts[i] = {} + lora_dicts[i].update( + { + f"unet.{name}.to_k_lora.down.weight": state_dict["ip_adapter"][ + f"{key_id}.to_k_lora.down.weight" + ] + } + ) + lora_dicts[i].update( + { + f"unet.{name}.to_q_lora.down.weight": state_dict["ip_adapter"][ + f"{key_id}.to_q_lora.down.weight" + ] + } + ) + lora_dicts[i].update( + { + f"unet.{name}.to_v_lora.down.weight": state_dict["ip_adapter"][ + f"{key_id}.to_v_lora.down.weight" + ] + } + ) + lora_dicts[i].update( + { + f"unet.{name}.to_out_lora.down.weight": state_dict["ip_adapter"][ + f"{key_id}.to_out_lora.down.weight" + ] + } + ) + lora_dicts[i].update( + {f"unet.{name}.to_k_lora.up.weight": state_dict["ip_adapter"][f"{key_id}.to_k_lora.up.weight"]} + ) + lora_dicts[i].update( + {f"unet.{name}.to_q_lora.up.weight": state_dict["ip_adapter"][f"{key_id}.to_q_lora.up.weight"]} + ) + lora_dicts[i].update( + {f"unet.{name}.to_v_lora.up.weight": state_dict["ip_adapter"][f"{key_id}.to_v_lora.up.weight"]} + ) + lora_dicts[i].update( + { + f"unet.{name}.to_out_lora.up.weight": state_dict["ip_adapter"][ + f"{key_id}.to_out_lora.up.weight" + ] + } + ) + return lora_dicts diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/unet_loader_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/unet_loader_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d6aff55d6d229ed6bf37e153ec83b3b6ab3cba59 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/unet_loader_utils.py @@ -0,0 +1,164 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import copy +from typing import TYPE_CHECKING + +from torch import nn + +from ..utils import logging + + +if TYPE_CHECKING: + # import here to avoid circular imports + from ..models import UNet2DConditionModel + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _translate_into_actual_layer_name(name): + """Translate user-friendly name (e.g. 'mid') into actual layer name (e.g. 'mid_block.attentions.0')""" + if name == "mid": + return "mid_block.attentions.0" + + updown, block, attn = name.split(".") + + updown = updown.replace("down", "down_blocks").replace("up", "up_blocks") + block = block.replace("block_", "") + attn = "attentions." + attn + + return ".".join((updown, block, attn)) + + +def _maybe_expand_lora_scales(unet: "UNet2DConditionModel", weight_scales: list[float | dict], default_scale=1.0): + blocks_with_transformer = { + "down": [i for i, block in enumerate(unet.down_blocks) if hasattr(block, "attentions")], + "up": [i for i, block in enumerate(unet.up_blocks) if hasattr(block, "attentions")], + } + transformer_per_block = {"down": unet.config.layers_per_block, "up": unet.config.layers_per_block + 1} + + expanded_weight_scales = [ + _maybe_expand_lora_scales_for_one_adapter( + weight_for_adapter, + blocks_with_transformer, + transformer_per_block, + model=unet, + default_scale=default_scale, + ) + for weight_for_adapter in weight_scales + ] + + return expanded_weight_scales + + +def _maybe_expand_lora_scales_for_one_adapter( + scales: float | dict, + blocks_with_transformer: dict[str, int], + transformer_per_block: dict[str, int], + model: nn.Module, + default_scale: float = 1.0, +): + """ + Expands the inputs into a more granular dictionary. See the example below for more details. + + Parameters: + scales (`float | Dict`): + Scales dict to expand. + blocks_with_transformer (`dict[str, int]`): + Dict with keys 'up' and 'down', showing which blocks have transformer layers + transformer_per_block (`dict[str, int]`): + Dict with keys 'up' and 'down', showing how many transformer layers each block has + + E.g. turns + ```python + scales = {"down": 2, "mid": 3, "up": {"block_0": 4, "block_1": [5, 6, 7]}} + blocks_with_transformer = {"down": [1, 2], "up": [0, 1]} + transformer_per_block = {"down": 2, "up": 3} + ``` + into + ```python + { + "down.block_1.0": 2, + "down.block_1.1": 2, + "down.block_2.0": 2, + "down.block_2.1": 2, + "mid": 3, + "up.block_0.0": 4, + "up.block_0.1": 4, + "up.block_0.2": 4, + "up.block_1.0": 5, + "up.block_1.1": 6, + "up.block_1.2": 7, + } + ``` + """ + if sorted(blocks_with_transformer.keys()) != ["down", "up"]: + raise ValueError("blocks_with_transformer needs to be a dict with keys `'down' and `'up'`") + + if sorted(transformer_per_block.keys()) != ["down", "up"]: + raise ValueError("transformer_per_block needs to be a dict with keys `'down' and `'up'`") + + if not isinstance(scales, dict): + # don't expand if scales is a single number + return scales + + scales = copy.deepcopy(scales) + + if "mid" not in scales: + scales["mid"] = default_scale + elif isinstance(scales["mid"], list): + if len(scales["mid"]) == 1: + scales["mid"] = scales["mid"][0] + else: + raise ValueError(f"Expected 1 scales for mid, got {len(scales['mid'])}.") + + for updown in ["up", "down"]: + if updown not in scales: + scales[updown] = default_scale + + # eg {"down": 1} to {"down": {"block_1": 1, "block_2": 1}}} + if not isinstance(scales[updown], dict): + scales[updown] = {f"block_{i}": copy.deepcopy(scales[updown]) for i in blocks_with_transformer[updown]} + + # eg {"down": {"block_1": 1}} to {"down": {"block_1": [1, 1]}} + for i in blocks_with_transformer[updown]: + block = f"block_{i}" + # set not assigned blocks to default scale + if block not in scales[updown]: + scales[updown][block] = default_scale + if not isinstance(scales[updown][block], list): + scales[updown][block] = [scales[updown][block] for _ in range(transformer_per_block[updown])] + elif len(scales[updown][block]) == 1: + # a list specifying scale to each masked IP input + scales[updown][block] = scales[updown][block] * transformer_per_block[updown] + elif len(scales[updown][block]) != transformer_per_block[updown]: + raise ValueError( + f"Expected {transformer_per_block[updown]} scales for {updown}.{block}, got {len(scales[updown][block])}." + ) + + # eg {"down": "block_1": [1, 1]}} to {"down.block_1.0": 1, "down.block_1.1": 1} + for i in blocks_with_transformer[updown]: + block = f"block_{i}" + for tf_idx, value in enumerate(scales[updown][block]): + scales[f"{updown}.{block}.{tf_idx}"] = value + + del scales[updown] + + state_dict = model.state_dict() + for layer in scales.keys(): + if not any(_translate_into_actual_layer_name(layer) in module for module in state_dict.keys()): + raise ValueError( + f"Can't set lora scale for layer {layer}. It either doesn't exist in this unet or it has no attentions." + ) + + return {_translate_into_actual_layer_name(name): weight for name, weight in scales.items()} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..72d6453a8bbf47e06a760fbc3ca83f6cbf848fea --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/loaders/utils.py @@ -0,0 +1,58 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch + + +class AttnProcsLayers(torch.nn.Module): + def __init__(self, state_dict: dict[str, torch.Tensor]): + super().__init__() + self.layers = torch.nn.ModuleList(state_dict.values()) + self.mapping = dict(enumerate(state_dict.keys())) + self.rev_mapping = {v: k for k, v in enumerate(state_dict.keys())} + + # .processor for unet, .self_attn for text encoder + self.split_keys = [".processor", ".self_attn"] + + # we add a hook to state_dict() and load_state_dict() so that the + # naming fits with `unet.attn_processors` + def map_to(module, state_dict, *args, **kwargs): + new_state_dict = {} + for key, value in state_dict.items(): + num = int(key.split(".")[1]) # 0 is always "layers" + new_key = key.replace(f"layers.{num}", module.mapping[num]) + new_state_dict[new_key] = value + + return new_state_dict + + def remap_key(key, state_dict): + for k in self.split_keys: + if k in key: + return key.split(k)[0] + k + + raise ValueError( + f"There seems to be a problem with the state_dict: {set(state_dict.keys())}. {key} has to have one of {self.split_keys}." + ) + + def map_from(module, state_dict, *args, **kwargs): + all_keys = list(state_dict.keys()) + for key in all_keys: + replace_key = remap_key(key, state_dict) + new_key = key.replace(replace_key, f"layers.{module.rev_mapping[replace_key]}") + state_dict[new_key] = state_dict[key] + del state_dict[key] + + self._register_state_dict_hook(map_to) + self._register_load_state_dict_pre_hook(map_from, with_module=True) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b5b9805d4c968c9d58eb33ac7f531902a86a5e25 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__init__.py @@ -0,0 +1,274 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING + +from ..utils import ( + DIFFUSERS_SLOW_IMPORT, + _LazyModule, + is_flax_available, + is_torch_available, +) + + +_import_structure = {} + +if is_torch_available(): + _import_structure["_modeling_parallel"] = ["ContextParallelConfig", "ParallelConfig"] + _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"] + _import_structure["attention_dispatch"] = ["AttentionBackendName", "attention_backend"] + _import_structure["auto_model"] = ["AutoModel"] + _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"] + _import_structure["autoencoders.autoencoder_dc"] = ["AutoencoderDC"] + _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"] + _import_structure["autoencoders.autoencoder_kl_allegro"] = ["AutoencoderKLAllegro"] + _import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"] + _import_structure["autoencoders.autoencoder_kl_cosmos"] = ["AutoencoderKLCosmos"] + _import_structure["autoencoders.autoencoder_kl_flux2"] = ["AutoencoderKLFlux2"] + _import_structure["autoencoders.autoencoder_kl_hunyuan_video"] = ["AutoencoderKLHunyuanVideo"] + _import_structure["autoencoders.autoencoder_kl_hunyuanimage"] = ["AutoencoderKLHunyuanImage"] + _import_structure["autoencoders.autoencoder_kl_hunyuanimage_refiner"] = ["AutoencoderKLHunyuanImageRefiner"] + _import_structure["autoencoders.autoencoder_kl_hunyuanvideo15"] = ["AutoencoderKLHunyuanVideo15"] + _import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"] + _import_structure["autoencoders.autoencoder_kl_ltx2"] = ["AutoencoderKLLTX2Video"] + _import_structure["autoencoders.autoencoder_kl_ltx2_audio"] = ["AutoencoderKLLTX2Audio"] + _import_structure["autoencoders.autoencoder_kl_magvit"] = ["AutoencoderKLMagvit"] + _import_structure["autoencoders.autoencoder_kl_mochi"] = ["AutoencoderKLMochi"] + _import_structure["autoencoders.autoencoder_kl_qwenimage"] = ["AutoencoderKLQwenImage"] + _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"] + _import_structure["autoencoders.autoencoder_kl_wan"] = ["AutoencoderKLWan"] + _import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"] + _import_structure["autoencoders.autoencoder_rae"] = ["AutoencoderRAE"] + _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"] + _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"] + _import_structure["autoencoders.vq_model"] = ["VQModel"] + _import_structure["cache_utils"] = ["CacheMixin"] + _import_structure["controlnets.controlnet"] = ["ControlNetModel"] + _import_structure["controlnets.controlnet_cosmos"] = ["CosmosControlNetModel"] + _import_structure["controlnets.controlnet_flux"] = ["FluxControlNetModel", "FluxMultiControlNetModel"] + _import_structure["controlnets.controlnet_hunyuan"] = [ + "HunyuanDiT2DControlNetModel", + "HunyuanDiT2DMultiControlNetModel", + ] + _import_structure["controlnets.controlnet_qwenimage"] = [ + "QwenImageControlNetModel", + "QwenImageMultiControlNetModel", + ] + _import_structure["controlnets.controlnet_sana"] = ["SanaControlNetModel"] + _import_structure["controlnets.controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"] + _import_structure["controlnets.controlnet_sparsectrl"] = ["SparseControlNetModel"] + _import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"] + _import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"] + _import_structure["controlnets.controlnet_z_image"] = ["ZImageControlNetModel"] + _import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"] + _import_structure["controlnets.multicontrolnet_union"] = ["MultiControlNetUnionModel"] + _import_structure["embeddings"] = ["ImageProjection"] + _import_structure["modeling_utils"] = ["ModelMixin"] + _import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"] + _import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"] + _import_structure["transformers.consisid_transformer_3d"] = ["ConsisIDTransformer3DModel"] + _import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"] + _import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"] + _import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"] + _import_structure["transformers.latte_transformer_3d"] = ["LatteTransformer3DModel"] + _import_structure["transformers.lumina_nextdit2d"] = ["LuminaNextDiT2DModel"] + _import_structure["transformers.pixart_transformer_2d"] = ["PixArtTransformer2DModel"] + _import_structure["transformers.prior_transformer"] = ["PriorTransformer"] + _import_structure["transformers.sana_transformer"] = ["SanaTransformer2DModel"] + _import_structure["transformers.stable_audio_transformer"] = ["StableAudioDiTModel"] + _import_structure["transformers.t5_film_transformer"] = ["T5FilmDecoder"] + _import_structure["transformers.transformer_2d"] = ["Transformer2DModel"] + _import_structure["transformers.transformer_allegro"] = ["AllegroTransformer3DModel"] + _import_structure["transformers.transformer_bria"] = ["BriaTransformer2DModel"] + _import_structure["transformers.transformer_bria_fibo"] = ["BriaFiboTransformer2DModel"] + _import_structure["transformers.transformer_chroma"] = ["ChromaTransformer2DModel"] + _import_structure["transformers.transformer_chronoedit"] = ["ChronoEditTransformer3DModel"] + _import_structure["transformers.transformer_cogview3plus"] = ["CogView3PlusTransformer2DModel"] + _import_structure["transformers.transformer_cogview4"] = ["CogView4Transformer2DModel"] + _import_structure["transformers.transformer_cosmos"] = ["CosmosTransformer3DModel"] + _import_structure["transformers.transformer_easyanimate"] = ["EasyAnimateTransformer3DModel"] + _import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"] + _import_structure["transformers.transformer_flux2"] = ["Flux2Transformer2DModel"] + _import_structure["transformers.transformer_glm_image"] = ["GlmImageTransformer2DModel"] + _import_structure["transformers.transformer_helios"] = ["HeliosTransformer3DModel"] + _import_structure["transformers.transformer_hidream_image"] = ["HiDreamImageTransformer2DModel"] + _import_structure["transformers.transformer_hunyuan_video"] = ["HunyuanVideoTransformer3DModel"] + _import_structure["transformers.transformer_hunyuan_video15"] = ["HunyuanVideo15Transformer3DModel"] + _import_structure["transformers.transformer_hunyuan_video_framepack"] = ["HunyuanVideoFramepackTransformer3DModel"] + _import_structure["transformers.transformer_hunyuanimage"] = ["HunyuanImageTransformer2DModel"] + _import_structure["transformers.transformer_kandinsky"] = ["Kandinsky5Transformer3DModel"] + _import_structure["transformers.transformer_longcat_image"] = ["LongCatImageTransformer2DModel"] + _import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"] + _import_structure["transformers.transformer_ltx2"] = ["LTX2VideoTransformer3DModel"] + _import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"] + _import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"] + _import_structure["transformers.transformer_omnigen"] = ["OmniGenTransformer2DModel"] + _import_structure["transformers.transformer_ovis_image"] = ["OvisImageTransformer2DModel"] + _import_structure["transformers.transformer_prx"] = ["PRXTransformer2DModel"] + _import_structure["transformers.transformer_qwenimage"] = ["QwenImageTransformer2DModel"] + _import_structure["transformers.transformer_sana_video"] = ["SanaVideoTransformer3DModel"] + _import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"] + _import_structure["transformers.transformer_skyreels_v2"] = ["SkyReelsV2Transformer3DModel"] + _import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"] + _import_structure["transformers.transformer_wan"] = ["WanTransformer3DModel"] + _import_structure["transformers.transformer_wan_animate"] = ["WanAnimateTransformer3DModel"] + _import_structure["transformers.transformer_wan_vace"] = ["WanVACETransformer3DModel"] + _import_structure["transformers.transformer_z_image"] = ["ZImageTransformer2DModel"] + _import_structure["unets.unet_1d"] = ["UNet1DModel"] + _import_structure["unets.unet_2d"] = ["UNet2DModel"] + _import_structure["unets.unet_2d_condition"] = ["UNet2DConditionModel"] + _import_structure["unets.unet_3d_condition"] = ["UNet3DConditionModel"] + _import_structure["unets.unet_i2vgen_xl"] = ["I2VGenXLUNet"] + _import_structure["unets.unet_kandinsky3"] = ["Kandinsky3UNet"] + _import_structure["unets.unet_motion_model"] = ["MotionAdapter", "UNetMotionModel"] + _import_structure["unets.unet_spatio_temporal_condition"] = ["UNetSpatioTemporalConditionModel"] + _import_structure["unets.unet_stable_cascade"] = ["StableCascadeUNet"] + _import_structure["unets.uvit_2d"] = ["UVit2DModel"] + +if is_flax_available(): + _import_structure["controlnets.controlnet_flax"] = ["FlaxControlNetModel"] + _import_structure["unets.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"] + _import_structure["vae_flax"] = ["FlaxAutoencoderKL"] + + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + if is_torch_available(): + from ._modeling_parallel import ContextParallelConfig, ParallelConfig + from .adapter import MultiAdapter, T2IAdapter + from .attention_dispatch import AttentionBackendName, attention_backend + from .auto_model import AutoModel + from .autoencoders import ( + AsymmetricAutoencoderKL, + AutoencoderDC, + AutoencoderKL, + AutoencoderKLAllegro, + AutoencoderKLCogVideoX, + AutoencoderKLCosmos, + AutoencoderKLFlux2, + AutoencoderKLHunyuanImage, + AutoencoderKLHunyuanImageRefiner, + AutoencoderKLHunyuanVideo, + AutoencoderKLHunyuanVideo15, + AutoencoderKLLTX2Audio, + AutoencoderKLLTX2Video, + AutoencoderKLLTXVideo, + AutoencoderKLMagvit, + AutoencoderKLMochi, + AutoencoderKLQwenImage, + AutoencoderKLTemporalDecoder, + AutoencoderKLWan, + AutoencoderOobleck, + AutoencoderRAE, + AutoencoderTiny, + ConsistencyDecoderVAE, + VQModel, + ) + from .cache_utils import CacheMixin + from .controlnets import ( + ControlNetModel, + ControlNetUnionModel, + ControlNetXSAdapter, + CosmosControlNetModel, + FluxControlNetModel, + FluxMultiControlNetModel, + HunyuanDiT2DControlNetModel, + HunyuanDiT2DMultiControlNetModel, + MultiControlNetModel, + MultiControlNetUnionModel, + QwenImageControlNetModel, + QwenImageMultiControlNetModel, + SanaControlNetModel, + SD3ControlNetModel, + SD3MultiControlNetModel, + SparseControlNetModel, + UNetControlNetXSModel, + ZImageControlNetModel, + ) + from .embeddings import ImageProjection + from .modeling_utils import ModelMixin + from .transformers import ( + AllegroTransformer3DModel, + AuraFlowTransformer2DModel, + BriaFiboTransformer2DModel, + BriaTransformer2DModel, + ChromaTransformer2DModel, + ChronoEditTransformer3DModel, + CogVideoXTransformer3DModel, + CogView3PlusTransformer2DModel, + CogView4Transformer2DModel, + ConsisIDTransformer3DModel, + CosmosTransformer3DModel, + DiTTransformer2DModel, + DualTransformer2DModel, + EasyAnimateTransformer3DModel, + Flux2Transformer2DModel, + FluxTransformer2DModel, + GlmImageTransformer2DModel, + HeliosTransformer3DModel, + HiDreamImageTransformer2DModel, + HunyuanDiT2DModel, + HunyuanImageTransformer2DModel, + HunyuanVideo15Transformer3DModel, + HunyuanVideoFramepackTransformer3DModel, + HunyuanVideoTransformer3DModel, + Kandinsky5Transformer3DModel, + LatteTransformer3DModel, + LongCatImageTransformer2DModel, + LTX2VideoTransformer3DModel, + LTXVideoTransformer3DModel, + Lumina2Transformer2DModel, + LuminaNextDiT2DModel, + MochiTransformer3DModel, + OmniGenTransformer2DModel, + OvisImageTransformer2DModel, + PixArtTransformer2DModel, + PriorTransformer, + PRXTransformer2DModel, + QwenImageTransformer2DModel, + SanaTransformer2DModel, + SanaVideoTransformer3DModel, + SD3Transformer2DModel, + SkyReelsV2Transformer3DModel, + StableAudioDiTModel, + T5FilmDecoder, + Transformer2DModel, + TransformerTemporalModel, + WanAnimateTransformer3DModel, + WanTransformer3DModel, + WanVACETransformer3DModel, + ZImageTransformer2DModel, + ) + from .unets import ( + I2VGenXLUNet, + Kandinsky3UNet, + MotionAdapter, + StableCascadeUNet, + UNet1DModel, + UNet2DConditionModel, + UNet2DModel, + UNet3DConditionModel, + UNetMotionModel, + UNetSpatioTemporalConditionModel, + UVit2DModel, + ) + + if is_flax_available(): + from .controlnets import FlaxControlNetModel + from .unets import FlaxUNet2DConditionModel + from .vae_flax import FlaxAutoencoderKL + +else: + import sys + + sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/cache_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/cache_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e1896c89ef21c99bbd155a22c519675e99f1172 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/cache_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/downsampling.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/downsampling.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1d4ccb81e2dcd7eccb4de9cc25e6f7ae4bbe50b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/downsampling.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/modeling_flax_pytorch_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/modeling_flax_pytorch_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d745fd8bb24f5da14bfb33a499ee45dc11da3a04 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/__pycache__/modeling_flax_pytorch_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/_modeling_parallel.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/_modeling_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..ed966dc8fe98495426e722cc33f1242d3329efd5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/_modeling_parallel.py @@ -0,0 +1,302 @@ +# 🚨🚨🚨 Experimental parallelism support for Diffusers 🚨🚨🚨 +# Experimental changes are subject to change and APIs may break without warning. + +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Literal + +import torch +import torch.distributed as dist + +from ..utils import get_logger + + +if TYPE_CHECKING: + pass + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +# TODO(aryan): add support for the following: +# - Unified Attention +# - More dispatcher attention backends +# - CFG/Data Parallel +# - Tensor Parallel + + +@dataclass +class ContextParallelConfig: + """ + Configuration for context parallelism. + + Args: + ring_degree (`int`, *optional*, defaults to `1`): + Number of devices to use for Ring Attention. Sequence is split across devices. Each device computes + attention between its local Q and KV chunks passed sequentially around ring. Lower memory (only holds 1/N + of KV at a time), overlaps compute with communication, but requires N iterations to see all tokens. Best + for long sequences with limited memory/bandwidth. Number of devices to use for ring attention within a + context parallel region. Must be a divisor of the total number of devices in the context parallel mesh. + ulysses_degree (`int`, *optional*, defaults to `1`): + Number of devices to use for Ulysses Attention. Sequence split is across devices. Each device computes + local QKV, then all-gathers all KV chunks to compute full attention in one pass. Higher memory (stores all + KV), requires high-bandwidth all-to-all communication, but lower latency. Best for moderate sequences with + good interconnect bandwidth. + convert_to_fp32 (`bool`, *optional*, defaults to `True`): + Whether to convert output and LSE to float32 for ring attention numerical stability. + rotate_method (`str`, *optional*, defaults to `"allgather"`): + Method to use for rotating key/value states across devices in ring attention. Currently, only `"allgather"` + is supported. + + """ + + ring_degree: int | None = None + ulysses_degree: int | None = None + convert_to_fp32: bool = True + # TODO: support alltoall + rotate_method: Literal["allgather", "alltoall"] = "allgather" + # Whether to enable ulysses anything attention to support + # any sequence lengths and any head numbers. + ulysses_anything: bool = False + + _rank: int = None + _world_size: int = None + _device: torch.device = None + _mesh: torch.distributed.device_mesh.DeviceMesh = None + _flattened_mesh: torch.distributed.device_mesh.DeviceMesh = None + _ring_mesh: torch.distributed.device_mesh.DeviceMesh = None + _ulysses_mesh: torch.distributed.device_mesh.DeviceMesh = None + _ring_local_rank: int = None + _ulysses_local_rank: int = None + + def __post_init__(self): + if self.ring_degree is None: + self.ring_degree = 1 + if self.ulysses_degree is None: + self.ulysses_degree = 1 + + if self.ring_degree == 1 and self.ulysses_degree == 1: + raise ValueError( + "Either ring_degree or ulysses_degree must be greater than 1 in order to use context parallel inference" + ) + if self.ring_degree < 1 or self.ulysses_degree < 1: + raise ValueError("`ring_degree` and `ulysses_degree` must be greater than or equal to 1.") + if self.rotate_method != "allgather": + raise NotImplementedError( + f"Only rotate_method='allgather' is supported for now, but got {self.rotate_method}." + ) + if self.ulysses_anything: + if self.ulysses_degree == 1: + raise ValueError("ulysses_degree must be greater than 1 for ulysses_anything to be enabled.") + if self.ring_degree > 1: + raise ValueError("ulysses_anything cannot be enabled when ring_degree > 1.") + + @property + def mesh_shape(self) -> tuple[int, int]: + return (self.ring_degree, self.ulysses_degree) + + @property + def mesh_dim_names(self) -> tuple[str, str]: + """Dimension names for the device mesh.""" + return ("ring", "ulysses") + + def setup(self, rank: int, world_size: int, device: torch.device, mesh: torch.distributed.device_mesh.DeviceMesh): + self._rank = rank + self._world_size = world_size + self._device = device + self._mesh = mesh + + if self.ulysses_degree * self.ring_degree > world_size: + raise ValueError( + f"The product of `ring_degree` ({self.ring_degree}) and `ulysses_degree` ({self.ulysses_degree}) must not exceed the world size ({world_size})." + ) + + self._flattened_mesh = self._mesh._flatten() + self._ring_mesh = self._mesh["ring"] + self._ulysses_mesh = self._mesh["ulysses"] + self._ring_local_rank = self._ring_mesh.get_local_rank() + self._ulysses_local_rank = self._ulysses_mesh.get_local_rank() + + +@dataclass +class ParallelConfig: + """ + Configuration for applying different parallelisms. + + Args: + context_parallel_config (`ContextParallelConfig`, *optional*): + Configuration for context parallelism. + """ + + context_parallel_config: ContextParallelConfig | None = None + + _rank: int = None + _world_size: int = None + _device: torch.device = None + _mesh: torch.distributed.device_mesh.DeviceMesh = None + + def setup( + self, + rank: int, + world_size: int, + device: torch.device, + *, + mesh: torch.distributed.device_mesh.DeviceMesh | None = None, + ): + self._rank = rank + self._world_size = world_size + self._device = device + self._mesh = mesh + if self.context_parallel_config is not None: + self.context_parallel_config.setup(rank, world_size, device, mesh) + + +@dataclass(frozen=True) +class ContextParallelInput: + """ + Configuration for splitting an input tensor across context parallel region. + + Args: + split_dim (`int`): + The dimension along which to split the tensor. + expected_dims (`int`, *optional*): + The expected number of dimensions of the tensor. If provided, a check will be performed to ensure that the + tensor has the expected number of dimensions before splitting. + split_output (`bool`, *optional*, defaults to `False`): + Whether to split the output tensor of the layer along the given `split_dim` instead of the input tensor. + This is useful for layers whose outputs should be split after it does some preprocessing on the inputs (ex: + RoPE). + """ + + split_dim: int + expected_dims: int | None = None + split_output: bool = False + + def __repr__(self): + return f"ContextParallelInput(split_dim={self.split_dim}, expected_dims={self.expected_dims}, split_output={self.split_output})" + + +@dataclass(frozen=True) +class ContextParallelOutput: + """ + Configuration for gathering an output tensor across context parallel region. + + Args: + gather_dim (`int`): + The dimension along which to gather the tensor. + expected_dims (`int`, *optional*): + The expected number of dimensions of the tensor. If provided, a check will be performed to ensure that the + tensor has the expected number of dimensions before gathering. + """ + + gather_dim: int + expected_dims: int | None = None + + def __repr__(self): + return f"ContextParallelOutput(gather_dim={self.gather_dim}, expected_dims={self.expected_dims})" + + +# A dictionary where keys denote the input to be split across context parallel region, and the +# value denotes the sharding configuration. +# If the key is a string, it denotes the name of the parameter in the forward function. +# If the key is an integer, split_output must be set to True, and it denotes the index of the output +# to be split across context parallel region. +ContextParallelInputType = dict[ + str | int, ContextParallelInput | list[ContextParallelInput] | tuple[ContextParallelInput, ...] +] + +# A dictionary where keys denote the output to be gathered across context parallel region, and the +# value denotes the gathering configuration. +ContextParallelOutputType = ContextParallelOutput | list[ContextParallelOutput] | tuple[ContextParallelOutput, ...] + +# A dictionary where keys denote the module id, and the value denotes how the inputs/outputs of +# the module should be split/gathered across context parallel region. +ContextParallelModelPlan = dict[str, ContextParallelInputType | ContextParallelOutputType] + + +# Example of a ContextParallelModelPlan (QwenImageTransformer2DModel): +# +# Each model should define a _cp_plan attribute that contains information on how to shard/gather +# tensors at different stages of the forward: +# +# ```python +# _cp_plan = { +# "": { +# "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), +# "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), +# "encoder_hidden_states_mask": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False), +# }, +# "pos_embed": { +# 0: ContextParallelInput(split_dim=0, expected_dims=2, split_output=True), +# 1: ContextParallelInput(split_dim=0, expected_dims=2, split_output=True), +# }, +# "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), +# } +# ``` +# +# The dictionary is a set of module names mapped to their respective CP plan. The inputs/outputs of layers will be +# split/gathered according to this at the respective module level. Here, the following happens: +# - "": +# we specify that we want to split the various inputs across the sequence dim in the pre-forward hook (i.e. before +# the actual forward logic of the QwenImageTransformer2DModel is run, we will splitthe inputs) +# - "pos_embed": +# we specify that we want to split the outputs of the RoPE layer. Since there are two outputs (imag & text freqs), +# we can individually specify how they should be split +# - "proj_out": +# before returning to the user, we gather the entire sequence on each rank in the post-forward hook (after the linear +# layer forward has run). +# +# ContextParallelInput: +# specifies how to split the input tensor in the pre-forward or post-forward hook of the layer it is attached to +# +# ContextParallelOutput: +# specifies how to gather the input tensor in the post-forward hook in the layer it is attached to + + +# Below are utility functions for distributed communication in context parallelism. +def gather_size_by_comm(size: int, group: dist.ProcessGroup) -> list[int]: + r"""Gather the local size from all ranks. + size: int, local size return: list[int], list of size from all ranks + """ + # NOTE(Serving/CP Safety): + # Do NOT cache this collective result. + # + # In "Ulysses Anything" mode, `size` (e.g. per-rank local seq_len / S_LOCAL) + # may legitimately differ across ranks. If we cache based on the *local* `size`, + # different ranks can have different cache hit/miss patterns across time. + # + # That can lead to a catastrophic distributed hang: + # - some ranks hit cache and *skip* dist.all_gather() + # - other ranks miss cache and *enter* dist.all_gather() + # This mismatched collective participation will stall the process group and + # eventually trigger NCCL watchdog timeouts (often surfacing later as ALLTOALL + # timeouts in Ulysses attention). + world_size = dist.get_world_size(group=group) + # HACK: Use Gloo backend for all_gather to avoid H2D and D2H overhead + comm_backends = str(dist.get_backend(group=group)) + # NOTE: e.g., dist.init_process_group(backend="cpu:gloo,cuda:nccl") + gather_device = "cpu" if "cpu" in comm_backends else torch.accelerator.current_accelerator() + gathered_sizes = [torch.empty((1,), device=gather_device, dtype=torch.int64) for _ in range(world_size)] + dist.all_gather( + gathered_sizes, + torch.tensor([size], device=gather_device, dtype=torch.int64), + group=group, + ) + + gathered_sizes = [s[0].item() for s in gathered_sizes] + # NOTE: DON'T use tolist here due to graph break - Explanation: + # Backend compiler `inductor` failed with aten._local_scalar_dense.default + return gathered_sizes diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/activations.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/activations.py new file mode 100644 index 0000000000000000000000000000000000000000..2d1fdb5f7d8303ae605afe4c6905af38950e4acb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/activations.py @@ -0,0 +1,178 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn.functional as F +from torch import nn + +from ..utils import deprecate +from ..utils.import_utils import is_torch_npu_available, is_torch_version + + +if is_torch_npu_available(): + import torch_npu + +ACT2CLS = { + "swish": nn.SiLU, + "silu": nn.SiLU, + "mish": nn.Mish, + "gelu": nn.GELU, + "relu": nn.ReLU, +} + + +def get_activation(act_fn: str) -> nn.Module: + """Helper function to get activation function from string. + + Args: + act_fn (str): Name of activation function. + + Returns: + nn.Module: Activation function. + """ + + act_fn = act_fn.lower() + if act_fn in ACT2CLS: + return ACT2CLS[act_fn]() + else: + raise ValueError(f"activation function {act_fn} not found in ACT2FN mapping {list(ACT2CLS.keys())}") + + +class FP32SiLU(nn.Module): + r""" + SiLU activation function with input upcasted to torch.float32. + """ + + def __init__(self): + super().__init__() + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + return F.silu(inputs.float(), inplace=False).to(inputs.dtype) + + +class GELU(nn.Module): + r""" + GELU activation function with tanh approximation support with `approximate="tanh"`. + + Parameters: + dim_in (`int`): The number of channels in the input. + dim_out (`int`): The number of channels in the output. + approximate (`str`, *optional*, defaults to `"none"`): If `"tanh"`, use tanh approximation. + bias (`bool`, defaults to True): Whether to use a bias in the linear layer. + """ + + def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out, bias=bias) + self.approximate = approximate + + def gelu(self, gate: torch.Tensor) -> torch.Tensor: + if gate.device.type == "mps" and is_torch_version("<", "2.0.0"): + # fp16 gelu not supported on mps before torch 2.0 + return F.gelu(gate.to(dtype=torch.float32), approximate=self.approximate).to(dtype=gate.dtype) + return F.gelu(gate, approximate=self.approximate) + + def forward(self, hidden_states): + hidden_states = self.proj(hidden_states) + hidden_states = self.gelu(hidden_states) + return hidden_states + + +class GEGLU(nn.Module): + r""" + A [variant](https://huggingface.co/papers/2002.05202) of the gated linear unit activation function. + + Parameters: + dim_in (`int`): The number of channels in the input. + dim_out (`int`): The number of channels in the output. + bias (`bool`, defaults to True): Whether to use a bias in the linear layer. + """ + + def __init__(self, dim_in: int, dim_out: int, bias: bool = True): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2, bias=bias) + + def gelu(self, gate: torch.Tensor) -> torch.Tensor: + if gate.device.type == "mps" and is_torch_version("<", "2.0.0"): + # fp16 gelu not supported on mps before torch 2.0 + return F.gelu(gate.to(dtype=torch.float32)).to(dtype=gate.dtype) + return F.gelu(gate) + + def forward(self, hidden_states, *args, **kwargs): + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + hidden_states = self.proj(hidden_states) + if is_torch_npu_available(): + # using torch_npu.npu_geglu can run faster and save memory on NPU. + return torch_npu.npu_geglu(hidden_states, dim=-1, approximate=1)[0] + else: + hidden_states, gate = hidden_states.chunk(2, dim=-1) + return hidden_states * self.gelu(gate) + + +class SwiGLU(nn.Module): + r""" + A [variant](https://huggingface.co/papers/2002.05202) of the gated linear unit activation function. It's similar to + `GEGLU` but uses SiLU / Swish instead of GeLU. + + Parameters: + dim_in (`int`): The number of channels in the input. + dim_out (`int`): The number of channels in the output. + bias (`bool`, defaults to True): Whether to use a bias in the linear layer. + """ + + def __init__(self, dim_in: int, dim_out: int, bias: bool = True): + super().__init__() + + self.proj = nn.Linear(dim_in, dim_out * 2, bias=bias) + self.activation = nn.SiLU() + + def forward(self, hidden_states): + hidden_states = self.proj(hidden_states) + hidden_states, gate = hidden_states.chunk(2, dim=-1) + return hidden_states * self.activation(gate) + + +class ApproximateGELU(nn.Module): + r""" + The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this + [paper](https://huggingface.co/papers/1606.08415). + + Parameters: + dim_in (`int`): The number of channels in the input. + dim_out (`int`): The number of channels in the output. + bias (`bool`, defaults to True): Whether to use a bias in the linear layer. + """ + + def __init__(self, dim_in: int, dim_out: int, bias: bool = True): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out, bias=bias) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.proj(x) + return x * torch.sigmoid(1.702 * x) + + +class LinearActivation(nn.Module): + def __init__(self, dim_in: int, dim_out: int, bias: bool = True, activation: str = "silu"): + super().__init__() + + self.proj = nn.Linear(dim_in, dim_out, bias=bias) + self.activation = get_activation(activation) + + def forward(self, hidden_states): + hidden_states = self.proj(hidden_states) + return self.activation(hidden_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/adapter.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..f0652c581a3eda0d79ee524051300c693113933f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/adapter.py @@ -0,0 +1,583 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import Callable + +import torch +import torch.nn as nn + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .modeling_utils import ModelMixin + + +logger = logging.get_logger(__name__) + + +class MultiAdapter(ModelMixin): + r""" + MultiAdapter is a wrapper model that contains multiple adapter models and merges their outputs according to + user-assigned weighting. + + This model inherits from [`ModelMixin`]. Check the superclass documentation for common methods such as downloading + or saving. + + Args: + adapters (`list[T2IAdapter]`, *optional*, defaults to None): + A list of `T2IAdapter` model instances. + """ + + def __init__(self, adapters: list["T2IAdapter"]): + super(MultiAdapter, self).__init__() + + self.num_adapter = len(adapters) + self.adapters = nn.ModuleList(adapters) + + if len(adapters) == 0: + raise ValueError("Expecting at least one adapter") + + if len(adapters) == 1: + raise ValueError("For a single adapter, please use the `T2IAdapter` class instead of `MultiAdapter`") + + # The outputs from each adapter are added together with a weight. + # This means that the change in dimensions from downsampling must + # be the same for all adapters. Inductively, it also means the + # downscale_factor and total_downscale_factor must be the same for all + # adapters. + first_adapter_total_downscale_factor = adapters[0].total_downscale_factor + first_adapter_downscale_factor = adapters[0].downscale_factor + for idx in range(1, len(adapters)): + if ( + adapters[idx].total_downscale_factor != first_adapter_total_downscale_factor + or adapters[idx].downscale_factor != first_adapter_downscale_factor + ): + raise ValueError( + f"Expecting all adapters to have the same downscaling behavior, but got:\n" + f"adapters[0].total_downscale_factor={first_adapter_total_downscale_factor}\n" + f"adapters[0].downscale_factor={first_adapter_downscale_factor}\n" + f"adapter[`{idx}`].total_downscale_factor={adapters[idx].total_downscale_factor}\n" + f"adapter[`{idx}`].downscale_factor={adapters[idx].downscale_factor}" + ) + + self.total_downscale_factor = first_adapter_total_downscale_factor + self.downscale_factor = first_adapter_downscale_factor + + def forward(self, xs: torch.Tensor, adapter_weights: list[float] | None = None) -> list[torch.Tensor]: + r""" + Args: + xs (`torch.Tensor`): + A tensor of shape (batch, channel, height, width) representing input images for multiple adapter + models, concatenated along dimension 1(channel dimension). The `channel` dimension should be equal to + `num_adapter` * number of channel per image. + + adapter_weights (`list[float]`, *optional*, defaults to None): + A list of floats representing the weights which will be multiplied by each adapter's output before + summing them together. If `None`, equal weights will be used for all adapters. + """ + if adapter_weights is None: + adapter_weights = torch.tensor([1 / self.num_adapter] * self.num_adapter) + else: + adapter_weights = torch.tensor(adapter_weights) + + accume_state = None + for x, w, adapter in zip(xs, adapter_weights, self.adapters): + features = adapter(x) + if accume_state is None: + accume_state = features + for i in range(len(accume_state)): + accume_state[i] = w * accume_state[i] + else: + for i in range(len(features)): + accume_state[i] += w * features[i] + return accume_state + + def save_pretrained( + self, + save_directory: str | os.PathLike, + is_main_process: bool = True, + save_function: Callable = None, + safe_serialization: bool = True, + variant: str | None = None, + ): + """ + Save a model and its configuration file to a specified directory, allowing it to be re-loaded with the + `[`~models.adapter.MultiAdapter.from_pretrained`]` class method. + + Args: + save_directory (`str` or `os.PathLike`): + The directory where the model will be saved. If the directory does not exist, it will be created. + is_main_process (`bool`, optional, defaults=True): + Indicates whether current process is the main process or not. Useful for distributed training (e.g., + TPUs) and need to call this function on all processes. In this case, set `is_main_process=True` only + for the main process to avoid race conditions. + save_function (`Callable`): + Function used to save the state dictionary. Useful for distributed training (e.g., TPUs) to replace + `torch.save` with another method. Can also be configured using`DIFFUSERS_SAVE_MODE` environment + variable. + safe_serialization (`bool`, optional, defaults=True): + If `True`, save the model using `safetensors`. If `False`, save the model with `pickle`. + variant (`str`, *optional*): + If specified, weights are saved in the format `pytorch_model..bin`. + """ + idx = 0 + model_path_to_save = save_directory + for adapter in self.adapters: + adapter.save_pretrained( + model_path_to_save, + is_main_process=is_main_process, + save_function=save_function, + safe_serialization=safe_serialization, + variant=variant, + ) + + idx += 1 + model_path_to_save = model_path_to_save + f"_{idx}" + + @classmethod + def from_pretrained(cls, pretrained_model_path: str | os.PathLike | None, **kwargs): + r""" + Instantiate a pretrained `MultiAdapter` model from multiple pre-trained adapter models. + + The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train + the model, set it back to training mode using `model.train()`. + + Warnings: + *Weights from XXX not initialized from pretrained model* means that the weights of XXX are not pretrained + with the rest of the model. It is up to you to train those weights with a downstream fine-tuning. *Weights + from XXX not used in YYY* means that the layer XXX is not used by YYY, so those weights are discarded. + + Args: + pretrained_model_path (`os.PathLike`): + A path to a *directory* containing model weights saved using + [`~diffusers.models.adapter.MultiAdapter.save_pretrained`], e.g., `./my_model_directory/adapter`. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model under this dtype. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each + parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the + same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary mapping device identifiers to their maximum memory. Default to the maximum memory + available for each GPU and the available CPU RAM if unset. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading by not initializing the weights and only loading the pre-trained weights. This + also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the + model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, + setting this argument to `True` will raise an error. + variant (`str`, *optional*): + If specified, load weights from a `variant` file (*e.g.* pytorch_model..bin). `variant` will + be ignored when using `from_flax`. + use_safetensors (`bool`, *optional*, defaults to `None`): + If `None`, the `safetensors` weights will be downloaded if available **and** if`safetensors` library is + installed. If `True`, the model will be forcibly loaded from`safetensors` weights. If `False`, + `safetensors` is not used. + """ + idx = 0 + adapters = [] + + # load adapter and append to list until no adapter directory exists anymore + # first adapter has to be saved under `./mydirectory/adapter` to be compliant with `DiffusionPipeline.from_pretrained` + # second, third, ... adapters have to be saved under `./mydirectory/adapter_1`, `./mydirectory/adapter_2`, ... + model_path_to_load = pretrained_model_path + while os.path.isdir(model_path_to_load): + adapter = T2IAdapter.from_pretrained(model_path_to_load, **kwargs) + adapters.append(adapter) + + idx += 1 + model_path_to_load = pretrained_model_path + f"_{idx}" + + logger.info(f"{len(adapters)} adapters loaded from {pretrained_model_path}.") + + if len(adapters) == 0: + raise ValueError( + f"No T2IAdapters found under {os.path.dirname(pretrained_model_path)}. Expected at least {pretrained_model_path + '_0'}." + ) + + return cls(adapters) + + +class T2IAdapter(ModelMixin, ConfigMixin): + r""" + A simple ResNet-like model that accepts images containing control signals such as keyposes and depth. The model + generates multiple feature maps that are used as additional conditioning in [`UNet2DConditionModel`]. The model's + architecture follows the original implementation of + [Adapter](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L97) + and + [AdapterLight](https://github.com/TencentARC/T2I-Adapter/blob/686de4681515662c0ac2ffa07bf5dda83af1038a/ldm/modules/encoders/adapter.py#L235). + + This model inherits from [`ModelMixin`]. Check the superclass documentation for the common methods, such as + downloading or saving. + + Args: + in_channels (`int`, *optional*, defaults to `3`): + The number of channels in the adapter's input (*control image*). Set it to 1 if you're using a gray scale + image. + channels (`list[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`): + The number of channels in each downsample block's output hidden state. The `len(block_out_channels)` + determines the number of downsample blocks in the adapter. + num_res_blocks (`int`, *optional*, defaults to `2`): + Number of ResNet blocks in each downsample block. + downscale_factor (`int`, *optional*, defaults to `8`): + A factor that determines the total downscale factor of the Adapter. + adapter_type (`str`, *optional*, defaults to `full_adapter`): + Adapter type (`full_adapter` or `full_adapter_xl` or `light_adapter`) to use. + """ + + @register_to_config + def __init__( + self, + in_channels: int = 3, + channels: list[int] = [320, 640, 1280, 1280], + num_res_blocks: int = 2, + downscale_factor: int = 8, + adapter_type: str = "full_adapter", + ): + super().__init__() + + if adapter_type == "full_adapter": + self.adapter = FullAdapter(in_channels, channels, num_res_blocks, downscale_factor) + elif adapter_type == "full_adapter_xl": + self.adapter = FullAdapterXL(in_channels, channels, num_res_blocks, downscale_factor) + elif adapter_type == "light_adapter": + self.adapter = LightAdapter(in_channels, channels, num_res_blocks, downscale_factor) + else: + raise ValueError( + f"Unsupported adapter_type: '{adapter_type}'. Choose either 'full_adapter' or " + "'full_adapter_xl' or 'light_adapter'." + ) + + def forward(self, x: torch.Tensor) -> list[torch.Tensor]: + r""" + This function processes the input tensor `x` through the adapter model and returns a list of feature tensors, + each representing information extracted at a different scale from the input. The length of the list is + determined by the number of downsample blocks in the Adapter, as specified by the `channels` and + `num_res_blocks` parameters during initialization. + """ + return self.adapter(x) + + @property + def total_downscale_factor(self): + return self.adapter.total_downscale_factor + + @property + def downscale_factor(self): + """The downscale factor applied in the T2I-Adapter's initial pixel unshuffle operation. If an input image's dimensions are + not evenly divisible by the downscale_factor then an exception will be raised. + """ + return self.adapter.unshuffle.downscale_factor + + +# full adapter + + +class FullAdapter(nn.Module): + r""" + See [`T2IAdapter`] for more information. + """ + + def __init__( + self, + in_channels: int = 3, + channels: list[int] = [320, 640, 1280, 1280], + num_res_blocks: int = 2, + downscale_factor: int = 8, + ): + super().__init__() + + in_channels = in_channels * downscale_factor**2 + + self.unshuffle = nn.PixelUnshuffle(downscale_factor) + self.conv_in = nn.Conv2d(in_channels, channels[0], kernel_size=3, padding=1) + + self.body = nn.ModuleList( + [ + AdapterBlock(channels[0], channels[0], num_res_blocks), + *[ + AdapterBlock(channels[i - 1], channels[i], num_res_blocks, down=True) + for i in range(1, len(channels)) + ], + ] + ) + + self.total_downscale_factor = downscale_factor * 2 ** (len(channels) - 1) + + def forward(self, x: torch.Tensor) -> list[torch.Tensor]: + r""" + This method processes the input tensor `x` through the FullAdapter model and performs operations including + pixel unshuffling, convolution, and a stack of AdapterBlocks. It returns a list of feature tensors, each + capturing information at a different stage of processing within the FullAdapter model. The number of feature + tensors in the list is determined by the number of downsample blocks specified during initialization. + """ + x = self.unshuffle(x) + x = self.conv_in(x) + + features = [] + + for block in self.body: + x = block(x) + features.append(x) + + return features + + +class FullAdapterXL(nn.Module): + r""" + See [`T2IAdapter`] for more information. + """ + + def __init__( + self, + in_channels: int = 3, + channels: list[int] = [320, 640, 1280, 1280], + num_res_blocks: int = 2, + downscale_factor: int = 16, + ): + super().__init__() + + in_channels = in_channels * downscale_factor**2 + + self.unshuffle = nn.PixelUnshuffle(downscale_factor) + self.conv_in = nn.Conv2d(in_channels, channels[0], kernel_size=3, padding=1) + + self.body = [] + # blocks to extract XL features with dimensions of [320, 64, 64], [640, 64, 64], [1280, 32, 32], [1280, 32, 32] + for i in range(len(channels)): + if i == 1: + self.body.append(AdapterBlock(channels[i - 1], channels[i], num_res_blocks)) + elif i == 2: + self.body.append(AdapterBlock(channels[i - 1], channels[i], num_res_blocks, down=True)) + else: + self.body.append(AdapterBlock(channels[i], channels[i], num_res_blocks)) + + self.body = nn.ModuleList(self.body) + # XL has only one downsampling AdapterBlock. + self.total_downscale_factor = downscale_factor * 2 + + def forward(self, x: torch.Tensor) -> list[torch.Tensor]: + r""" + This method takes the tensor x as input and processes it through FullAdapterXL model. It consists of operations + including unshuffling pixels, applying convolution layer and appending each block into list of feature tensors. + """ + x = self.unshuffle(x) + x = self.conv_in(x) + + features = [] + + for block in self.body: + x = block(x) + features.append(x) + + return features + + +class AdapterBlock(nn.Module): + r""" + An AdapterBlock is a helper model that contains multiple ResNet-like blocks. It is used in the `FullAdapter` and + `FullAdapterXL` models. + + Args: + in_channels (`int`): + Number of channels of AdapterBlock's input. + out_channels (`int`): + Number of channels of AdapterBlock's output. + num_res_blocks (`int`): + Number of ResNet blocks in the AdapterBlock. + down (`bool`, *optional*, defaults to `False`): + If `True`, perform downsampling on AdapterBlock's input. + """ + + def __init__(self, in_channels: int, out_channels: int, num_res_blocks: int, down: bool = False): + super().__init__() + + self.downsample = None + if down: + self.downsample = nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True) + + self.in_conv = None + if in_channels != out_channels: + self.in_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) + + self.resnets = nn.Sequential( + *[AdapterResnetBlock(out_channels) for _ in range(num_res_blocks)], + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + r""" + This method takes tensor x as input and performs operations downsampling and convolutional layers if the + self.downsample and self.in_conv properties of AdapterBlock model are specified. Then it applies a series of + residual blocks to the input tensor. + """ + if self.downsample is not None: + x = self.downsample(x) + + if self.in_conv is not None: + x = self.in_conv(x) + + x = self.resnets(x) + + return x + + +class AdapterResnetBlock(nn.Module): + r""" + An `AdapterResnetBlock` is a helper model that implements a ResNet-like block. + + Args: + channels (`int`): + Number of channels of AdapterResnetBlock's input and output. + """ + + def __init__(self, channels: int): + super().__init__() + self.block1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1) + self.act = nn.ReLU() + self.block2 = nn.Conv2d(channels, channels, kernel_size=1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + r""" + This method takes input tensor x and applies a convolutional layer, ReLU activation, and another convolutional + layer on the input tensor. It returns addition with the input tensor. + """ + + h = self.act(self.block1(x)) + h = self.block2(h) + + return h + x + + +# light adapter + + +class LightAdapter(nn.Module): + r""" + See [`T2IAdapter`] for more information. + """ + + def __init__( + self, + in_channels: int = 3, + channels: list[int] = [320, 640, 1280], + num_res_blocks: int = 4, + downscale_factor: int = 8, + ): + super().__init__() + + in_channels = in_channels * downscale_factor**2 + + self.unshuffle = nn.PixelUnshuffle(downscale_factor) + + self.body = nn.ModuleList( + [ + LightAdapterBlock(in_channels, channels[0], num_res_blocks), + *[ + LightAdapterBlock(channels[i], channels[i + 1], num_res_blocks, down=True) + for i in range(len(channels) - 1) + ], + LightAdapterBlock(channels[-1], channels[-1], num_res_blocks, down=True), + ] + ) + + self.total_downscale_factor = downscale_factor * (2 ** len(channels)) + + def forward(self, x: torch.Tensor) -> list[torch.Tensor]: + r""" + This method takes the input tensor x and performs downscaling and appends it in list of feature tensors. Each + feature tensor corresponds to a different level of processing within the LightAdapter. + """ + x = self.unshuffle(x) + + features = [] + + for block in self.body: + x = block(x) + features.append(x) + + return features + + +class LightAdapterBlock(nn.Module): + r""" + A `LightAdapterBlock` is a helper model that contains multiple `LightAdapterResnetBlocks`. It is used in the + `LightAdapter` model. + + Args: + in_channels (`int`): + Number of channels of LightAdapterBlock's input. + out_channels (`int`): + Number of channels of LightAdapterBlock's output. + num_res_blocks (`int`): + Number of LightAdapterResnetBlocks in the LightAdapterBlock. + down (`bool`, *optional*, defaults to `False`): + If `True`, perform downsampling on LightAdapterBlock's input. + """ + + def __init__(self, in_channels: int, out_channels: int, num_res_blocks: int, down: bool = False): + super().__init__() + mid_channels = out_channels // 4 + + self.downsample = None + if down: + self.downsample = nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True) + + self.in_conv = nn.Conv2d(in_channels, mid_channels, kernel_size=1) + self.resnets = nn.Sequential(*[LightAdapterResnetBlock(mid_channels) for _ in range(num_res_blocks)]) + self.out_conv = nn.Conv2d(mid_channels, out_channels, kernel_size=1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + r""" + This method takes tensor x as input and performs downsampling if required. Then it applies in convolution + layer, a sequence of residual blocks, and out convolutional layer. + """ + if self.downsample is not None: + x = self.downsample(x) + + x = self.in_conv(x) + x = self.resnets(x) + x = self.out_conv(x) + + return x + + +class LightAdapterResnetBlock(nn.Module): + """ + A `LightAdapterResnetBlock` is a helper model that implements a ResNet-like block with a slightly different + architecture than `AdapterResnetBlock`. + + Args: + channels (`int`): + Number of channels of LightAdapterResnetBlock's input and output. + """ + + def __init__(self, channels: int): + super().__init__() + self.block1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1) + self.act = nn.ReLU() + self.block2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + r""" + This function takes input tensor x and processes it through one convolutional layer, ReLU activation, and + another convolutional layer and adds it to input tensor. + """ + + h = self.act(self.block1(x)) + h = self.block2(h) + + return h + x diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..36d0893734c706f6205516827c6fa46b4af456f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention.py @@ -0,0 +1,1742 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Callable + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import deprecate, logging +from ..utils.import_utils import is_torch_npu_available, is_torch_xla_available, is_xformers_available +from ..utils.torch_utils import maybe_allow_in_graph +from .activations import GEGLU, GELU, ApproximateGELU, FP32SiLU, LinearActivation, SwiGLU +from .attention_processor import Attention, AttentionProcessor, JointAttnProcessor2_0 +from .embeddings import SinusoidalPositionalEmbedding +from .normalization import AdaLayerNorm, AdaLayerNormContinuous, AdaLayerNormZero, RMSNorm, SD35AdaLayerNormZeroX + + +if is_xformers_available(): + import xformers as xops +else: + xops = None + + +logger = logging.get_logger(__name__) + + +class AttentionMixin: + @property + def attn_processors(self) -> dict[str, AttentionProcessor]: + r""" + Returns: + `dict` of attention processors: A dictionary containing all attention processors used in the model with + indexed by its weight name. + """ + # set recursively + processors = {} + + def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: dict[str, AttentionProcessor]): + if hasattr(module, "get_processor"): + processors[f"{name}.processor"] = module.get_processor() + + for sub_name, child in module.named_children(): + fn_recursive_add_processors(f"{name}.{sub_name}", child, processors) + + return processors + + for name, module in self.named_children(): + fn_recursive_add_processors(name, module, processors) + + return processors + + def set_attn_processor(self, processor: AttentionProcessor | dict[str, AttentionProcessor]): + r""" + Sets the attention processor to use to compute attention. + + Parameters: + processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`): + The instantiated processor class or a dictionary of processor classes that will be set as the processor + for **all** `Attention` layers. + + If `processor` is a dict, the key needs to define the path to the corresponding cross attention + processor. This is strongly recommended when setting trainable attention processors. + + """ + count = len(self.attn_processors.keys()) + + if isinstance(processor, dict) and len(processor) != count: + raise ValueError( + f"A dict of processors was passed, but the number of processors {len(processor)} does not match the" + f" number of attention layers: {count}. Please make sure to pass {count} processor classes." + ) + + def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor): + if hasattr(module, "set_processor"): + if not isinstance(processor, dict): + module.set_processor(processor) + else: + module.set_processor(processor.pop(f"{name}.processor")) + + for sub_name, child in module.named_children(): + fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor) + + for name, module in self.named_children(): + fn_recursive_attn_processor(name, module, processor) + + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + """ + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + for module in self.modules(): + if isinstance(module, AttentionModuleMixin) and module._supports_qkv_fusion: + module.fuse_projections() + + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + """ + for module in self.modules(): + if isinstance(module, AttentionModuleMixin) and module._supports_qkv_fusion: + module.unfuse_projections() + + +class AttentionModuleMixin: + _default_processor_cls = None + _available_processors = [] + _supports_qkv_fusion = True + fused_projections = False + + def set_processor(self, processor: AttentionProcessor) -> None: + """ + Set the attention processor to use. + + Args: + processor (`AttnProcessor`): + The attention processor to use. + """ + # if current processor is in `self._modules` and if passed `processor` is not, we need to + # pop `processor` from `self._modules` + if ( + hasattr(self, "processor") + and isinstance(self.processor, torch.nn.Module) + and not isinstance(processor, torch.nn.Module) + ): + logger.info(f"You are removing possibly trained weights of {self.processor} with {processor}") + self._modules.pop("processor") + + self.processor = processor + + def get_processor(self, return_deprecated_lora: bool = False) -> "AttentionProcessor": + """ + Get the attention processor in use. + + Args: + return_deprecated_lora (`bool`, *optional*, defaults to `False`): + Set to `True` to return the deprecated LoRA attention processor. + + Returns: + "AttentionProcessor": The attention processor in use. + """ + if not return_deprecated_lora: + return self.processor + + def set_attention_backend(self, backend: str): + from .attention_dispatch import AttentionBackendName + + available_backends = {x.value for x in AttentionBackendName.__members__.values()} + if backend not in available_backends: + raise ValueError(f"`{backend=}` must be one of the following: " + ", ".join(available_backends)) + + backend = AttentionBackendName(backend.lower()) + self.processor._attention_backend = backend + + def set_use_npu_flash_attention(self, use_npu_flash_attention: bool) -> None: + """ + Set whether to use NPU flash attention from `torch_npu` or not. + + Args: + use_npu_flash_attention (`bool`): Whether to use NPU flash attention or not. + """ + + if use_npu_flash_attention: + if not is_torch_npu_available(): + raise ImportError("torch_npu is not available") + + self.set_attention_backend("_native_npu") + + def set_use_xla_flash_attention( + self, + use_xla_flash_attention: bool, + partition_spec: tuple[str | None, ...] | None = None, + is_flux=False, + ) -> None: + """ + Set whether to use XLA flash attention from `torch_xla` or not. + + Args: + use_xla_flash_attention (`bool`): + Whether to use pallas flash attention kernel from `torch_xla` or not. + partition_spec (`tuple[]`, *optional*): + Specify the partition specification if using SPMD. Otherwise None. + is_flux (`bool`, *optional*, defaults to `False`): + Whether the model is a Flux model. + """ + if use_xla_flash_attention: + if not is_torch_xla_available(): + raise ImportError("torch_xla is not available") + + self.set_attention_backend("_native_xla") + + def set_use_memory_efficient_attention_xformers( + self, use_memory_efficient_attention_xformers: bool, attention_op: Callable | None = None + ) -> None: + """ + Set whether to use memory efficient attention from `xformers` or not. + + Args: + use_memory_efficient_attention_xformers (`bool`): + Whether to use memory efficient attention from `xformers` or not. + attention_op (`Callable`, *optional*): + The attention operation to use. Defaults to `None` which uses the default attention operation from + `xformers`. + """ + if use_memory_efficient_attention_xformers: + if not is_xformers_available(): + raise ModuleNotFoundError( + "Refer to https://github.com/facebookresearch/xformers for more information on how to install xformers", + name="xformers", + ) + elif not torch.cuda.is_available(): + raise ValueError( + "torch.cuda.is_available() should be True but is False. xformers' memory efficient attention is" + " only available for GPU " + ) + else: + try: + # Make sure we can run the memory efficient attention + if is_xformers_available(): + dtype = None + if attention_op is not None: + op_fw, op_bw = attention_op + dtype, *_ = op_fw.SUPPORTED_DTYPES + q = torch.randn((1, 2, 40), device="cuda", dtype=dtype) + _ = xops.ops.memory_efficient_attention(q, q, q) + except Exception as e: + raise e + + self.set_attention_backend("xformers") + + @torch.no_grad() + def fuse_projections(self): + """ + Fuse the query, key, and value projections into a single projection for efficiency. + """ + # Skip if the AttentionModuleMixin subclass does not support fusion (for example, the QKV projections in Flux2 + # single stream blocks are always fused) + if not self._supports_qkv_fusion: + logger.debug( + f"{self.__class__.__name__} does not support fusing QKV projections, so `fuse_projections` will no-op." + ) + return + + # Skip if already fused + if getattr(self, "fused_projections", False): + return + + device = self.to_q.weight.data.device + dtype = self.to_q.weight.data.dtype + + if hasattr(self, "is_cross_attention") and self.is_cross_attention: + # Fuse cross-attention key-value projections + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + in_features = concatenated_weights.shape[1] + out_features = concatenated_weights.shape[0] + + self.to_kv = nn.Linear(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) + self.to_kv.weight.copy_(concatenated_weights) + if hasattr(self, "use_bias") and self.use_bias: + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + self.to_kv.bias.copy_(concatenated_bias) + else: + # Fuse self-attention projections + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + in_features = concatenated_weights.shape[1] + out_features = concatenated_weights.shape[0] + + self.to_qkv = nn.Linear(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) + self.to_qkv.weight.copy_(concatenated_weights) + if hasattr(self, "use_bias") and self.use_bias: + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + self.to_qkv.bias.copy_(concatenated_bias) + + # Handle added projections for models like SD3, Flux, etc. + if ( + getattr(self, "add_q_proj", None) is not None + and getattr(self, "add_k_proj", None) is not None + and getattr(self, "add_v_proj", None) is not None + ): + concatenated_weights = torch.cat( + [self.add_q_proj.weight.data, self.add_k_proj.weight.data, self.add_v_proj.weight.data] + ) + in_features = concatenated_weights.shape[1] + out_features = concatenated_weights.shape[0] + + self.to_added_qkv = nn.Linear( + in_features, out_features, bias=self.added_proj_bias, device=device, dtype=dtype + ) + self.to_added_qkv.weight.copy_(concatenated_weights) + if self.added_proj_bias: + concatenated_bias = torch.cat( + [self.add_q_proj.bias.data, self.add_k_proj.bias.data, self.add_v_proj.bias.data] + ) + self.to_added_qkv.bias.copy_(concatenated_bias) + + self.fused_projections = True + + @torch.no_grad() + def unfuse_projections(self): + """ + Unfuse the query, key, and value projections back to separate projections. + """ + # Skip if the AttentionModuleMixin subclass does not support fusion (for example, the QKV projections in Flux2 + # single stream blocks are always fused) + if not self._supports_qkv_fusion: + return + + # Skip if not fused + if not getattr(self, "fused_projections", False): + return + + # Remove fused projection layers + if hasattr(self, "to_qkv"): + delattr(self, "to_qkv") + + if hasattr(self, "to_kv"): + delattr(self, "to_kv") + + if hasattr(self, "to_added_qkv"): + delattr(self, "to_added_qkv") + + self.fused_projections = False + + def set_attention_slice(self, slice_size: int) -> None: + """ + Set the slice size for attention computation. + + Args: + slice_size (`int`): + The slice size for attention computation. + """ + if hasattr(self, "sliceable_head_dim") and slice_size is not None and slice_size > self.sliceable_head_dim: + raise ValueError(f"slice_size {slice_size} has to be smaller or equal to {self.sliceable_head_dim}.") + + processor = None + + # Try to get a compatible processor for sliced attention + if slice_size is not None: + processor = self._get_compatible_processor("sliced") + + # If no processor was found or slice_size is None, use default processor + if processor is None: + processor = self.default_processor_cls() + + self.set_processor(processor) + + def batch_to_head_dim(self, tensor: torch.Tensor) -> torch.Tensor: + """ + Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`. + + Args: + tensor (`torch.Tensor`): The tensor to reshape. + + Returns: + `torch.Tensor`: The reshaped tensor. + """ + head_size = self.heads + batch_size, seq_len, dim = tensor.shape + tensor = tensor.reshape(batch_size // head_size, head_size, seq_len, dim) + tensor = tensor.permute(0, 2, 1, 3).reshape(batch_size // head_size, seq_len, dim * head_size) + return tensor + + def head_to_batch_dim(self, tensor: torch.Tensor, out_dim: int = 3) -> torch.Tensor: + """ + Reshape the tensor for multi-head attention processing. + + Args: + tensor (`torch.Tensor`): The tensor to reshape. + out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor. + + Returns: + `torch.Tensor`: The reshaped tensor. + """ + head_size = self.heads + if tensor.ndim == 3: + batch_size, seq_len, dim = tensor.shape + extra_dim = 1 + else: + batch_size, extra_dim, seq_len, dim = tensor.shape + tensor = tensor.reshape(batch_size, seq_len * extra_dim, head_size, dim // head_size) + tensor = tensor.permute(0, 2, 1, 3) + + if out_dim == 3: + tensor = tensor.reshape(batch_size * head_size, seq_len * extra_dim, dim // head_size) + + return tensor + + def get_attention_scores( + self, query: torch.Tensor, key: torch.Tensor, attention_mask: torch.Tensor | None = None + ) -> torch.Tensor: + """ + Compute the attention scores. + + Args: + query (`torch.Tensor`): The query tensor. + key (`torch.Tensor`): The key tensor. + attention_mask (`torch.Tensor`, *optional*): The attention mask to use. + + Returns: + `torch.Tensor`: The attention probabilities/scores. + """ + dtype = query.dtype + if self.upcast_attention: + query = query.float() + key = key.float() + + if attention_mask is None: + baddbmm_input = torch.empty( + query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device + ) + beta = 0 + else: + baddbmm_input = attention_mask + beta = 1 + + attention_scores = torch.baddbmm( + baddbmm_input, + query, + key.transpose(-1, -2), + beta=beta, + alpha=self.scale, + ) + del baddbmm_input + + if self.upcast_softmax: + attention_scores = attention_scores.float() + + attention_probs = attention_scores.softmax(dim=-1) + del attention_scores + + attention_probs = attention_probs.to(dtype) + + return attention_probs + + def prepare_attention_mask( + self, attention_mask: torch.Tensor, target_length: int, batch_size: int, out_dim: int = 3 + ) -> torch.Tensor: + """ + Prepare the attention mask for the attention computation. + + Args: + attention_mask (`torch.Tensor`): The attention mask to prepare. + target_length (`int`): The target length of the attention mask. + batch_size (`int`): The batch size for repeating the attention mask. + out_dim (`int`, *optional*, defaults to `3`): Output dimension. + + Returns: + `torch.Tensor`: The prepared attention mask. + """ + head_size = self.heads + if attention_mask is None: + return attention_mask + + current_length: int = attention_mask.shape[-1] + if current_length != target_length: + if attention_mask.device.type == "mps": + # HACK: MPS: Does not support padding by greater than dimension of input tensor. + # Instead, we can manually construct the padding tensor. + padding_shape = (attention_mask.shape[0], attention_mask.shape[1], target_length) + padding = torch.zeros(padding_shape, dtype=attention_mask.dtype, device=attention_mask.device) + attention_mask = torch.cat([attention_mask, padding], dim=2) + else: + # TODO: for pipelines such as stable-diffusion, padding cross-attn mask: + # we want to instead pad by (0, remaining_length), where remaining_length is: + # remaining_length: int = target_length - current_length + # TODO: re-enable tests/models/test_models_unet_2d_condition.py#test_model_xattn_padding + attention_mask = F.pad(attention_mask, (0, target_length), value=0.0) + + if out_dim == 3: + if attention_mask.shape[0] < batch_size * head_size: + attention_mask = attention_mask.repeat_interleave(head_size, dim=0) + elif out_dim == 4: + attention_mask = attention_mask.unsqueeze(1) + attention_mask = attention_mask.repeat_interleave(head_size, dim=1) + + return attention_mask + + def norm_encoder_hidden_states(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor: + """ + Normalize the encoder hidden states. + + Args: + encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder. + + Returns: + `torch.Tensor`: The normalized encoder hidden states. + """ + assert self.norm_cross is not None, "self.norm_cross must be defined to call self.norm_encoder_hidden_states" + if isinstance(self.norm_cross, nn.LayerNorm): + encoder_hidden_states = self.norm_cross(encoder_hidden_states) + elif isinstance(self.norm_cross, nn.GroupNorm): + # Group norm norms along the channels dimension and expects + # input to be in the shape of (N, C, *). In this case, we want + # to norm along the hidden dimension, so we need to move + # (batch_size, sequence_length, hidden_size) -> + # (batch_size, hidden_size, sequence_length) + encoder_hidden_states = encoder_hidden_states.transpose(1, 2) + encoder_hidden_states = self.norm_cross(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.transpose(1, 2) + else: + assert False + + return encoder_hidden_states + + +def _chunked_feed_forward(ff: nn.Module, hidden_states: torch.Tensor, chunk_dim: int, chunk_size: int): + # "feed_forward_chunk_size" can be used to save memory + if hidden_states.shape[chunk_dim] % chunk_size != 0: + raise ValueError( + f"`hidden_states` dimension to be chunked: {hidden_states.shape[chunk_dim]} has to be divisible by chunk size: {chunk_size}. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`." + ) + + num_chunks = hidden_states.shape[chunk_dim] // chunk_size + ff_output = torch.cat( + [ff(hid_slice) for hid_slice in hidden_states.chunk(num_chunks, dim=chunk_dim)], + dim=chunk_dim, + ) + return ff_output + + +@maybe_allow_in_graph +class GatedSelfAttentionDense(nn.Module): + r""" + A gated self-attention dense layer that combines visual features and object features. + + Parameters: + query_dim (`int`): The number of channels in the query. + context_dim (`int`): The number of channels in the context. + n_heads (`int`): The number of heads to use for attention. + d_head (`int`): The number of channels in each head. + """ + + def __init__(self, query_dim: int, context_dim: int, n_heads: int, d_head: int): + super().__init__() + + # we need a linear projection since we need cat visual feature and obj feature + self.linear = nn.Linear(context_dim, query_dim) + + self.attn = Attention(query_dim=query_dim, heads=n_heads, dim_head=d_head) + self.ff = FeedForward(query_dim, activation_fn="geglu") + + self.norm1 = nn.LayerNorm(query_dim) + self.norm2 = nn.LayerNorm(query_dim) + + self.register_parameter("alpha_attn", nn.Parameter(torch.tensor(0.0))) + self.register_parameter("alpha_dense", nn.Parameter(torch.tensor(0.0))) + + self.enabled = True + + def forward(self, x: torch.Tensor, objs: torch.Tensor) -> torch.Tensor: + if not self.enabled: + return x + + n_visual = x.shape[1] + objs = self.linear(objs) + + x = x + self.alpha_attn.tanh() * self.attn(self.norm1(torch.cat([x, objs], dim=1)))[:, :n_visual, :] + x = x + self.alpha_dense.tanh() * self.ff(self.norm2(x)) + + return x + + +@maybe_allow_in_graph +class JointTransformerBlock(nn.Module): + r""" + A Transformer block following the MMDiT architecture, introduced in Stable Diffusion 3. + + Reference: https://huggingface.co/papers/2403.03206 + + Parameters: + dim (`int`): The number of channels in the input and output. + num_attention_heads (`int`): The number of heads to use for multi-head attention. + attention_head_dim (`int`): The number of channels in each head. + context_pre_only (`bool`): Boolean to determine if we should add some blocks associated with the + processing of `context` conditions. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + context_pre_only: bool = False, + qk_norm: str | None = None, + use_dual_attention: bool = False, + ): + super().__init__() + + self.use_dual_attention = use_dual_attention + self.context_pre_only = context_pre_only + context_norm_type = "ada_norm_continous" if context_pre_only else "ada_norm_zero" + + if use_dual_attention: + self.norm1 = SD35AdaLayerNormZeroX(dim) + else: + self.norm1 = AdaLayerNormZero(dim) + + if context_norm_type == "ada_norm_continous": + self.norm1_context = AdaLayerNormContinuous( + dim, dim, elementwise_affine=False, eps=1e-6, bias=True, norm_type="layer_norm" + ) + elif context_norm_type == "ada_norm_zero": + self.norm1_context = AdaLayerNormZero(dim) + else: + raise ValueError( + f"Unknown context_norm_type: {context_norm_type}, currently only support `ada_norm_continous`, `ada_norm_zero`" + ) + + if hasattr(F, "scaled_dot_product_attention"): + processor = JointAttnProcessor2_0() + else: + raise ValueError( + "The current PyTorch version does not support the `scaled_dot_product_attention` function." + ) + + self.attn = Attention( + query_dim=dim, + cross_attention_dim=None, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=context_pre_only, + bias=True, + processor=processor, + qk_norm=qk_norm, + eps=1e-6, + ) + + if use_dual_attention: + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=processor, + qk_norm=qk_norm, + eps=1e-6, + ) + else: + self.attn2 = None + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + if not context_pre_only: + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + else: + self.norm2_context = None + self.ff_context = None + + # let chunk size default to None + self._chunk_size = None + self._chunk_dim = 0 + + # Copied from diffusers.models.attention.BasicTransformerBlock.set_chunk_feed_forward + def set_chunk_feed_forward(self, chunk_size: int | None, dim: int = 0): + # Sets chunk feed-forward + self._chunk_size = chunk_size + self._chunk_dim = dim + + def forward( + self, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor, + temb: torch.FloatTensor, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + joint_attention_kwargs = joint_attention_kwargs or {} + if self.use_dual_attention: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp, norm_hidden_states2, gate_msa2 = self.norm1( + hidden_states, emb=temb + ) + else: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + if self.context_pre_only: + norm_encoder_hidden_states = self.norm1_context(encoder_hidden_states, temb) + else: + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + + # Attention. + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + **joint_attention_kwargs, + ) + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + if self.use_dual_attention: + attn_output2 = self.attn2(hidden_states=norm_hidden_states2, **joint_attention_kwargs) + attn_output2 = gate_msa2.unsqueeze(1) * attn_output2 + hidden_states = hidden_states + attn_output2 + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + if self._chunk_size is not None: + # "feed_forward_chunk_size" can be used to save memory + ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size) + else: + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + + # Process attention outputs for the `encoder_hidden_states`. + if self.context_pre_only: + encoder_hidden_states = None + else: + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + if self._chunk_size is not None: + # "feed_forward_chunk_size" can be used to save memory + context_ff_output = _chunked_feed_forward( + self.ff_context, norm_encoder_hidden_states, self._chunk_dim, self._chunk_size + ) + else: + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + + return encoder_hidden_states, hidden_states + + +@maybe_allow_in_graph +class BasicTransformerBlock(nn.Module): + r""" + A basic Transformer block. + + Parameters: + dim (`int`): The number of channels in the input and output. + num_attention_heads (`int`): The number of heads to use for multi-head attention. + attention_head_dim (`int`): The number of channels in each head. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward. + num_embeds_ada_norm (: + obj: `int`, *optional*): The number of diffusion steps used during training. See `Transformer2DModel`. + attention_bias (: + obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter. + only_cross_attention (`bool`, *optional*): + Whether to use only cross-attention layers. In this case two cross attention layers are used. + double_self_attention (`bool`, *optional*): + Whether to use two self-attention layers. In this case no cross attention layers are used. + upcast_attention (`bool`, *optional*): + Whether to upcast the attention computation to float32. This is useful for mixed precision training. + norm_elementwise_affine (`bool`, *optional*, defaults to `True`): + Whether to use learnable elementwise affine parameters for normalization. + norm_type (`str`, *optional*, defaults to `"layer_norm"`): + The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`. + final_dropout (`bool` *optional*, defaults to False): + Whether to apply a final dropout after the last feed-forward layer. + attention_type (`str`, *optional*, defaults to `"default"`): + The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`. + positional_embeddings (`str`, *optional*, defaults to `None`): + The type of positional embeddings to apply to. + num_positional_embeddings (`int`, *optional*, defaults to `None`): + The maximum number of positional embeddings to apply. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + dropout=0.0, + cross_attention_dim: int | None = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: int | None = None, + attention_bias: bool = False, + only_cross_attention: bool = False, + double_self_attention: bool = False, + upcast_attention: bool = False, + norm_elementwise_affine: bool = True, + norm_type: str = "layer_norm", # 'layer_norm', 'ada_norm', 'ada_norm_zero', 'ada_norm_single', 'ada_norm_continuous', 'layer_norm_i2vgen' + norm_eps: float = 1e-5, + final_dropout: bool = False, + attention_type: str = "default", + positional_embeddings: str | None = None, + num_positional_embeddings: int | None = None, + ada_norm_continous_conditioning_embedding_dim: int | None = None, + ada_norm_bias: int | None = None, + ff_inner_dim: int | None = None, + ff_bias: bool = True, + attention_out_bias: bool = True, + ): + super().__init__() + self.dim = dim + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + self.dropout = dropout + self.cross_attention_dim = cross_attention_dim + self.activation_fn = activation_fn + self.attention_bias = attention_bias + self.double_self_attention = double_self_attention + self.norm_elementwise_affine = norm_elementwise_affine + self.positional_embeddings = positional_embeddings + self.num_positional_embeddings = num_positional_embeddings + self.only_cross_attention = only_cross_attention + + # We keep these boolean flags for backward-compatibility. + self.use_ada_layer_norm_zero = (num_embeds_ada_norm is not None) and norm_type == "ada_norm_zero" + self.use_ada_layer_norm = (num_embeds_ada_norm is not None) and norm_type == "ada_norm" + self.use_ada_layer_norm_single = norm_type == "ada_norm_single" + self.use_layer_norm = norm_type == "layer_norm" + self.use_ada_layer_norm_continuous = norm_type == "ada_norm_continuous" + + if norm_type in ("ada_norm", "ada_norm_zero") and num_embeds_ada_norm is None: + raise ValueError( + f"`norm_type` is set to {norm_type}, but `num_embeds_ada_norm` is not defined. Please make sure to" + f" define `num_embeds_ada_norm` if setting `norm_type` to {norm_type}." + ) + + self.norm_type = norm_type + self.num_embeds_ada_norm = num_embeds_ada_norm + + if positional_embeddings and (num_positional_embeddings is None): + raise ValueError( + "If `positional_embedding` type is defined, `num_positition_embeddings` must also be defined." + ) + + if positional_embeddings == "sinusoidal": + self.pos_embed = SinusoidalPositionalEmbedding(dim, max_seq_length=num_positional_embeddings) + else: + self.pos_embed = None + + # Define 3 blocks. Each block has its own normalization layer. + # 1. Self-Attn + if norm_type == "ada_norm": + self.norm1 = AdaLayerNorm(dim, num_embeds_ada_norm) + elif norm_type == "ada_norm_zero": + self.norm1 = AdaLayerNormZero(dim, num_embeds_ada_norm) + elif norm_type == "ada_norm_continuous": + self.norm1 = AdaLayerNormContinuous( + dim, + ada_norm_continous_conditioning_embedding_dim, + norm_elementwise_affine, + norm_eps, + ada_norm_bias, + "rms_norm", + ) + else: + self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=cross_attention_dim if only_cross_attention else None, + upcast_attention=upcast_attention, + out_bias=attention_out_bias, + ) + + # 2. Cross-Attn + if cross_attention_dim is not None or double_self_attention: + # We currently only use AdaLayerNormZero for self attention where there will only be one attention block. + # I.e. the number of returned modulation chunks from AdaLayerZero would not make sense if returned during + # the second cross attention block. + if norm_type == "ada_norm": + self.norm2 = AdaLayerNorm(dim, num_embeds_ada_norm) + elif norm_type == "ada_norm_continuous": + self.norm2 = AdaLayerNormContinuous( + dim, + ada_norm_continous_conditioning_embedding_dim, + norm_elementwise_affine, + norm_eps, + ada_norm_bias, + "rms_norm", + ) + else: + self.norm2 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine) + + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim if not double_self_attention else None, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + upcast_attention=upcast_attention, + out_bias=attention_out_bias, + ) # is self-attn if encoder_hidden_states is none + else: + if norm_type == "ada_norm_single": # For Latte + self.norm2 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine) + else: + self.norm2 = None + self.attn2 = None + + # 3. Feed-forward + if norm_type == "ada_norm_continuous": + self.norm3 = AdaLayerNormContinuous( + dim, + ada_norm_continous_conditioning_embedding_dim, + norm_elementwise_affine, + norm_eps, + ada_norm_bias, + "layer_norm", + ) + + elif norm_type in ["ada_norm_zero", "ada_norm", "layer_norm"]: + self.norm3 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine) + elif norm_type == "layer_norm_i2vgen": + self.norm3 = None + + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + final_dropout=final_dropout, + inner_dim=ff_inner_dim, + bias=ff_bias, + ) + + # 4. Fuser + if attention_type == "gated" or attention_type == "gated-text-image": + self.fuser = GatedSelfAttentionDense(dim, cross_attention_dim, num_attention_heads, attention_head_dim) + + # 5. Scale-shift for PixArt-Alpha. + if norm_type == "ada_norm_single": + self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5) + + # let chunk size default to None + self._chunk_size = None + self._chunk_dim = 0 + + def set_chunk_feed_forward(self, chunk_size: int | None, dim: int = 0): + # Sets chunk feed-forward + self._chunk_size = chunk_size + self._chunk_dim = dim + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + timestep: torch.LongTensor | None = None, + cross_attention_kwargs: dict[str, Any] = None, + class_labels: torch.LongTensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + ) -> torch.Tensor: + if cross_attention_kwargs is not None: + if cross_attention_kwargs.get("scale", None) is not None: + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") + + # Notice that normalization is always applied before the real computation in the following blocks. + # 0. Self-Attention + batch_size = hidden_states.shape[0] + + if self.norm_type == "ada_norm": + norm_hidden_states = self.norm1(hidden_states, timestep) + elif self.norm_type == "ada_norm_zero": + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1( + hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype + ) + elif self.norm_type in ["layer_norm", "layer_norm_i2vgen"]: + norm_hidden_states = self.norm1(hidden_states) + elif self.norm_type == "ada_norm_continuous": + norm_hidden_states = self.norm1(hidden_states, added_cond_kwargs["pooled_text_emb"]) + elif self.norm_type == "ada_norm_single": + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ( + self.scale_shift_table[None] + timestep.reshape(batch_size, 6, -1) + ).chunk(6, dim=1) + norm_hidden_states = self.norm1(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + else: + raise ValueError("Incorrect norm used") + + if self.pos_embed is not None: + norm_hidden_states = self.pos_embed(norm_hidden_states) + + # 1. Prepare GLIGEN inputs + cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {} + gligen_kwargs = cross_attention_kwargs.pop("gligen", None) + + attn_output = self.attn1( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None, + attention_mask=attention_mask, + **cross_attention_kwargs, + ) + + if self.norm_type == "ada_norm_zero": + attn_output = gate_msa.unsqueeze(1) * attn_output + elif self.norm_type == "ada_norm_single": + attn_output = gate_msa * attn_output + + hidden_states = attn_output + hidden_states + if hidden_states.ndim == 4: + hidden_states = hidden_states.squeeze(1) + + # 1.2 GLIGEN Control + if gligen_kwargs is not None: + hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"]) + + # 3. Cross-Attention + if self.attn2 is not None: + if self.norm_type == "ada_norm": + norm_hidden_states = self.norm2(hidden_states, timestep) + elif self.norm_type in ["ada_norm_zero", "layer_norm", "layer_norm_i2vgen"]: + norm_hidden_states = self.norm2(hidden_states) + elif self.norm_type == "ada_norm_single": + # For PixArt norm2 isn't applied here: + # https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L70C1-L76C103 + norm_hidden_states = hidden_states + elif self.norm_type == "ada_norm_continuous": + norm_hidden_states = self.norm2(hidden_states, added_cond_kwargs["pooled_text_emb"]) + else: + raise ValueError("Incorrect norm") + + if self.pos_embed is not None and self.norm_type != "ada_norm_single": + norm_hidden_states = self.pos_embed(norm_hidden_states) + + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + **cross_attention_kwargs, + ) + hidden_states = attn_output + hidden_states + + # 4. Feed-forward + # i2vgen doesn't have this norm 🤷‍♂️ + if self.norm_type == "ada_norm_continuous": + norm_hidden_states = self.norm3(hidden_states, added_cond_kwargs["pooled_text_emb"]) + elif not self.norm_type == "ada_norm_single": + norm_hidden_states = self.norm3(hidden_states) + + if self.norm_type == "ada_norm_zero": + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + if self.norm_type == "ada_norm_single": + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp + + if self._chunk_size is not None: + # "feed_forward_chunk_size" can be used to save memory + ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size) + else: + ff_output = self.ff(norm_hidden_states) + + if self.norm_type == "ada_norm_zero": + ff_output = gate_mlp.unsqueeze(1) * ff_output + elif self.norm_type == "ada_norm_single": + ff_output = gate_mlp * ff_output + + hidden_states = ff_output + hidden_states + if hidden_states.ndim == 4: + hidden_states = hidden_states.squeeze(1) + + return hidden_states + + +class LuminaFeedForward(nn.Module): + r""" + A feed-forward layer. + + Parameters: + hidden_size (`int`): + The dimensionality of the hidden layers in the model. This parameter determines the width of the model's + hidden representations. + intermediate_size (`int`): The intermediate dimension of the feedforward layer. + multiple_of (`int`, *optional*): Value to ensure hidden dimension is a multiple + of this value. + ffn_dim_multiplier (float, *optional*): Custom multiplier for hidden + dimension. Defaults to None. + """ + + def __init__( + self, + dim: int, + inner_dim: int, + multiple_of: int | None = 256, + ffn_dim_multiplier: float | None = None, + ): + super().__init__() + # custom hidden_size factor multiplier + if ffn_dim_multiplier is not None: + inner_dim = int(ffn_dim_multiplier * inner_dim) + inner_dim = multiple_of * ((inner_dim + multiple_of - 1) // multiple_of) + + self.linear_1 = nn.Linear( + dim, + inner_dim, + bias=False, + ) + self.linear_2 = nn.Linear( + inner_dim, + dim, + bias=False, + ) + self.linear_3 = nn.Linear( + dim, + inner_dim, + bias=False, + ) + self.silu = FP32SiLU() + + def forward(self, x): + return self.linear_2(self.silu(self.linear_1(x)) * self.linear_3(x)) + + +@maybe_allow_in_graph +class TemporalBasicTransformerBlock(nn.Module): + r""" + A basic Transformer block for video like data. + + Parameters: + dim (`int`): The number of channels in the input and output. + time_mix_inner_dim (`int`): The number of channels for temporal attention. + num_attention_heads (`int`): The number of heads to use for multi-head attention. + attention_head_dim (`int`): The number of channels in each head. + cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention. + """ + + def __init__( + self, + dim: int, + time_mix_inner_dim: int, + num_attention_heads: int, + attention_head_dim: int, + cross_attention_dim: int | None = None, + ): + super().__init__() + self.is_res = dim == time_mix_inner_dim + + self.norm_in = nn.LayerNorm(dim) + + # Define 3 blocks. Each block has its own normalization layer. + # 1. Self-Attn + self.ff_in = FeedForward( + dim, + dim_out=time_mix_inner_dim, + activation_fn="geglu", + ) + + self.norm1 = nn.LayerNorm(time_mix_inner_dim) + self.attn1 = Attention( + query_dim=time_mix_inner_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + cross_attention_dim=None, + ) + + # 2. Cross-Attn + if cross_attention_dim is not None: + # We currently only use AdaLayerNormZero for self attention where there will only be one attention block. + # I.e. the number of returned modulation chunks from AdaLayerZero would not make sense if returned during + # the second cross attention block. + self.norm2 = nn.LayerNorm(time_mix_inner_dim) + self.attn2 = Attention( + query_dim=time_mix_inner_dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + ) # is self-attn if encoder_hidden_states is none + else: + self.norm2 = None + self.attn2 = None + + # 3. Feed-forward + self.norm3 = nn.LayerNorm(time_mix_inner_dim) + self.ff = FeedForward(time_mix_inner_dim, activation_fn="geglu") + + # let chunk size default to None + self._chunk_size = None + self._chunk_dim = None + + def set_chunk_feed_forward(self, chunk_size: int | None, **kwargs): + # Sets chunk feed-forward + self._chunk_size = chunk_size + # chunk dim should be hardcoded to 1 to have better speed vs. memory trade-off + self._chunk_dim = 1 + + def forward( + self, + hidden_states: torch.Tensor, + num_frames: int, + encoder_hidden_states: torch.Tensor | None = None, + ) -> torch.Tensor: + # Notice that normalization is always applied before the real computation in the following blocks. + # 0. Self-Attention + batch_size = hidden_states.shape[0] + + batch_frames, seq_length, channels = hidden_states.shape + batch_size = batch_frames // num_frames + + hidden_states = hidden_states[None, :].reshape(batch_size, num_frames, seq_length, channels) + hidden_states = hidden_states.permute(0, 2, 1, 3) + hidden_states = hidden_states.reshape(batch_size * seq_length, num_frames, channels) + + residual = hidden_states + hidden_states = self.norm_in(hidden_states) + + if self._chunk_size is not None: + hidden_states = _chunked_feed_forward(self.ff_in, hidden_states, self._chunk_dim, self._chunk_size) + else: + hidden_states = self.ff_in(hidden_states) + + if self.is_res: + hidden_states = hidden_states + residual + + norm_hidden_states = self.norm1(hidden_states) + attn_output = self.attn1(norm_hidden_states, encoder_hidden_states=None) + hidden_states = attn_output + hidden_states + + # 3. Cross-Attention + if self.attn2 is not None: + norm_hidden_states = self.norm2(hidden_states) + attn_output = self.attn2(norm_hidden_states, encoder_hidden_states=encoder_hidden_states) + hidden_states = attn_output + hidden_states + + # 4. Feed-forward + norm_hidden_states = self.norm3(hidden_states) + + if self._chunk_size is not None: + ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size) + else: + ff_output = self.ff(norm_hidden_states) + + if self.is_res: + hidden_states = ff_output + hidden_states + else: + hidden_states = ff_output + + hidden_states = hidden_states[None, :].reshape(batch_size, seq_length, num_frames, channels) + hidden_states = hidden_states.permute(0, 2, 1, 3) + hidden_states = hidden_states.reshape(batch_size * num_frames, seq_length, channels) + + return hidden_states + + +class SkipFFTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + kv_input_dim: int, + kv_input_dim_proj_use_bias: bool, + dropout=0.0, + cross_attention_dim: int | None = None, + attention_bias: bool = False, + attention_out_bias: bool = True, + ): + super().__init__() + if kv_input_dim != dim: + self.kv_mapper = nn.Linear(kv_input_dim, dim, kv_input_dim_proj_use_bias) + else: + self.kv_mapper = None + + self.norm1 = RMSNorm(dim, 1e-06) + + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=cross_attention_dim, + out_bias=attention_out_bias, + ) + + self.norm2 = RMSNorm(dim, 1e-06) + + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + out_bias=attention_out_bias, + ) + + def forward(self, hidden_states, encoder_hidden_states, cross_attention_kwargs): + cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {} + + if self.kv_mapper is not None: + encoder_hidden_states = self.kv_mapper(F.silu(encoder_hidden_states)) + + norm_hidden_states = self.norm1(hidden_states) + + attn_output = self.attn1( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + **cross_attention_kwargs, + ) + + hidden_states = attn_output + hidden_states + + norm_hidden_states = self.norm2(hidden_states) + + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + **cross_attention_kwargs, + ) + + hidden_states = attn_output + hidden_states + + return hidden_states + + +@maybe_allow_in_graph +class FreeNoiseTransformerBlock(nn.Module): + r""" + A FreeNoise Transformer block. + + Parameters: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + cross_attention_dim (`int`, *optional*): + The size of the encoder_hidden_states vector for cross attention. + activation_fn (`str`, *optional*, defaults to `"geglu"`): + Activation function to be used in feed-forward. + num_embeds_ada_norm (`int`, *optional*): + The number of diffusion steps used during training. See `Transformer2DModel`. + attention_bias (`bool`, defaults to `False`): + Configure if the attentions should contain a bias parameter. + only_cross_attention (`bool`, defaults to `False`): + Whether to use only cross-attention layers. In this case two cross attention layers are used. + double_self_attention (`bool`, defaults to `False`): + Whether to use two self-attention layers. In this case no cross attention layers are used. + upcast_attention (`bool`, defaults to `False`): + Whether to upcast the attention computation to float32. This is useful for mixed precision training. + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use learnable elementwise affine parameters for normalization. + norm_type (`str`, defaults to `"layer_norm"`): + The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`. + final_dropout (`bool` defaults to `False`): + Whether to apply a final dropout after the last feed-forward layer. + attention_type (`str`, defaults to `"default"`): + The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`. + positional_embeddings (`str`, *optional*): + The type of positional embeddings to apply to. + num_positional_embeddings (`int`, *optional*, defaults to `None`): + The maximum number of positional embeddings to apply. + ff_inner_dim (`int`, *optional*): + Hidden dimension of feed-forward MLP. + ff_bias (`bool`, defaults to `True`): + Whether or not to use bias in feed-forward MLP. + attention_out_bias (`bool`, defaults to `True`): + Whether or not to use bias in attention output project layer. + context_length (`int`, defaults to `16`): + The maximum number of frames that the FreeNoise block processes at once. + context_stride (`int`, defaults to `4`): + The number of frames to be skipped before starting to process a new batch of `context_length` frames. + weighting_scheme (`str`, defaults to `"pyramid"`): + The weighting scheme to use for weighting averaging of processed latent frames. As described in the + Equation 9. of the [FreeNoise](https://huggingface.co/papers/2310.15169) paper, "pyramid" is the default + setting used. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + dropout: float = 0.0, + cross_attention_dim: int | None = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: int | None = None, + attention_bias: bool = False, + only_cross_attention: bool = False, + double_self_attention: bool = False, + upcast_attention: bool = False, + norm_elementwise_affine: bool = True, + norm_type: str = "layer_norm", + norm_eps: float = 1e-5, + final_dropout: bool = False, + positional_embeddings: str | None = None, + num_positional_embeddings: int | None = None, + ff_inner_dim: int | None = None, + ff_bias: bool = True, + attention_out_bias: bool = True, + context_length: int = 16, + context_stride: int = 4, + weighting_scheme: str = "pyramid", + ): + super().__init__() + self.dim = dim + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + self.dropout = dropout + self.cross_attention_dim = cross_attention_dim + self.activation_fn = activation_fn + self.attention_bias = attention_bias + self.double_self_attention = double_self_attention + self.norm_elementwise_affine = norm_elementwise_affine + self.positional_embeddings = positional_embeddings + self.num_positional_embeddings = num_positional_embeddings + self.only_cross_attention = only_cross_attention + + self.set_free_noise_properties(context_length, context_stride, weighting_scheme) + + # We keep these boolean flags for backward-compatibility. + self.use_ada_layer_norm_zero = (num_embeds_ada_norm is not None) and norm_type == "ada_norm_zero" + self.use_ada_layer_norm = (num_embeds_ada_norm is not None) and norm_type == "ada_norm" + self.use_ada_layer_norm_single = norm_type == "ada_norm_single" + self.use_layer_norm = norm_type == "layer_norm" + self.use_ada_layer_norm_continuous = norm_type == "ada_norm_continuous" + + if norm_type in ("ada_norm", "ada_norm_zero") and num_embeds_ada_norm is None: + raise ValueError( + f"`norm_type` is set to {norm_type}, but `num_embeds_ada_norm` is not defined. Please make sure to" + f" define `num_embeds_ada_norm` if setting `norm_type` to {norm_type}." + ) + + self.norm_type = norm_type + self.num_embeds_ada_norm = num_embeds_ada_norm + + if positional_embeddings and (num_positional_embeddings is None): + raise ValueError( + "If `positional_embedding` type is defined, `num_positition_embeddings` must also be defined." + ) + + if positional_embeddings == "sinusoidal": + self.pos_embed = SinusoidalPositionalEmbedding(dim, max_seq_length=num_positional_embeddings) + else: + self.pos_embed = None + + # Define 3 blocks. Each block has its own normalization layer. + # 1. Self-Attn + self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=cross_attention_dim if only_cross_attention else None, + upcast_attention=upcast_attention, + out_bias=attention_out_bias, + ) + + # 2. Cross-Attn + if cross_attention_dim is not None or double_self_attention: + self.norm2 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine) + + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim if not double_self_attention else None, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + upcast_attention=upcast_attention, + out_bias=attention_out_bias, + ) # is self-attn if encoder_hidden_states is none + + # 3. Feed-forward + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + final_dropout=final_dropout, + inner_dim=ff_inner_dim, + bias=ff_bias, + ) + + self.norm3 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine) + + # let chunk size default to None + self._chunk_size = None + self._chunk_dim = 0 + + def _get_frame_indices(self, num_frames: int) -> list[tuple[int, int]]: + frame_indices = [] + for i in range(0, num_frames - self.context_length + 1, self.context_stride): + window_start = i + window_end = min(num_frames, i + self.context_length) + frame_indices.append((window_start, window_end)) + return frame_indices + + def _get_frame_weights(self, num_frames: int, weighting_scheme: str = "pyramid") -> list[float]: + if weighting_scheme == "flat": + weights = [1.0] * num_frames + + elif weighting_scheme == "pyramid": + if num_frames % 2 == 0: + # num_frames = 4 => [1, 2, 2, 1] + mid = num_frames // 2 + weights = list(range(1, mid + 1)) + weights = weights + weights[::-1] + else: + # num_frames = 5 => [1, 2, 3, 2, 1] + mid = (num_frames + 1) // 2 + weights = list(range(1, mid)) + weights = weights + [mid] + weights[::-1] + + elif weighting_scheme == "delayed_reverse_sawtooth": + if num_frames % 2 == 0: + # num_frames = 4 => [0.01, 2, 2, 1] + mid = num_frames // 2 + weights = [0.01] * (mid - 1) + [mid] + weights = weights + list(range(mid, 0, -1)) + else: + # num_frames = 5 => [0.01, 0.01, 3, 2, 1] + mid = (num_frames + 1) // 2 + weights = [0.01] * mid + weights = weights + list(range(mid, 0, -1)) + else: + raise ValueError(f"Unsupported value for weighting_scheme={weighting_scheme}") + + return weights + + def set_free_noise_properties( + self, context_length: int, context_stride: int, weighting_scheme: str = "pyramid" + ) -> None: + self.context_length = context_length + self.context_stride = context_stride + self.weighting_scheme = weighting_scheme + + def set_chunk_feed_forward(self, chunk_size: int | None, dim: int = 0) -> None: + # Sets chunk feed-forward + self._chunk_size = chunk_size + self._chunk_dim = dim + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + cross_attention_kwargs: dict[str, Any] = None, + *args, + **kwargs, + ) -> torch.Tensor: + if cross_attention_kwargs is not None: + if cross_attention_kwargs.get("scale", None) is not None: + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") + + cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {} + + # hidden_states: [B x H x W, F, C] + device = hidden_states.device + dtype = hidden_states.dtype + + num_frames = hidden_states.size(1) + frame_indices = self._get_frame_indices(num_frames) + frame_weights = self._get_frame_weights(self.context_length, self.weighting_scheme) + frame_weights = torch.tensor(frame_weights, device=device, dtype=dtype).unsqueeze(0).unsqueeze(-1) + is_last_frame_batch_complete = frame_indices[-1][1] == num_frames + + # Handle out-of-bounds case if num_frames isn't perfectly divisible by context_length + # For example, num_frames=25, context_length=16, context_stride=4, then we expect the ranges: + # [(0, 16), (4, 20), (8, 24), (10, 26)] + if not is_last_frame_batch_complete: + if num_frames < self.context_length: + raise ValueError(f"Expected {num_frames=} to be greater or equal than {self.context_length=}") + last_frame_batch_length = num_frames - frame_indices[-1][1] + frame_indices.append((num_frames - self.context_length, num_frames)) + + num_times_accumulated = torch.zeros((1, num_frames, 1), device=device) + accumulated_values = torch.zeros_like(hidden_states) + + for i, (frame_start, frame_end) in enumerate(frame_indices): + # The reason for slicing here is to ensure that if (frame_end - frame_start) is to handle + # cases like frame_indices=[(0, 16), (16, 20)], if the user provided a video with 19 frames, or + # essentially a non-multiple of `context_length`. + weights = torch.ones_like(num_times_accumulated[:, frame_start:frame_end]) + weights *= frame_weights + + hidden_states_chunk = hidden_states[:, frame_start:frame_end] + + # Notice that normalization is always applied before the real computation in the following blocks. + # 1. Self-Attention + norm_hidden_states = self.norm1(hidden_states_chunk) + + if self.pos_embed is not None: + norm_hidden_states = self.pos_embed(norm_hidden_states) + + attn_output = self.attn1( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None, + attention_mask=attention_mask, + **cross_attention_kwargs, + ) + + hidden_states_chunk = attn_output + hidden_states_chunk + if hidden_states_chunk.ndim == 4: + hidden_states_chunk = hidden_states_chunk.squeeze(1) + + # 2. Cross-Attention + if self.attn2 is not None: + norm_hidden_states = self.norm2(hidden_states_chunk) + + if self.pos_embed is not None and self.norm_type != "ada_norm_single": + norm_hidden_states = self.pos_embed(norm_hidden_states) + + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + **cross_attention_kwargs, + ) + hidden_states_chunk = attn_output + hidden_states_chunk + + if i == len(frame_indices) - 1 and not is_last_frame_batch_complete: + accumulated_values[:, -last_frame_batch_length:] += ( + hidden_states_chunk[:, -last_frame_batch_length:] * weights[:, -last_frame_batch_length:] + ) + num_times_accumulated[:, -last_frame_batch_length:] += weights[:, -last_frame_batch_length] + else: + accumulated_values[:, frame_start:frame_end] += hidden_states_chunk * weights + num_times_accumulated[:, frame_start:frame_end] += weights + + # TODO(aryan): Maybe this could be done in a better way. + # + # Previously, this was: + # hidden_states = torch.where( + # num_times_accumulated > 0, accumulated_values / num_times_accumulated, accumulated_values + # ) + # + # The reasoning for the change here is `torch.where` became a bottleneck at some point when golfing memory + # spikes. It is particularly noticeable when the number of frames is high. My understanding is that this comes + # from tensors being copied - which is why we resort to spliting and concatenating here. I've not particularly + # looked into this deeply because other memory optimizations led to more pronounced reductions. + hidden_states = torch.cat( + [ + torch.where(num_times_split > 0, accumulated_split / num_times_split, accumulated_split) + for accumulated_split, num_times_split in zip( + accumulated_values.split(self.context_length, dim=1), + num_times_accumulated.split(self.context_length, dim=1), + ) + ], + dim=1, + ).to(dtype) + + # 3. Feed-forward + norm_hidden_states = self.norm3(hidden_states) + + if self._chunk_size is not None: + ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size) + else: + ff_output = self.ff(norm_hidden_states) + + hidden_states = ff_output + hidden_states + if hidden_states.ndim == 4: + hidden_states = hidden_states.squeeze(1) + + return hidden_states + + +class FeedForward(nn.Module): + r""" + A feed-forward layer. + + Parameters: + dim (`int`): The number of channels in the input. + dim_out (`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`. + mult (`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward. + final_dropout (`bool` *optional*, defaults to False): Apply a final dropout. + bias (`bool`, defaults to True): Whether to use a bias in the linear layer. + """ + + def __init__( + self, + dim: int, + dim_out: int | None = None, + mult: int = 4, + dropout: float = 0.0, + activation_fn: str = "geglu", + final_dropout: bool = False, + inner_dim=None, + bias: bool = True, + ): + super().__init__() + if inner_dim is None: + inner_dim = int(dim * mult) + dim_out = dim_out if dim_out is not None else dim + + if activation_fn == "gelu": + act_fn = GELU(dim, inner_dim, bias=bias) + if activation_fn == "gelu-approximate": + act_fn = GELU(dim, inner_dim, approximate="tanh", bias=bias) + elif activation_fn == "geglu": + act_fn = GEGLU(dim, inner_dim, bias=bias) + elif activation_fn == "geglu-approximate": + act_fn = ApproximateGELU(dim, inner_dim, bias=bias) + elif activation_fn == "swiglu": + act_fn = SwiGLU(dim, inner_dim, bias=bias) + elif activation_fn == "linear-silu": + act_fn = LinearActivation(dim, inner_dim, bias=bias, activation="silu") + + self.net = nn.ModuleList([]) + # project in + self.net.append(act_fn) + # project dropout + self.net.append(nn.Dropout(dropout)) + # project out + self.net.append(nn.Linear(inner_dim, dim_out, bias=bias)) + # FF as used in Vision Transformer, MLP-Mixer, etc. have a final dropout + if final_dropout: + self.net.append(nn.Dropout(dropout)) + + def forward(self, hidden_states: torch.Tensor, *args, **kwargs) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + for module in self.net: + hidden_states = module(hidden_states) + return hidden_states diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_dispatch.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_dispatch.py new file mode 100644 index 0000000000000000000000000000000000000000..5b1f831ed060629bc92de5738a2da72e2ff486ab --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_dispatch.py @@ -0,0 +1,3540 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import contextlib +import functools +import inspect +import math +from dataclasses import dataclass +from enum import Enum +from typing import TYPE_CHECKING, Any, Callable + +import torch +import torch.distributed as dist +import torch.nn.functional as F + + +if torch.distributed.is_available(): + import torch.distributed._functional_collectives as funcol + +from ..utils import ( + get_logger, + is_aiter_available, + is_aiter_version, + is_flash_attn_3_available, + is_flash_attn_available, + is_flash_attn_version, + is_kernels_available, + is_kernels_version, + is_sageattention_available, + is_sageattention_version, + is_torch_npu_available, + is_torch_version, + is_torch_xla_available, + is_torch_xla_version, + is_xformers_available, + is_xformers_version, +) +from ..utils.constants import DIFFUSERS_ATTN_BACKEND, DIFFUSERS_ATTN_CHECKS +from ..utils.torch_utils import maybe_allow_in_graph +from ._modeling_parallel import gather_size_by_comm + + +if TYPE_CHECKING: + from ._modeling_parallel import ParallelConfig + +_REQUIRED_FLASH_VERSION = "2.6.3" +_REQUIRED_AITER_VERSION = "0.1.5" +_REQUIRED_SAGE_VERSION = "2.1.1" +_REQUIRED_FLEX_VERSION = "2.5.0" +_REQUIRED_XLA_VERSION = "2.2" +_REQUIRED_XFORMERS_VERSION = "0.0.29" + +logger = get_logger(__name__) # pylint: disable=invalid-name + +_CAN_USE_FLASH_ATTN = is_flash_attn_available() and is_flash_attn_version(">=", _REQUIRED_FLASH_VERSION) +_CAN_USE_FLASH_ATTN_3 = is_flash_attn_3_available() +_CAN_USE_AITER_ATTN = is_aiter_available() and is_aiter_version(">=", _REQUIRED_AITER_VERSION) +_CAN_USE_SAGE_ATTN = is_sageattention_available() and is_sageattention_version(">=", _REQUIRED_SAGE_VERSION) +_CAN_USE_FLEX_ATTN = is_torch_version(">=", _REQUIRED_FLEX_VERSION) +_CAN_USE_NPU_ATTN = is_torch_npu_available() +_CAN_USE_XLA_ATTN = is_torch_xla_available() and is_torch_xla_version(">=", _REQUIRED_XLA_VERSION) +_CAN_USE_XFORMERS_ATTN = is_xformers_available() and is_xformers_version(">=", _REQUIRED_XFORMERS_VERSION) + + +if _CAN_USE_FLASH_ATTN: + try: + from flash_attn import flash_attn_func, flash_attn_varlen_func + from flash_attn.flash_attn_interface import _wrapped_flash_attn_backward, _wrapped_flash_attn_forward + except (ImportError, OSError, RuntimeError) as e: + # Handle ABI mismatch or other import failures gracefully. + # This can happen when flash_attn was compiled against a different PyTorch version. + logger.warning(f"flash_attn is installed but failed to import: {e}. Falling back to native PyTorch attention.") + _CAN_USE_FLASH_ATTN = False + flash_attn_func = None + flash_attn_varlen_func = None + _wrapped_flash_attn_backward = None + _wrapped_flash_attn_forward = None +else: + flash_attn_func = None + flash_attn_varlen_func = None + _wrapped_flash_attn_backward = None + _wrapped_flash_attn_forward = None + + +if _CAN_USE_FLASH_ATTN_3: + try: + from flash_attn_interface import flash_attn_func as flash_attn_3_func + from flash_attn_interface import flash_attn_varlen_func as flash_attn_3_varlen_func + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"flash_attn_3 failed to import: {e}. Falling back to native attention.") + _CAN_USE_FLASH_ATTN_3 = False + flash_attn_3_func = None + flash_attn_3_varlen_func = None +else: + flash_attn_3_func = None + flash_attn_3_varlen_func = None + +if _CAN_USE_AITER_ATTN: + try: + from aiter import flash_attn_func as aiter_flash_attn_func + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"aiter failed to import: {e}. Falling back to native attention.") + _CAN_USE_AITER_ATTN = False + aiter_flash_attn_func = None +else: + aiter_flash_attn_func = None + +if _CAN_USE_SAGE_ATTN: + try: + from sageattention import ( + sageattn, + sageattn_qk_int8_pv_fp8_cuda, + sageattn_qk_int8_pv_fp8_cuda_sm90, + sageattn_qk_int8_pv_fp16_cuda, + sageattn_qk_int8_pv_fp16_triton, + sageattn_varlen, + ) + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"sageattention failed to import: {e}. Falling back to native attention.") + _CAN_USE_SAGE_ATTN = False + sageattn = None + sageattn_qk_int8_pv_fp8_cuda = None + sageattn_qk_int8_pv_fp8_cuda_sm90 = None + sageattn_qk_int8_pv_fp16_cuda = None + sageattn_qk_int8_pv_fp16_triton = None + sageattn_varlen = None +else: + sageattn = None + sageattn_qk_int8_pv_fp16_cuda = None + sageattn_qk_int8_pv_fp16_triton = None + sageattn_qk_int8_pv_fp8_cuda = None + sageattn_qk_int8_pv_fp8_cuda_sm90 = None + sageattn_varlen = None + + +if _CAN_USE_FLEX_ATTN: + try: + # We cannot import the flex_attention function from the package directly because it is expected (from the + # pytorch documentation) that the user may compile it. If we import directly, we will not have access to the + # compiled function. + import torch.nn.attention.flex_attention as flex_attention + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"flex_attention failed to import: {e}. Falling back to native attention.") + _CAN_USE_FLEX_ATTN = False + flex_attention = None +else: + flex_attention = None + + +if _CAN_USE_NPU_ATTN: + try: + from torch_npu import npu_fusion_attention + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"torch_npu failed to import: {e}. Falling back to native attention.") + _CAN_USE_NPU_ATTN = False + npu_fusion_attention = None +else: + npu_fusion_attention = None + + +if _CAN_USE_XLA_ATTN: + try: + from torch_xla.experimental.custom_kernel import flash_attention as xla_flash_attention + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"torch_xla failed to import: {e}. Falling back to native attention.") + _CAN_USE_XLA_ATTN = False + xla_flash_attention = None +else: + xla_flash_attention = None + + +if _CAN_USE_XFORMERS_ATTN: + try: + import xformers.ops as xops + except (ImportError, OSError, RuntimeError) as e: + logger.warning(f"xformers failed to import: {e}. Falling back to native attention.") + _CAN_USE_XFORMERS_ATTN = False + xops = None +else: + xops = None + +# Version guard for PyTorch compatibility - custom_op was added in PyTorch 2.4 +if torch.__version__ >= "2.4.0": + _custom_op = torch.library.custom_op + _register_fake = torch.library.register_fake +else: + + def custom_op_no_op(name, fn=None, /, *, mutates_args, device_types=None, schema=None): + def wrap(func): + return func + + return wrap if fn is None else fn + + def register_fake_no_op(op, fn=None, /, *, lib=None, _stacklevel=1): + def wrap(func): + return func + + return wrap if fn is None else fn + + _custom_op = custom_op_no_op + _register_fake = register_fake_no_op + + +# TODO(aryan): Add support for the following: +# - Sage Attention++ +# - block sparse, radial and other attention methods +# - CP with sage attention, flex, xformers, other missing backends +# - Add support for normal and CP training with backends that don't support it yet + + +class AttentionBackendName(str, Enum): + # EAGER = "eager" + + # `flash-attn` + FLASH = "flash" + FLASH_HUB = "flash_hub" + FLASH_VARLEN = "flash_varlen" + FLASH_VARLEN_HUB = "flash_varlen_hub" + _FLASH_3 = "_flash_3" + _FLASH_VARLEN_3 = "_flash_varlen_3" + _FLASH_3_HUB = "_flash_3_hub" + _FLASH_3_VARLEN_HUB = "_flash_3_varlen_hub" + + # `aiter` + AITER = "aiter" + + # PyTorch native + FLEX = "flex" + NATIVE = "native" + _NATIVE_CUDNN = "_native_cudnn" + _NATIVE_EFFICIENT = "_native_efficient" + _NATIVE_FLASH = "_native_flash" + _NATIVE_MATH = "_native_math" + _NATIVE_NPU = "_native_npu" + _NATIVE_XLA = "_native_xla" + + # `sageattention` + SAGE = "sage" + SAGE_HUB = "sage_hub" + SAGE_VARLEN = "sage_varlen" + _SAGE_QK_INT8_PV_FP8_CUDA = "_sage_qk_int8_pv_fp8_cuda" + _SAGE_QK_INT8_PV_FP8_CUDA_SM90 = "_sage_qk_int8_pv_fp8_cuda_sm90" + _SAGE_QK_INT8_PV_FP16_CUDA = "_sage_qk_int8_pv_fp16_cuda" + _SAGE_QK_INT8_PV_FP16_TRITON = "_sage_qk_int8_pv_fp16_triton" + # TODO: let's not add support for Sparge Attention now because it requires tuning per model + # We can look into supporting something "autotune"-ing in the future + # SPARGE = "sparge" + + # `xformers` + XFORMERS = "xformers" + + +class _AttentionBackendRegistry: + _backends = {} + _constraints = {} + _supported_arg_names = {} + _supports_context_parallel = set() + _active_backend = AttentionBackendName(DIFFUSERS_ATTN_BACKEND) + _checks_enabled = DIFFUSERS_ATTN_CHECKS + + @classmethod + def register( + cls, + backend: AttentionBackendName, + constraints: list[Callable] | None = None, + supports_context_parallel: bool = False, + ): + logger.debug(f"Registering attention backend: {backend} with constraints: {constraints}") + + def decorator(func): + cls._backends[backend] = func + cls._constraints[backend] = constraints or [] + cls._supported_arg_names[backend] = set(inspect.signature(func).parameters.keys()) + if supports_context_parallel: + cls._supports_context_parallel.add(backend.value) + + return func + + return decorator + + @classmethod + def get_active_backend(cls): + return cls._active_backend, cls._backends[cls._active_backend] + + @classmethod + def set_active_backend(cls, backend: str): + cls._active_backend = backend + + @classmethod + def list_backends(cls): + return list(cls._backends.keys()) + + @classmethod + def _is_context_parallel_available( + cls, + backend: AttentionBackendName, + ) -> bool: + supports_context_parallel = backend.value in cls._supports_context_parallel + return supports_context_parallel + + +@dataclass +class _HubKernelConfig: + """Configuration for downloading and using a hub-based attention kernel.""" + + repo_id: str + function_attr: str + revision: str | None = None + version: int | None = None + kernel_fn: Callable | None = None + wrapped_forward_attr: str | None = None + wrapped_backward_attr: str | None = None + wrapped_forward_fn: Callable | None = None + wrapped_backward_fn: Callable | None = None + + +# Registry for hub-based attention kernels +_HUB_KERNELS_REGISTRY: dict["AttentionBackendName", _HubKernelConfig] = { + AttentionBackendName._FLASH_3_HUB: _HubKernelConfig( + repo_id="kernels-community/flash-attn3", + function_attr="flash_attn_func", + wrapped_forward_attr="flash_attn_interface._flash_attn_forward", + wrapped_backward_attr="flash_attn_interface._flash_attn_backward", + version=1, + ), + AttentionBackendName._FLASH_3_VARLEN_HUB: _HubKernelConfig( + repo_id="kernels-community/flash-attn3", + function_attr="flash_attn_varlen_func", + version=1, + ), + AttentionBackendName.FLASH_HUB: _HubKernelConfig( + repo_id="kernels-community/flash-attn2", + function_attr="flash_attn_func", + wrapped_forward_attr="flash_attn_interface._wrapped_flash_attn_forward", + wrapped_backward_attr="flash_attn_interface._wrapped_flash_attn_backward", + version=1, + ), + AttentionBackendName.FLASH_VARLEN_HUB: _HubKernelConfig( + repo_id="kernels-community/flash-attn2", + function_attr="flash_attn_varlen_func", + version=1, + ), + AttentionBackendName.SAGE_HUB: _HubKernelConfig( + repo_id="kernels-community/sage-attention", + function_attr="sageattn", + version=1, + ), +} + + +@contextlib.contextmanager +def attention_backend(backend: str | AttentionBackendName = AttentionBackendName.NATIVE): + """ + Context manager to set the active attention backend. + """ + if backend not in _AttentionBackendRegistry._backends: + raise ValueError(f"Backend {backend} is not registered.") + + backend = AttentionBackendName(backend) + _check_attention_backend_requirements(backend) + _maybe_download_kernel_for_backend(backend) + + old_backend = _AttentionBackendRegistry._active_backend + _AttentionBackendRegistry.set_active_backend(backend) + + try: + yield + finally: + _AttentionBackendRegistry.set_active_backend(old_backend) + + +def dispatch_attention_fn( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + attention_kwargs: dict[str, Any] | None = None, + *, + backend: AttentionBackendName | None = None, + parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + attention_kwargs = attention_kwargs or {} + + if backend is None: + # If no backend is specified, we either use the default backend (set via the DIFFUSERS_ATTN_BACKEND environment + # variable), or we use a custom backend based on whether user is using the `attention_backend` context manager + backend_name, backend_fn = _AttentionBackendRegistry.get_active_backend() + else: + backend_name = AttentionBackendName(backend) + backend_fn = _AttentionBackendRegistry._backends.get(backend_name) + + kwargs = { + "query": query, + "key": key, + "value": value, + "attn_mask": attn_mask, + "dropout_p": dropout_p, + "is_causal": is_causal, + "scale": scale, + **attention_kwargs, + "_parallel_config": parallel_config, + } + if is_torch_version(">=", "2.5.0"): + kwargs["enable_gqa"] = enable_gqa + + if _AttentionBackendRegistry._checks_enabled: + removed_kwargs = set(kwargs) - set(_AttentionBackendRegistry._supported_arg_names[backend_name]) + if removed_kwargs: + logger.warning(f"Removing unsupported arguments for attention backend {backend_name}: {removed_kwargs}.") + for check in _AttentionBackendRegistry._constraints.get(backend_name): + check(**kwargs) + + kwargs = {k: v for k, v in kwargs.items() if k in _AttentionBackendRegistry._supported_arg_names[backend_name]} + + return backend_fn(**kwargs) + + +# ===== Checks ===== +# A list of very simple functions to catch common errors quickly when debugging. + + +def _check_attn_mask_or_causal(attn_mask: torch.Tensor | None, is_causal: bool, **kwargs) -> None: + if attn_mask is not None and is_causal: + raise ValueError("`is_causal` cannot be True when `attn_mask` is not None.") + + +def _check_device(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, **kwargs) -> None: + if query.device != key.device or query.device != value.device: + raise ValueError("Query, key, and value must be on the same device.") + if query.dtype != key.dtype or query.dtype != value.dtype: + raise ValueError("Query, key, and value must have the same dtype.") + + +def _check_device_cuda(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, **kwargs) -> None: + _check_device(query, key, value) + if query.device.type != "cuda": + raise ValueError("Query, key, and value must be on a CUDA device.") + + +def _check_device_cuda_atleast_smXY(major: int, minor: int) -> Callable: + def check_device_cuda(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, **kwargs) -> None: + _check_device_cuda(query, key, value) + if torch.cuda.get_device_capability(query.device) < (major, minor): + raise ValueError( + f"Query, key, and value must be on a CUDA device with compute capability >= {major}.{minor}." + ) + + return check_device_cuda + + +def _check_qkv_dtype_match(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, **kwargs) -> None: + if query.dtype != key.dtype: + raise ValueError("Query and key must have the same dtype.") + if query.dtype != value.dtype: + raise ValueError("Query and value must have the same dtype.") + + +def _check_qkv_dtype_bf16_or_fp16(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, **kwargs) -> None: + _check_qkv_dtype_match(query, key, value) + if query.dtype not in (torch.bfloat16, torch.float16): + raise ValueError("Query, key, and value must be either bfloat16 or float16.") + + +def _check_shape( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + **kwargs, +) -> None: + # Expected shapes: + # query: (batch_size, seq_len_q, num_heads, head_dim) + # key: (batch_size, seq_len_kv, num_heads, head_dim) + # value: (batch_size, seq_len_kv, num_heads, head_dim) + # attn_mask: (seq_len_q, seq_len_kv) or (batch_size, seq_len_q, seq_len_kv) + # or (batch_size, num_heads, seq_len_q, seq_len_kv) + if query.shape[-1] != key.shape[-1]: + raise ValueError("Query and key must have the same head dimension.") + if key.shape[-3] != value.shape[-3]: + raise ValueError("Key and value must have the same sequence length.") + if attn_mask is not None and attn_mask.shape[-1] != key.shape[-3]: + raise ValueError("Attention mask must match the key's sequence length.") + + +# ===== Helper functions ===== + + +def _check_attention_backend_requirements(backend: AttentionBackendName) -> None: + if backend in [AttentionBackendName.FLASH, AttentionBackendName.FLASH_VARLEN]: + if not _CAN_USE_FLASH_ATTN: + raise RuntimeError( + f"Flash Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `flash-attn>={_REQUIRED_FLASH_VERSION}`." + ) + + elif backend in [AttentionBackendName._FLASH_3, AttentionBackendName._FLASH_VARLEN_3]: + if not _CAN_USE_FLASH_ATTN_3: + raise RuntimeError( + f"Flash Attention 3 backend '{backend.value}' is not usable because of missing package or the version is too old. Please build FA3 beta release from source." + ) + + elif backend in [ + AttentionBackendName.FLASH_HUB, + AttentionBackendName.FLASH_VARLEN_HUB, + AttentionBackendName._FLASH_3_HUB, + AttentionBackendName._FLASH_3_VARLEN_HUB, + AttentionBackendName.SAGE_HUB, + ]: + if not is_kernels_available(): + raise RuntimeError( + f"Backend '{backend.value}' is not usable because the `kernels` package isn't available. Please install it with `pip install kernels`." + ) + if not is_kernels_version(">=", "0.12"): + raise RuntimeError( + f"Backend '{backend.value}' needs to be used with a `kernels` version of at least 0.12. Please update with `pip install -U kernels`." + ) + + elif backend == AttentionBackendName.AITER: + if not _CAN_USE_AITER_ATTN: + raise RuntimeError( + f"Aiter Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `aiter>={_REQUIRED_AITER_VERSION}`." + ) + + elif backend in [ + AttentionBackendName.SAGE, + AttentionBackendName.SAGE_VARLEN, + AttentionBackendName._SAGE_QK_INT8_PV_FP8_CUDA, + AttentionBackendName._SAGE_QK_INT8_PV_FP8_CUDA_SM90, + AttentionBackendName._SAGE_QK_INT8_PV_FP16_CUDA, + AttentionBackendName._SAGE_QK_INT8_PV_FP16_TRITON, + ]: + if not _CAN_USE_SAGE_ATTN: + raise RuntimeError( + f"Sage Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `sageattention>={_REQUIRED_SAGE_VERSION}`." + ) + + elif backend == AttentionBackendName.FLEX: + if not _CAN_USE_FLEX_ATTN: + raise RuntimeError( + f"Flex Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `torch>=2.5.0`." + ) + + elif backend == AttentionBackendName._NATIVE_NPU: + if not _CAN_USE_NPU_ATTN: + raise RuntimeError( + f"NPU Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `torch_npu`." + ) + + elif backend == AttentionBackendName._NATIVE_XLA: + if not _CAN_USE_XLA_ATTN: + raise RuntimeError( + f"XLA Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `torch_xla>={_REQUIRED_XLA_VERSION}`." + ) + + elif backend == AttentionBackendName.XFORMERS: + if not _CAN_USE_XFORMERS_ATTN: + raise RuntimeError( + f"Xformers Attention backend '{backend.value}' is not usable because of missing package or the version is too old. Please install `xformers>={_REQUIRED_XFORMERS_VERSION}`." + ) + + +@functools.lru_cache(maxsize=128) +def _prepare_for_flash_attn_or_sage_varlen_without_mask( + batch_size: int, + seq_len_q: int, + seq_len_kv: int, + device: torch.device | None = None, +): + seqlens_q = torch.full((batch_size,), seq_len_q, dtype=torch.int32, device=device) + seqlens_k = torch.full((batch_size,), seq_len_kv, dtype=torch.int32, device=device) + cu_seqlens_q = torch.zeros(batch_size + 1, dtype=torch.int32, device=device) + cu_seqlens_k = torch.zeros(batch_size + 1, dtype=torch.int32, device=device) + cu_seqlens_q[1:] = torch.cumsum(seqlens_q, dim=0) + cu_seqlens_k[1:] = torch.cumsum(seqlens_k, dim=0) + max_seqlen_q = seqlens_q.max().item() + max_seqlen_k = seqlens_k.max().item() + return (seqlens_q, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) + + +def _prepare_for_flash_attn_or_sage_varlen_with_mask( + batch_size: int, + seq_len_q: int, + attn_mask: torch.Tensor, + device: torch.device | None = None, +): + seqlens_q = torch.full((batch_size,), seq_len_q, dtype=torch.int32, device=device) + seqlens_k = attn_mask.sum(dim=1, dtype=torch.int32) + cu_seqlens_q = torch.zeros(batch_size + 1, dtype=torch.int32, device=device) + cu_seqlens_k = torch.zeros(batch_size + 1, dtype=torch.int32, device=device) + cu_seqlens_q[1:] = torch.cumsum(seqlens_q, dim=0) + cu_seqlens_k[1:] = torch.cumsum(seqlens_k, dim=0) + max_seqlen_q = seqlens_q.max().item() + max_seqlen_k = seqlens_k.max().item() + return (seqlens_q, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) + + +def _prepare_for_flash_attn_or_sage_varlen( + batch_size: int, + seq_len_q: int, + seq_len_kv: int, + attn_mask: torch.Tensor | None = None, + device: torch.device | None = None, +) -> None: + if attn_mask is None: + return _prepare_for_flash_attn_or_sage_varlen_without_mask(batch_size, seq_len_q, seq_len_kv, device) + return _prepare_for_flash_attn_or_sage_varlen_with_mask(batch_size, seq_len_q, attn_mask, device) + + +def _normalize_attn_mask(attn_mask: torch.Tensor, batch_size: int, seq_len_k: int) -> torch.Tensor: + """ + Normalize an attention mask to shape [batch_size, seq_len_k] (bool) suitable for inferring seqlens_[q|k] in + FlashAttention/Sage varlen. + + Supports 1D to 4D shapes and common broadcasting patterns. + """ + if attn_mask.dtype != torch.bool: + raise ValueError(f"Attention mask must be of type bool, got {attn_mask.dtype}.") + + if attn_mask.ndim == 1: + # [seq_len_k] -> broadcast across batch + attn_mask = attn_mask.unsqueeze(0).expand(batch_size, seq_len_k) + + elif attn_mask.ndim == 2: + # [batch_size, seq_len_k]. Maybe broadcast across batch + if attn_mask.size(0) not in [1, batch_size]: + raise ValueError( + f"attn_mask.shape[0] ({attn_mask.shape[0]}) must be 1 or {batch_size} for 2D attention mask." + ) + attn_mask = attn_mask.expand(batch_size, seq_len_k) + + elif attn_mask.ndim == 3: + # [batch_size, seq_len_q, seq_len_k] -> reduce over query dimension + # We do this reduction because we know that arbitrary QK masks is not supported in Flash/Sage varlen. + if attn_mask.size(0) not in [1, batch_size]: + raise ValueError( + f"attn_mask.shape[0] ({attn_mask.shape[0]}) must be 1 or {batch_size} for 3D attention mask." + ) + attn_mask = attn_mask.any(dim=1) + attn_mask = attn_mask.expand(batch_size, seq_len_k) + + elif attn_mask.ndim == 4: + # [batch_size, num_heads, seq_len_q, seq_len_k] or broadcastable versions + if attn_mask.size(0) not in [1, batch_size]: + raise ValueError( + f"attn_mask.shape[0] ({attn_mask.shape[0]}) must be 1 or {batch_size} for 4D attention mask." + ) + attn_mask = attn_mask.expand(batch_size, -1, -1, seq_len_k) # [B, H, Q, K] + attn_mask = attn_mask.any(dim=(1, 2)) # [B, K] + + else: + raise ValueError(f"Unsupported attention mask shape: {attn_mask.shape}") + + if attn_mask.shape != (batch_size, seq_len_k): + raise ValueError( + f"Normalized attention mask shape mismatch: got {attn_mask.shape}, expected ({batch_size}, {seq_len_k})" + ) + + return attn_mask + + +def _flex_attention_causal_mask_mod(batch_idx, head_idx, q_idx, kv_idx): + return q_idx >= kv_idx + + +# ===== Helpers for downloading kernels ===== +def _resolve_kernel_attr(module, attr_path: str): + target = module + for attr in attr_path.split("."): + if not hasattr(target, attr): + raise AttributeError(f"Kernel module '{module.__name__}' does not define attribute path '{attr_path}'.") + target = getattr(target, attr) + return target + + +def _maybe_download_kernel_for_backend(backend: AttentionBackendName) -> None: + if backend not in _HUB_KERNELS_REGISTRY: + return + config = _HUB_KERNELS_REGISTRY[backend] + + needs_kernel = config.kernel_fn is None + needs_wrapped_forward = config.wrapped_forward_attr is not None and config.wrapped_forward_fn is None + needs_wrapped_backward = config.wrapped_backward_attr is not None and config.wrapped_backward_fn is None + + if not (needs_kernel or needs_wrapped_forward or needs_wrapped_backward): + return + + try: + from kernels import get_kernel + + kernel_module = get_kernel(config.repo_id, revision=config.revision, version=config.version) + if needs_kernel: + config.kernel_fn = _resolve_kernel_attr(kernel_module, config.function_attr) + + if needs_wrapped_forward: + config.wrapped_forward_fn = _resolve_kernel_attr(kernel_module, config.wrapped_forward_attr) + + if needs_wrapped_backward: + config.wrapped_backward_fn = _resolve_kernel_attr(kernel_module, config.wrapped_backward_attr) + + except Exception as e: + logger.error(f"An error occurred while fetching kernel '{config.repo_id}' from the Hub: {e}") + raise + + +# ===== torch op registrations ===== +# Registrations are required for fullgraph tracing compatibility +# TODO: this is only required because the beta release FA3 does not have it. There is a PR adding +# this but it was never merged: https://github.com/Dao-AILab/flash-attention/pull/1590 +@_custom_op("_diffusers_flash_attn_3::_flash_attn_forward", mutates_args=(), device_types="cuda") +def _wrapped_flash_attn_3( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + softmax_scale: float | None = None, + causal: bool = False, + qv: torch.Tensor | None = None, + q_descale: torch.Tensor | None = None, + k_descale: torch.Tensor | None = None, + v_descale: torch.Tensor | None = None, + attention_chunk: int = 0, + softcap: float = 0.0, + num_splits: int = 1, + pack_gqa: bool | None = None, + deterministic: bool = False, + sm_margin: int = 0, +) -> tuple[torch.Tensor, torch.Tensor]: + # Hardcoded for now because pytorch does not support tuple/int type hints + window_size = (-1, -1) + result = flash_attn_3_func( + q=q, + k=k, + v=v, + softmax_scale=softmax_scale, + causal=causal, + qv=qv, + q_descale=q_descale, + k_descale=k_descale, + v_descale=v_descale, + window_size=window_size, + attention_chunk=attention_chunk, + softcap=softcap, + num_splits=num_splits, + pack_gqa=pack_gqa, + deterministic=deterministic, + sm_margin=sm_margin, + return_attn_probs=True, + ) + out, lse, *_ = result + lse = lse.permute(0, 2, 1) + return out, lse + + +@_register_fake("_diffusers_flash_attn_3::_flash_attn_forward") +def _( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + softmax_scale: float | None = None, + causal: bool = False, + qv: torch.Tensor | None = None, + q_descale: torch.Tensor | None = None, + k_descale: torch.Tensor | None = None, + v_descale: torch.Tensor | None = None, + attention_chunk: int = 0, + softcap: float = 0.0, + num_splits: int = 1, + pack_gqa: bool | None = None, + deterministic: bool = False, + sm_margin: int = 0, +) -> tuple[torch.Tensor, torch.Tensor]: + window_size = (-1, -1) # noqa: F841 + # A lot of the parameters here are not yet used in any way within diffusers. + # We can safely ignore for now and keep the fake op shape propagation simple. + batch_size, seq_len, num_heads, head_dim = q.shape + lse_shape = (batch_size, seq_len, num_heads) + return torch.empty_like(q), q.new_empty(lse_shape) + + +# ===== Helper functions to use attention backends with templated CP autograd functions ===== + + +def _native_attention_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + # Native attention does not return_lse + if return_lse: + raise ValueError("Native attention does not support return_lse=True") + + # used for backward pass + if _save_ctx: + ctx.save_for_backward(query, key, value) + ctx.attn_mask = attn_mask + ctx.dropout_p = dropout_p + ctx.is_causal = is_causal + ctx.scale = scale + ctx.enable_gqa = enable_gqa + + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + out = torch.nn.functional.scaled_dot_product_attention( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + dropout_p=dropout_p, + is_causal=is_causal, + scale=scale, + enable_gqa=enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + + return out + + +def _native_attention_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + query, key, value = ctx.saved_tensors + + query.requires_grad_(True) + key.requires_grad_(True) + value.requires_grad_(True) + + query_t, key_t, value_t = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + out = torch.nn.functional.scaled_dot_product_attention( + query=query_t, + key=key_t, + value=value_t, + attn_mask=ctx.attn_mask, + dropout_p=ctx.dropout_p, + is_causal=ctx.is_causal, + scale=ctx.scale, + enable_gqa=ctx.enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + + grad_out_t = grad_out.permute(0, 2, 1, 3) + grad_query_t, grad_key_t, grad_value_t = torch.autograd.grad( + outputs=out, inputs=[query_t, key_t, value_t], grad_outputs=grad_out_t, retain_graph=False + ) + + grad_query = grad_query_t.permute(0, 2, 1, 3) + grad_key = grad_key_t.permute(0, 2, 1, 3) + grad_value = grad_value_t.permute(0, 2, 1, 3) + + return grad_query, grad_key, grad_value + + +# https://github.com/pytorch/pytorch/blob/8904ba638726f8c9a5aff5977c4aa76c9d2edfa6/aten/src/ATen/native/native_functions.yaml#L14958 +# forward declaration: +# aten::_scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0., bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask) +def _cudnn_attention_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for cuDNN attention.") + + tensors_to_save = () + + # Contiguous is a must here! Calling cuDNN backend with aten ops produces incorrect results + # if the input tensors are not contiguous. + query = query.transpose(1, 2).contiguous() + key = key.transpose(1, 2).contiguous() + value = value.transpose(1, 2).contiguous() + tensors_to_save += (query, key, value) + + out, lse, cum_seq_q, cum_seq_k, max_q, max_k, philox_seed, philox_offset, debug_attn_mask = ( + torch.ops.aten._scaled_dot_product_cudnn_attention( + query=query, + key=key, + value=value, + attn_bias=attn_mask, + compute_log_sumexp=return_lse, + dropout_p=dropout_p, + is_causal=is_causal, + return_debug_mask=False, + scale=scale, + ) + ) + + tensors_to_save += (out, lse, cum_seq_q, cum_seq_k, philox_seed, philox_offset) + if _save_ctx: + ctx.save_for_backward(*tensors_to_save) + ctx.dropout_p = dropout_p + ctx.is_causal = is_causal + ctx.scale = scale + ctx.attn_mask = attn_mask + ctx.max_q = max_q + ctx.max_k = max_k + + out = out.transpose(1, 2).contiguous() + if lse is not None: + lse = lse.transpose(1, 2).contiguous() + return (out, lse) if return_lse else out + + +# backward declaration: +# aten::_scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor) +def _cudnn_attention_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + query, key, value, out, lse, cum_seq_q, cum_seq_k, philox_seed, philox_offset = ctx.saved_tensors + + grad_out = grad_out.transpose(1, 2).contiguous() + key = key.transpose(1, 2).contiguous() + value = value.transpose(1, 2).contiguous() + + # Cannot pass first 5 arguments as kwargs because: https://github.com/pytorch/pytorch/blob/d26ca5de058dbcf56ac52bb43e84dd98df2ace97/torch/_dynamo/variables/torch.py#L1341 + grad_query, grad_key, grad_value = torch.ops.aten._scaled_dot_product_cudnn_attention_backward( + grad_out, + query, + key, + value, + out, + logsumexp=lse, + philox_seed=philox_seed, + philox_offset=philox_offset, + attn_bias=ctx.attn_mask, + cum_seq_q=cum_seq_q, + cum_seq_k=cum_seq_k, + max_q=ctx.max_q, + max_k=ctx.max_k, + dropout_p=ctx.dropout_p, + is_causal=ctx.is_causal, + scale=ctx.scale, + ) + grad_query, grad_key, grad_value = (x.transpose(1, 2).contiguous() for x in (grad_query, grad_key, grad_value)) + + return grad_query, grad_key, grad_value + + +# https://github.com/pytorch/pytorch/blob/e33fa0ece36a93dbc8ff19b0251b8d99f8ae8668/aten/src/ATen/native/native_functions.yaml#L15135 +# forward declaration: +# aten::_scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask) +def _native_flash_attention_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for native flash attention.") + + tensors_to_save = () + + query = query.transpose(1, 2).contiguous() + key = key.transpose(1, 2).contiguous() + value = value.transpose(1, 2).contiguous() + tensors_to_save += (query, key, value) + + out, lse, cum_seq_q, cum_seq_k, max_q, max_k, philox_seed, philox_offset, debug_attn_mask = ( + torch.ops.aten._scaled_dot_product_flash_attention( + query=query, + key=key, + value=value, + dropout_p=dropout_p, + is_causal=is_causal, + return_debug_mask=False, + scale=scale, + ) + ) + + tensors_to_save += (out, lse, cum_seq_q, cum_seq_k, philox_seed, philox_offset) + if _save_ctx: + ctx.save_for_backward(*tensors_to_save) + ctx.dropout_p = dropout_p + ctx.is_causal = is_causal + ctx.scale = scale + ctx.max_q = max_q + ctx.max_k = max_k + + out = out.transpose(1, 2).contiguous() + if lse is not None: + lse = lse.transpose(1, 2).contiguous() + return (out, lse) if return_lse else out + + +# https://github.com/pytorch/pytorch/blob/e33fa0ece36a93dbc8ff19b0251b8d99f8ae8668/aten/src/ATen/native/native_functions.yaml#L15153 +# backward declaration: +# aten::_scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value) +def _native_flash_attention_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + query, key, value, out, lse, cum_seq_q, cum_seq_k, philox_seed, philox_offset = ctx.saved_tensors + + grad_out = grad_out.transpose(1, 2).contiguous() + key = key.transpose(1, 2).contiguous() + value = value.transpose(1, 2).contiguous() + + grad_query, grad_key, grad_value = torch.ops.aten._scaled_dot_product_flash_attention_backward( + grad_out, + query, + key, + value, + out, + logsumexp=lse, + philox_seed=philox_seed, + philox_offset=philox_offset, + cum_seq_q=cum_seq_q, + cum_seq_k=cum_seq_k, + max_q=ctx.max_q, + max_k=ctx.max_k, + dropout_p=ctx.dropout_p, + is_causal=ctx.is_causal, + scale=ctx.scale, + ) + grad_query, grad_key, grad_value = (x.transpose(1, 2).contiguous() for x in (grad_query, grad_key, grad_value)) + + return grad_query, grad_key, grad_value + + +# Adapted from: https://github.com/Dao-AILab/flash-attention/blob/fd2fc9d85c8e54e5c20436465bca709bc1a6c5a1/flash_attn/flash_attn_interface.py#L807 +def _flash_attention_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if attn_mask is not None: + raise ValueError("`attn_mask` is not yet supported for flash-attn 2.") + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for flash-attn 2.") + + # Hardcoded for now + window_size = (-1, -1) + softcap = 0.0 + alibi_slopes = None + deterministic = False + grad_enabled = any(x.requires_grad for x in (query, key, value)) + + if scale is None: + scale = query.shape[-1] ** (-0.5) + + # flash-attn only returns LSE if dropout_p > 0. So, we need to workaround. + if grad_enabled or (_parallel_config is not None and _parallel_config.context_parallel_config._world_size > 1): + dropout_p = dropout_p if dropout_p > 0 else 1e-30 + + with torch.set_grad_enabled(grad_enabled): + out, lse, S_dmask, rng_state = _wrapped_flash_attn_forward( + query, + key, + value, + dropout_p, + scale, + is_causal, + window_size[0], + window_size[1], + softcap, + alibi_slopes, + return_lse, + ) + lse = lse.permute(0, 2, 1) + + if _save_ctx: + ctx.save_for_backward(query, key, value, out, lse, rng_state) + ctx.dropout_p = dropout_p + ctx.scale = scale + ctx.is_causal = is_causal + ctx.window_size = window_size + ctx.softcap = softcap + ctx.alibi_slopes = alibi_slopes + ctx.deterministic = deterministic + + return (out, lse) if return_lse else out + + +def _flash_attention_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + query, key, value, out, lse, rng_state = ctx.saved_tensors + grad_query, grad_key, grad_value = torch.empty_like(query), torch.empty_like(key), torch.empty_like(value) + + lse_d = _wrapped_flash_attn_backward( # noqa: F841 + grad_out, + query, + key, + value, + out, + lse, + grad_query, + grad_key, + grad_value, + ctx.dropout_p, + ctx.scale, + ctx.is_causal, + ctx.window_size[0], + ctx.window_size[1], + ctx.softcap, + ctx.alibi_slopes, + ctx.deterministic, + rng_state, + ) + + # Head dimension may have been padded + grad_query = grad_query[..., : grad_out.shape[-1]] + grad_key = grad_key[..., : grad_out.shape[-1]] + grad_value = grad_value[..., : grad_out.shape[-1]] + + return grad_query, grad_key, grad_value + + +def _flash_attention_hub_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if attn_mask is not None: + raise ValueError("`attn_mask` is not yet supported for flash-attn hub kernels.") + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for flash-attn hub kernels.") + + config = _HUB_KERNELS_REGISTRY[AttentionBackendName.FLASH_HUB] + wrapped_forward_fn = config.wrapped_forward_fn + wrapped_backward_fn = config.wrapped_backward_fn + if wrapped_forward_fn is None or wrapped_backward_fn is None: + raise RuntimeError( + "Flash attention hub kernels must expose `_wrapped_flash_attn_forward` and `_wrapped_flash_attn_backward` " + "for context parallel execution." + ) + + if scale is None: + scale = query.shape[-1] ** (-0.5) + + window_size = (-1, -1) + softcap = 0.0 + alibi_slopes = None + deterministic = False + grad_enabled = any(x.requires_grad for x in (query, key, value)) + + if grad_enabled or (_parallel_config is not None and _parallel_config.context_parallel_config._world_size > 1): + dropout_p = dropout_p if dropout_p > 0 else 1e-30 + + with torch.set_grad_enabled(grad_enabled): + out, lse, S_dmask, rng_state = wrapped_forward_fn( + query, + key, + value, + dropout_p, + scale, + is_causal, + window_size[0], + window_size[1], + softcap, + alibi_slopes, + return_lse, + ) + lse = lse.permute(0, 2, 1).contiguous() + + if _save_ctx: + ctx.save_for_backward(query, key, value, out, lse, rng_state) + ctx.dropout_p = dropout_p + ctx.scale = scale + ctx.is_causal = is_causal + ctx.window_size = window_size + ctx.softcap = softcap + ctx.alibi_slopes = alibi_slopes + ctx.deterministic = deterministic + + return (out, lse) if return_lse else out + + +def _flash_attention_hub_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + config = _HUB_KERNELS_REGISTRY[AttentionBackendName.FLASH_HUB] + wrapped_backward_fn = config.wrapped_backward_fn + if wrapped_backward_fn is None: + raise RuntimeError( + "Flash attention hub kernels must expose `_wrapped_flash_attn_backward` for context parallel execution." + ) + + query, key, value, out, lse, rng_state = ctx.saved_tensors + grad_query, grad_key, grad_value = torch.empty_like(query), torch.empty_like(key), torch.empty_like(value) + + _ = wrapped_backward_fn( + grad_out, + query, + key, + value, + out, + lse, + grad_query, + grad_key, + grad_value, + ctx.dropout_p, + ctx.scale, + ctx.is_causal, + ctx.window_size[0], + ctx.window_size[1], + ctx.softcap, + ctx.alibi_slopes, + ctx.deterministic, + rng_state, + ) + + grad_query = grad_query[..., : grad_out.shape[-1]] + grad_key = grad_key[..., : grad_out.shape[-1]] + grad_value = grad_value[..., : grad_out.shape[-1]] + + return grad_query, grad_key, grad_value + + +def _flash_attention_3_hub_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, + *, + window_size: tuple[int, int] = (-1, -1), + softcap: float = 0.0, + num_splits: int = 1, + pack_gqa: bool | None = None, + deterministic: bool = False, + sm_margin: int = 0, +): + if attn_mask is not None: + raise ValueError("`attn_mask` is not yet supported for flash-attn 3 hub kernels.") + if dropout_p != 0.0: + raise ValueError("`dropout_p` is not yet supported for flash-attn 3 hub kernels.") + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for flash-attn 3 hub kernels.") + + config = _HUB_KERNELS_REGISTRY[AttentionBackendName._FLASH_3_HUB] + wrapped_forward_fn = config.wrapped_forward_fn + if wrapped_forward_fn is None: + raise RuntimeError( + "Flash attention 3 hub kernels must expose `flash_attn_interface._flash_attn_forward` " + "for context parallel execution." + ) + + if scale is None: + scale = query.shape[-1] ** (-0.5) + + out, softmax_lse, *_ = wrapped_forward_fn( + query, + key, + value, + None, + None, # k_new, v_new + None, # qv + None, # out + None, + None, + None, # cu_seqlens_q/k/k_new + None, + None, # seqused_q/k + None, + None, # max_seqlen_q/k + None, + None, + None, # page_table, kv_batch_idx, leftpad_k + None, + None, + None, # rotary_cos/sin, seqlens_rotary + None, + None, + None, # q_descale, k_descale, v_descale + scale, + causal=is_causal, + window_size_left=window_size[0], + window_size_right=window_size[1], + attention_chunk=0, + softcap=softcap, + num_splits=num_splits, + pack_gqa=pack_gqa, + sm_margin=sm_margin, + ) + + lse = softmax_lse.permute(0, 2, 1).contiguous() if return_lse else None + + if _save_ctx: + ctx.save_for_backward(query, key, value, out, softmax_lse) + ctx.scale = scale + ctx.is_causal = is_causal + ctx.window_size = window_size + ctx.softcap = softcap + ctx.deterministic = deterministic + ctx.sm_margin = sm_margin + + return (out, lse) if return_lse else out + + +def _flash_attention_3_hub_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + config = _HUB_KERNELS_REGISTRY[AttentionBackendName._FLASH_3_HUB] + wrapped_backward_fn = config.wrapped_backward_fn + if wrapped_backward_fn is None: + raise RuntimeError( + "Flash attention 3 hub kernels must expose `flash_attn_interface._flash_attn_backward` " + "for context parallel execution." + ) + + query, key, value, out, softmax_lse = ctx.saved_tensors + grad_query = torch.empty_like(query) + grad_key = torch.empty_like(key) + grad_value = torch.empty_like(value) + + wrapped_backward_fn( + grad_out, + query, + key, + value, + out, + softmax_lse, + None, + None, # cu_seqlens_q, cu_seqlens_k + None, + None, # seqused_q, seqused_k + None, + None, # max_seqlen_q, max_seqlen_k + grad_query, + grad_key, + grad_value, + ctx.scale, + ctx.is_causal, + ctx.window_size[0], + ctx.window_size[1], + ctx.softcap, + ctx.deterministic, + ctx.sm_margin, + ) + + grad_query = grad_query[..., : grad_out.shape[-1]] + grad_key = grad_key[..., : grad_out.shape[-1]] + grad_value = grad_value[..., : grad_out.shape[-1]] + + return grad_query, grad_key, grad_value + + +def _sage_attention_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if attn_mask is not None: + raise ValueError("`attn_mask` is not yet supported for Sage attention.") + if dropout_p > 0.0: + raise ValueError("`dropout_p` is not yet supported for Sage attention.") + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for Sage attention.") + + out = sageattn( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + lse = None + if return_lse: + out, lse, *_ = out + lse = lse.permute(0, 2, 1) + + return (out, lse) if return_lse else out + + +def _sage_attention_hub_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if attn_mask is not None: + raise ValueError("`attn_mask` is not yet supported for Sage attention.") + if dropout_p > 0.0: + raise ValueError("`dropout_p` is not yet supported for Sage attention.") + if enable_gqa: + raise ValueError("`enable_gqa` is not yet supported for Sage attention.") + + func = _HUB_KERNELS_REGISTRY[AttentionBackendName.SAGE_HUB].kernel_fn + out = func( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + + lse = None + if return_lse: + out, lse, *_ = out + lse = lse.permute(0, 2, 1).contiguous() + + return (out, lse) if return_lse else out + + +def _sage_attention_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, +): + raise NotImplementedError("Backward pass is not implemented for Sage attention.") + + +def _maybe_modify_attn_mask_npu(query: torch.Tensor, key: torch.Tensor, attn_mask: torch.Tensor | None = None): + # Skip Attention Mask if all values are 1, `None` mask can speedup the computation + if attn_mask is not None and torch.all(attn_mask != 0): + attn_mask = None + + # Reshape Attention Mask: [batch_size, seq_len_k] -> [batch_size, 1, sqe_len_q, seq_len_k] + # https://www.hiascend.com/document/detail/zh/Pytorch/730/apiref/torchnpuCustomsapi/docs/context/torch_npu-npu_fusion_attention.md + if ( + attn_mask is not None + and attn_mask.ndim == 2 + and attn_mask.shape[0] == query.shape[0] + and attn_mask.shape[1] == key.shape[1] + ): + B, Sq, Skv = attn_mask.shape[0], query.shape[1], key.shape[1] + attn_mask = ~attn_mask.to(torch.bool) + attn_mask = attn_mask.unsqueeze(1).expand(B, Sq, Skv).unsqueeze(1).contiguous() + + return attn_mask + + +def _npu_attention_forward_op( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _save_ctx: bool = True, + _parallel_config: "ParallelConfig" | None = None, +): + if return_lse: + raise ValueError("NPU attention backend does not support setting `return_lse=True`.") + + attn_mask = _maybe_modify_attn_mask_npu(query, key, attn_mask) + + out = npu_fusion_attention( + query, + key, + value, + query.size(2), # num_heads + atten_mask=attn_mask, + input_layout="BSND", + pse=None, + scale=1.0 / math.sqrt(query.shape[-1]) if scale is None else scale, + pre_tockens=65536, + next_tockens=65536, + keep_prob=1.0 - dropout_p, + sync=False, + inner_precise=0, + )[0] + + return out + + +# Not implemented yet. +def _npu_attention_backward_op( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + **kwargs, +): + raise NotImplementedError("Backward pass is not implemented for Npu Fusion Attention.") + + +# ===== Context parallel ===== + + +# Reference: +# - https://github.com/pytorch/pytorch/blob/f58a680d09e13658a52c6ba05c63c15759846bcc/torch/distributed/_functional_collectives.py#L827 +# - https://github.com/pytorch/pytorch/blob/f58a680d09e13658a52c6ba05c63c15759846bcc/torch/distributed/_functional_collectives.py#L246 +# For fullgraph=True tracing compatibility (since FakeTensor does not have a `wait` method): +def _wait_tensor(tensor): + if isinstance(tensor, funcol.AsyncCollectiveTensor): + tensor = tensor.wait() + return tensor + + +def _all_to_all_single(x: torch.Tensor, group) -> torch.Tensor: + shape = x.shape + # HACK: We need to flatten because despite making tensors contiguous, torch single-file-ization + # to benchmark triton codegen fails somewhere: + # buf25 = torch.ops._c10d_functional.all_to_all_single.default(buf24, [1, 1], [1, 1], '3') + # ValueError: Tensors must be contiguous + x = x.flatten() + x = funcol.all_to_all_single(x, None, None, group) + x = x.reshape(shape) + x = _wait_tensor(x) + return x + + +def _all_to_all_dim_exchange(x: torch.Tensor, scatter_idx: int = 2, gather_idx: int = 1, group=None) -> torch.Tensor: + """ + Perform dimension sharding / reassembly across processes using _all_to_all_single. + + This utility reshapes and redistributes tensor `x` across the given process group, across sequence dimension or + head dimension flexibly by accepting scatter_idx and gather_idx. + + Args: + x (torch.Tensor): + Input tensor. Expected shapes: + - When scatter_idx=2, gather_idx=1: (batch_size, seq_len_local, num_heads, head_dim) + - When scatter_idx=1, gather_idx=2: (batch_size, seq_len, num_heads_local, head_dim) + scatter_idx (int) : + Dimension along which the tensor is partitioned before all-to-all. + gather_idx (int): + Dimension along which the output is reassembled after all-to-all. + group : + Distributed process group for the Ulysses group. + + Returns: + torch.Tensor: Tensor with globally exchanged dimensions. + - For (scatter_idx=2 → gather_idx=1): (batch_size, seq_len, num_heads_local, head_dim) + - For (scatter_idx=1 → gather_idx=2): (batch_size, seq_len_local, num_heads, head_dim) + """ + group_world_size = torch.distributed.get_world_size(group) + + if scatter_idx == 2 and gather_idx == 1: + # Used before Ulysses sequence parallel (SP) attention. Scatters the gathers sequence + # dimension and scatters head dimension + batch_size, seq_len_local, num_heads, head_dim = x.shape + seq_len = seq_len_local * group_world_size + num_heads_local = num_heads // group_world_size + + # B, S_LOCAL, H, D -> group_world_size, S_LOCAL, B, H_LOCAL, D + x_temp = ( + x.reshape(batch_size, seq_len_local, group_world_size, num_heads_local, head_dim) + .transpose(0, 2) + .contiguous() + ) + + if group_world_size > 1: + out = _all_to_all_single(x_temp, group=group) + else: + out = x_temp + # group_world_size, S_LOCAL, B, H_LOCAL, D -> B, S, H_LOCAL, D + out = out.reshape(seq_len, batch_size, num_heads_local, head_dim).permute(1, 0, 2, 3).contiguous() + out = out.reshape(batch_size, seq_len, num_heads_local, head_dim) + return out + elif scatter_idx == 1 and gather_idx == 2: + # Used after ulysses sequence parallel in unified SP. gathers the head dimension + # scatters back the sequence dimension. + batch_size, seq_len, num_heads_local, head_dim = x.shape + num_heads = num_heads_local * group_world_size + seq_len_local = seq_len // group_world_size + + # B, S, H_LOCAL, D -> group_world_size, H_LOCAL, S_LOCAL, B, D + x_temp = ( + x.reshape(batch_size, group_world_size, seq_len_local, num_heads_local, head_dim) + .permute(1, 3, 2, 0, 4) + .reshape(group_world_size, num_heads_local, seq_len_local, batch_size, head_dim) + ) + + if group_world_size > 1: + output = _all_to_all_single(x_temp, group) + else: + output = x_temp + output = output.reshape(num_heads, seq_len_local, batch_size, head_dim).transpose(0, 2).contiguous() + output = output.reshape(batch_size, seq_len_local, num_heads, head_dim) + return output + else: + raise ValueError("Invalid scatter/gather indices for _all_to_all_dim_exchange.") + + +class SeqAllToAllDim(torch.autograd.Function): + """ + all_to_all operation for unified sequence parallelism. uses _all_to_all_dim_exchange, see _all_to_all_dim_exchange + for more info. + """ + + @staticmethod + def forward(ctx, group, input, scatter_id=2, gather_id=1): + ctx.group = group + ctx.scatter_id = scatter_id + ctx.gather_id = gather_id + return _all_to_all_dim_exchange(input, scatter_id, gather_id, group) + + @staticmethod + def backward(ctx, grad_outputs): + grad_input = SeqAllToAllDim.apply( + ctx.group, + grad_outputs, + ctx.gather_id, # reversed + ctx.scatter_id, # reversed + ) + return (None, grad_input, None, None) + + +# Below are helper functions to handle abritrary head num and abritrary sequence length for Ulysses Anything Attention. +def _maybe_pad_qkv_head(x: torch.Tensor, H: int, group: dist.ProcessGroup) -> tuple[torch.Tensor, int]: + r"""Maybe pad the head dimension to be divisible by world_size. + x: torch.Tensor, shape (B, S_LOCAL, H, D) H: int, original global head num return: tuple[torch.Tensor, int], padded + tensor (B, S_LOCAL, H + H_PAD, D) and H_PAD + """ + world_size = dist.get_world_size(group=group) + H_PAD = 0 + if H % world_size != 0: + H_PAD = world_size - (H % world_size) + NEW_H_LOCAL = (H + H_PAD) // world_size + # e.g., Allow: H=30, world_size=8 -> NEW_H_LOCAL=4, H_PAD=2. + # NOT ALLOW: H=30, world_size=16 -> NEW_H_LOCAL=2, H_PAD=14. + assert H_PAD < NEW_H_LOCAL, f"Padding head num {H_PAD} should be less than new local head num {NEW_H_LOCAL}" + x = F.pad(x, (0, 0, 0, H_PAD)).contiguous() + return x, H_PAD + + +def _maybe_unpad_qkv_head(x: torch.Tensor, H_PAD: int, group: dist.ProcessGroup) -> torch.Tensor: + r"""Maybe unpad the head dimension. + x: torch.Tensor, shape (B, S_GLOBAL, H_LOCAL + H_PAD, D) H_PAD: int, head padding num return: torch.Tensor, + unpadded tensor (B, S_GLOBAL, H_LOCAL, D) + """ + rank = dist.get_rank(group=group) + world_size = dist.get_world_size(group=group) + # Only the last rank may have padding + if H_PAD > 0 and rank == world_size - 1: + x = x[:, :, :-H_PAD, :] + return x.contiguous() + + +def _maybe_pad_o_head(x: torch.Tensor, H: int, group: dist.ProcessGroup) -> tuple[torch.Tensor, int]: + r"""Maybe pad the head dimension to be divisible by world_size. + x: torch.Tensor, shape (B, S_GLOBAL, H_LOCAL, D) H: int, original global head num return: tuple[torch.Tensor, int], + padded tensor (B, S_GLOBAL, H_LOCAL + H_PAD, D) and H_PAD + """ + if H is None: + return x, 0 + + rank = dist.get_rank(group=group) + world_size = dist.get_world_size(group=group) + H_PAD = 0 + # Only the last rank may need padding + if H % world_size != 0: + # We need to broadcast H_PAD to all ranks to keep consistency + # in unpadding step later for all ranks. + H_PAD = world_size - (H % world_size) + NEW_H_LOCAL = (H + H_PAD) // world_size + assert H_PAD < NEW_H_LOCAL, f"Padding head num {H_PAD} should be less than new local head num {NEW_H_LOCAL}" + if rank == world_size - 1: + x = F.pad(x, (0, 0, 0, H_PAD)).contiguous() + return x, H_PAD + + +def _maybe_unpad_o_head(x: torch.Tensor, H_PAD: int, group: dist.ProcessGroup) -> torch.Tensor: + r"""Maybe unpad the head dimension. + x: torch.Tensor, shape (B, S_LOCAL, H_GLOBAL + H_PAD, D) H_PAD: int, head padding num return: torch.Tensor, + unpadded tensor (B, S_LOCAL, H_GLOBAL, D) + """ + if H_PAD > 0: + x = x[:, :, :-H_PAD, :] + return x.contiguous() + + +def ulysses_anything_metadata(query: torch.Tensor, **kwargs) -> dict: + # query: (B, S_LOCAL, H_GLOBAL, D) + assert len(query.shape) == 4, "Query tensor must be 4-dimensional of shape (B, S_LOCAL, H_GLOBAL, D)" + extra_kwargs = {} + extra_kwargs["NUM_QO_HEAD"] = query.shape[2] + extra_kwargs["Q_S_LOCAL"] = query.shape[1] + # Add other kwargs if needed in future + return extra_kwargs + + +@maybe_allow_in_graph +def all_to_all_single_any_qkv_async( + x: torch.Tensor, group: dist.ProcessGroup, **kwargs +) -> Callable[..., torch.Tensor]: + r""" + x: torch.Tensor, shape (B, S_LOCAL, H, D) return: Callable that returns (B, S_GLOBAL, H_LOCAL, D) + """ + world_size = dist.get_world_size(group=group) + B, S_LOCAL, H, D = x.shape + x, H_PAD = _maybe_pad_qkv_head(x, H, group) + H_LOCAL = (H + H_PAD) // world_size + # (world_size, S_LOCAL, B, H_LOCAL, D) + x = x.reshape(B, S_LOCAL, world_size, H_LOCAL, D).permute(2, 1, 0, 3, 4).contiguous() + + input_split_sizes = [S_LOCAL] * world_size + # S_LOCAL maybe not equal for all ranks in dynamic shape case, + # since we don't know the actual shape before this timing, thus, + # we have to use all gather to collect the S_LOCAL first. + output_split_sizes = gather_size_by_comm(S_LOCAL, group) + x = x.flatten(0, 1) # (world_size * S_LOCAL, B, H_LOCAL, D) + x = funcol.all_to_all_single(x, output_split_sizes, input_split_sizes, group) + + def wait() -> torch.Tensor: + nonlocal x, H_PAD + x = _wait_tensor(x) # (S_GLOBAL, B, H_LOCAL, D) + # (S_GLOBAL, B, H_LOCAL, D) + # -> (B, S_GLOBAL, H_LOCAL, D) + x = x.permute(1, 0, 2, 3).contiguous() + x = _maybe_unpad_qkv_head(x, H_PAD, group) + return x + + return wait + + +@maybe_allow_in_graph +def all_to_all_single_any_o_async(x: torch.Tensor, group: dist.ProcessGroup, **kwargs) -> Callable[..., torch.Tensor]: + r""" + x: torch.Tensor, shape (B, S_GLOBAL, H_LOCAL, D) return: Callable that returns (B, S_LOCAL, H_GLOBAL, D) + """ + # Assume H is provided in kwargs, since we can't infer H from x's shape. + # The padding logic needs H to determine if padding is necessary. + H = kwargs.get("NUM_QO_HEAD", None) + world_size = dist.get_world_size(group=group) + + x, H_PAD = _maybe_pad_o_head(x, H, group) + shape = x.shape # (B, S_GLOBAL, H_LOCAL, D) + (B, S_GLOBAL, H_LOCAL, D) = shape + + # input_split: e.g, S_GLOBAL=9 input splits across ranks [[5,4], [5,4],..] + # output_split: e.g, S_GLOBAL=9 output splits across ranks [[5,5], [4,4],..] + + # WARN: In some cases, e.g, joint attn in Qwen-Image, the S_LOCAL can not infer + # from tensor split due to: if c = torch.cat((a, b)), world_size=4, then, + # c.tensor_split(4)[0].shape[1] may != to (a.tensor_split(4)[0].shape[1] + + # b.tensor_split(4)[0].shape[1]) + + S_LOCAL = kwargs.get("Q_S_LOCAL") + input_split_sizes = gather_size_by_comm(S_LOCAL, group) + x = x.permute(1, 0, 2, 3).contiguous() # (S_GLOBAL, B, H_LOCAL, D) + output_split_sizes = [S_LOCAL] * world_size + x = funcol.all_to_all_single(x, output_split_sizes, input_split_sizes, group) + + def wait() -> torch.Tensor: + nonlocal x, H_PAD + x = _wait_tensor(x) # (S_GLOBAL, B, H_LOCAL, D) + x = x.reshape(world_size, S_LOCAL, B, H_LOCAL, D) + x = x.permute(2, 1, 0, 3, 4).contiguous() + x = x.reshape(B, S_LOCAL, world_size * H_LOCAL, D) + x = _maybe_unpad_o_head(x, H_PAD, group) + return x + + return wait + + +class TemplatedRingAttention(torch.autograd.Function): + @staticmethod + def forward( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None, + dropout_p: float, + is_causal: bool, + scale: float | None, + enable_gqa: bool, + return_lse: bool, + forward_op, + backward_op, + _parallel_config: "ParallelConfig" | None = None, + ): + ring_mesh = _parallel_config.context_parallel_config._ring_mesh + rank = _parallel_config.context_parallel_config._ring_local_rank + world_size = _parallel_config.context_parallel_config.ring_degree + next_rank = (rank + 1) % world_size + prev_out = prev_lse = None + + ctx.forward_op = forward_op + ctx.backward_op = backward_op + ctx.q_shape = query.shape + ctx.kv_shape = key.shape + ctx._parallel_config = _parallel_config + + kv_buffer = torch.cat([key.flatten(), value.flatten()]).contiguous() + kv_buffer = funcol.all_gather_tensor(kv_buffer, gather_dim=0, group=ring_mesh.get_group()) + kv_buffer = kv_buffer.chunk(world_size) + + for i in range(world_size): + if i > 0: + kv = kv_buffer[next_rank] + key_numel = key.numel() + key = kv[:key_numel].reshape_as(key) + value = kv[key_numel:].reshape_as(value) + next_rank = (next_rank + 1) % world_size + + out, lse = forward_op( + ctx, + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + True, + _save_ctx=i == 0, + _parallel_config=_parallel_config, + ) + + if _parallel_config.context_parallel_config.convert_to_fp32: + out = out.to(torch.float32) + lse = lse.to(torch.float32) + + # Refer to: + # https://github.com/huggingface/diffusers/pull/12693#issuecomment-3627519544 + if is_torch_version("<", "2.9.0"): + lse = lse.unsqueeze(-1) + if prev_out is not None: + out = prev_out - torch.nn.functional.sigmoid(lse - prev_lse) * (prev_out - out) + lse = prev_lse - torch.nn.functional.logsigmoid(prev_lse - lse) + prev_out = out + prev_lse = lse + + out = out.to(query.dtype) + lse = lse.squeeze(-1) + + return (out, lse) if return_lse else out + + @staticmethod + def backward( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + ): + ring_mesh = ctx._parallel_config.context_parallel_config._ring_mesh + rank = ctx._parallel_config.context_parallel_config._ring_local_rank + world_size = ctx._parallel_config.context_parallel_config.ring_degree + next_rank = (rank + 1) % world_size + next_ranks = list(range(1, world_size)) + [0] + + accum_dtype = torch.float32 if ctx._parallel_config.context_parallel_config.convert_to_fp32 else grad_out.dtype + grad_query = torch.zeros(ctx.q_shape, dtype=accum_dtype, device=grad_out.device) + grad_key = torch.zeros(ctx.kv_shape, dtype=accum_dtype, device=grad_out.device) + grad_value = torch.zeros(ctx.kv_shape, dtype=accum_dtype, device=grad_out.device) + next_grad_kv = None + + query, key, value, *_ = ctx.saved_tensors + kv_buffer = torch.cat([key.flatten(), value.flatten()]).contiguous() + kv_buffer = funcol.all_gather_tensor(kv_buffer, gather_dim=0, group=ring_mesh.get_group()) + kv_buffer = kv_buffer.chunk(world_size) + + for i in range(world_size): + if i > 0: + kv = kv_buffer[next_rank] + key_numel = key.numel() + key = kv[:key_numel].reshape_as(key) + value = kv[key_numel:].reshape_as(value) + next_rank = (next_rank + 1) % world_size + + grad_query_op, grad_key_op, grad_value_op, *_ = ctx.backward_op(ctx, grad_out) + + if i > 0: + grad_kv_buffer = _wait_tensor(next_grad_kv) + grad_key_numel = grad_key.numel() + grad_key = grad_kv_buffer[:grad_key_numel].reshape_as(grad_key) + grad_value = grad_kv_buffer[grad_key_numel:].reshape_as(grad_value) + + grad_query += grad_query_op + grad_key += grad_key_op + grad_value += grad_value_op + + if i < world_size - 1: + grad_kv_buffer = torch.cat([grad_key.flatten(), grad_value.flatten()]).contiguous() + next_grad_kv = funcol.permute_tensor(grad_kv_buffer, next_ranks, group=ring_mesh.get_group()) + + grad_query, grad_key, grad_value = (x.to(grad_out.dtype) for x in (grad_query, grad_key, grad_value)) + + return grad_query, grad_key, grad_value, None, None, None, None, None, None, None, None, None + + +class TemplatedUlyssesAttention(torch.autograd.Function): + @staticmethod + def forward( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None, + dropout_p: float, + is_causal: bool, + scale: float | None, + enable_gqa: bool, + return_lse: bool, + forward_op, + backward_op, + _parallel_config: "ParallelConfig" | None = None, + ): + ulysses_mesh = _parallel_config.context_parallel_config._ulysses_mesh + world_size = _parallel_config.context_parallel_config.ulysses_degree + group = ulysses_mesh.get_group() + + ctx.forward_op = forward_op + ctx.backward_op = backward_op + ctx._parallel_config = _parallel_config + + B, S_Q_LOCAL, H, D = query.shape + _, S_KV_LOCAL, _, _ = key.shape + H_LOCAL = H // world_size + query = query.reshape(B, S_Q_LOCAL, world_size, H_LOCAL, D).permute(2, 1, 0, 3, 4).contiguous() + key = key.reshape(B, S_KV_LOCAL, world_size, H_LOCAL, D).permute(2, 1, 0, 3, 4).contiguous() + value = value.reshape(B, S_KV_LOCAL, world_size, H_LOCAL, D).permute(2, 1, 0, 3, 4).contiguous() + query, key, value = (_all_to_all_single(x, group) for x in (query, key, value)) + query, key, value = (x.flatten(0, 1).permute(1, 0, 2, 3).contiguous() for x in (query, key, value)) + + out = forward_op( + ctx, + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + _save_ctx=True, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse, *_ = out + + out = out.reshape(B, world_size, S_Q_LOCAL, H_LOCAL, D).permute(1, 3, 0, 2, 4).contiguous() + out = _all_to_all_single(out, group) + out = out.flatten(0, 1).permute(1, 2, 0, 3).contiguous() + + if return_lse: + lse = lse.reshape(B, world_size, S_Q_LOCAL, H_LOCAL).permute(1, 3, 0, 2).contiguous() + lse = _all_to_all_single(lse, group) + lse = lse.flatten(0, 1).permute(1, 2, 0).contiguous() + else: + lse = None + + return (out, lse) if return_lse else out + + @staticmethod + def backward( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + ): + ulysses_mesh = ctx._parallel_config.context_parallel_config._ulysses_mesh + world_size = ctx._parallel_config.context_parallel_config.ulysses_degree + group = ulysses_mesh.get_group() + + B, S_LOCAL, H, D = grad_out.shape + H_LOCAL = H // world_size + + grad_out = grad_out.reshape(B, S_LOCAL, world_size, H_LOCAL, D).permute(2, 1, 0, 3, 4).contiguous() + grad_out = _all_to_all_single(grad_out, group) + grad_out = grad_out.flatten(0, 1).permute(1, 0, 2, 3).contiguous() + + grad_query_op, grad_key_op, grad_value_op, *_ = ctx.backward_op(ctx, grad_out) + + grad_query, grad_key, grad_value = ( + x.reshape(B, world_size, S_LOCAL, H_LOCAL, D).permute(1, 3, 0, 2, 4).contiguous() + for x in (grad_query_op, grad_key_op, grad_value_op) + ) + grad_query, grad_key, grad_value = (_all_to_all_single(x, group) for x in (grad_query, grad_key, grad_value)) + grad_query, grad_key, grad_value = ( + x.flatten(0, 1).permute(1, 2, 0, 3).contiguous() for x in (grad_query, grad_key, grad_value) + ) + + return grad_query, grad_key, grad_value, None, None, None, None, None, None, None, None, None + + +class TemplatedUlyssesAnythingAttention(torch.autograd.Function): + @staticmethod + def forward( + ctx: torch.autograd.function.FunctionCtx, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor, + dropout_p: float, + is_causal: bool, + scale: float, + enable_gqa: bool, + return_lse: bool, + forward_op, + backward_op, + _parallel_config: "ParallelConfig" | None = None, + **kwargs, + ): + ulysses_mesh = _parallel_config.context_parallel_config._ulysses_mesh + group = ulysses_mesh.get_group() + + ctx.forward_op = forward_op + ctx.backward_op = backward_op + ctx._parallel_config = _parallel_config + + metadata = ulysses_anything_metadata(query) + query_wait = all_to_all_single_any_qkv_async(query, group, **metadata) + key_wait = all_to_all_single_any_qkv_async(key, group, **metadata) + value_wait = all_to_all_single_any_qkv_async(value, group, **metadata) + + query = query_wait() # type: torch.Tensor + key = key_wait() # type: torch.Tensor + value = value_wait() # type: torch.Tensor + + out = forward_op( + ctx, + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + _save_ctx=False, # ulysses anything only support forward pass now. + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse, *_ = out + + # out: (B, S_Q_GLOBAL, H_LOCAL, D) -> (B, S_Q_LOCAL, H_GLOBAL, D) + out_wait = all_to_all_single_any_o_async(out, group, **metadata) + + if return_lse: + # lse: (B, S_Q_GLOBAL, H_LOCAL) + lse = lse.unsqueeze(-1) # (B, S_Q_GLOBAL, H_LOCAL, D=1) + lse_wait = all_to_all_single_any_o_async(lse, group, **metadata) + out = out_wait() # type: torch.Tensor + lse = lse_wait() # type: torch.Tensor + lse = lse.squeeze(-1).contiguous() # (B, S_Q_LOCAL, H_GLOBAL) + else: + out = out_wait() # type: torch.Tensor + lse = None + + return (out, lse) if return_lse else out + + @staticmethod + def backward( + ctx: torch.autograd.function.FunctionCtx, + grad_out: torch.Tensor, + *args, + ): + raise NotImplementedError("Backward pass for Ulysses Anything Attention in diffusers is not implemented yet.") + + +def _templated_unified_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor, + dropout_p: float, + is_causal: bool, + scale: float, + enable_gqa: bool, + return_lse: bool, + forward_op, + backward_op, + _parallel_config: "ParallelConfig" | None = None, + scatter_idx: int = 2, + gather_idx: int = 1, +): + """ + Unified Sequence Parallelism attention combining Ulysses and ring attention. See: https://arxiv.org/abs/2405.07719 + """ + ulysses_mesh = _parallel_config.context_parallel_config._ulysses_mesh + ulysses_group = ulysses_mesh.get_group() + + query = SeqAllToAllDim.apply(ulysses_group, query, scatter_idx, gather_idx) + key = SeqAllToAllDim.apply(ulysses_group, key, scatter_idx, gather_idx) + value = SeqAllToAllDim.apply(ulysses_group, value, scatter_idx, gather_idx) + out = TemplatedRingAttention.apply( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op, + backward_op, + _parallel_config, + ) + if return_lse: + context_layer, lse, *_ = out + else: + context_layer = out + # context_layer is of shape (B, S, H_LOCAL, D) + output = SeqAllToAllDim.apply( + ulysses_group, + context_layer, + gather_idx, + scatter_idx, + ) + if return_lse: + # lse is of shape (B, S, H_LOCAL, 1) + # Refer to: + # https://github.com/huggingface/diffusers/pull/12693#issuecomment-3627519544 + if is_torch_version("<", "2.9.0"): + lse = lse.unsqueeze(-1) # (B, S, H_LOCAL, 1) + lse = SeqAllToAllDim.apply(ulysses_group, lse, gather_idx, scatter_idx) + lse = lse.squeeze(-1) + return (output, lse) + return output + + +def _templated_context_parallel_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + *, + forward_op, + backward_op, + _parallel_config: "ParallelConfig" | None = None, +): + if is_causal: + raise ValueError("Causal attention is not yet supported for templated attention.") + if enable_gqa: + raise ValueError("GQA is not yet supported for templated attention.") + + # TODO: add support for unified attention with ring/ulysses degree both being > 1 + if ( + _parallel_config.context_parallel_config.ring_degree > 1 + and _parallel_config.context_parallel_config.ulysses_degree > 1 + ): + return _templated_unified_attention( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op, + backward_op, + _parallel_config, + ) + elif _parallel_config.context_parallel_config.ring_degree > 1: + return TemplatedRingAttention.apply( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op, + backward_op, + _parallel_config, + ) + elif _parallel_config.context_parallel_config.ulysses_degree > 1: + if _parallel_config.context_parallel_config.ulysses_anything: + # For Any sequence lengths and Any head num support + return TemplatedUlyssesAnythingAttention.apply( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op, + backward_op, + _parallel_config, + ) + else: + return TemplatedUlyssesAttention.apply( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op, + backward_op, + _parallel_config, + ) + else: + raise ValueError("Reaching this branch of code is unexpected. Please report a bug.") + + +# ===== Attention backends ===== + + +@_AttentionBackendRegistry.register( + AttentionBackendName.FLASH, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _flash_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + lse = None + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for flash-attn 2.") + + if _parallel_config is None: + out = flash_attn_func( + q=query, + k=key, + v=value, + dropout_p=dropout_p, + softmax_scale=scale, + causal=is_causal, + return_attn_probs=return_lse, + ) + if return_lse: + out, lse, *_ = out + else: + out = _templated_context_parallel_attention( + query, + key, + value, + None, + dropout_p, + is_causal, + scale, + False, + return_lse, + forward_op=_flash_attention_forward_op, + backward_op=_flash_attention_backward_op, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.FLASH_HUB, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _flash_attention_hub( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + lse = None + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for flash-attn 2.") + + func = _HUB_KERNELS_REGISTRY[AttentionBackendName.FLASH_HUB].kernel_fn + if _parallel_config is None: + out = func( + q=query, + k=key, + v=value, + dropout_p=dropout_p, + softmax_scale=scale, + causal=is_causal, + return_attn_probs=return_lse, + ) + if return_lse: + out, lse, *_ = out + else: + out = _templated_context_parallel_attention( + query, + key, + value, + None, + dropout_p, + is_causal, + scale, + False, + return_lse, + forward_op=_flash_attention_hub_forward_op, + backward_op=_flash_attention_hub_backward_op, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.FLASH_VARLEN_HUB, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=False, +) +def _flash_varlen_attention_hub( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + scale: float | None = None, + is_causal: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + batch_size, seq_len_q, _, _ = query.shape + _, seq_len_kv, _, _ = key.shape + + if attn_mask is not None: + attn_mask = _normalize_attn_mask(attn_mask, batch_size, seq_len_kv) + + (_, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) = ( + _prepare_for_flash_attn_or_sage_varlen( + batch_size, seq_len_q, seq_len_kv, attn_mask=attn_mask, device=query.device + ) + ) + + key_valid, value_valid = [], [] + for b in range(batch_size): + valid_len = seqlens_k[b] + key_valid.append(key[b, :valid_len]) + value_valid.append(value[b, :valid_len]) + + query_packed = query.flatten(0, 1) + key_packed = torch.cat(key_valid, dim=0) + value_packed = torch.cat(value_valid, dim=0) + + func = _HUB_KERNELS_REGISTRY[AttentionBackendName.FLASH_VARLEN_HUB].kernel_fn + out = func( + q=query_packed, + k=key_packed, + v=value_packed, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_q, + max_seqlen_k=max_seqlen_k, + dropout_p=dropout_p, + softmax_scale=scale, + causal=is_causal, + return_attn_probs=return_lse, + ) + out = out.unflatten(0, (batch_size, -1)) + + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.FLASH_VARLEN, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], +) +def _flash_varlen_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + scale: float | None = None, + is_causal: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + batch_size, seq_len_q, _, _ = query.shape + _, seq_len_kv, _, _ = key.shape + + if attn_mask is not None: + attn_mask = _normalize_attn_mask(attn_mask, batch_size, seq_len_kv) + + (_, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) = ( + _prepare_for_flash_attn_or_sage_varlen( + batch_size, seq_len_q, seq_len_kv, attn_mask=attn_mask, device=query.device + ) + ) + + key_valid, value_valid = [], [] + for b in range(batch_size): + valid_len = seqlens_k[b] + key_valid.append(key[b, :valid_len]) + value_valid.append(value[b, :valid_len]) + + query_packed = query.flatten(0, 1) + key_packed = torch.cat(key_valid, dim=0) + value_packed = torch.cat(value_valid, dim=0) + + out = flash_attn_varlen_func( + q=query_packed, + k=key_packed, + v=value_packed, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_q, + max_seqlen_k=max_seqlen_k, + dropout_p=dropout_p, + softmax_scale=scale, + causal=is_causal, + return_attn_probs=return_lse, + ) + out = out.unflatten(0, (batch_size, -1)) + + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._FLASH_3, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], +) +def _flash_attention_3( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + scale: float | None = None, + is_causal: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for flash-attn 3.") + + out, lse = _wrapped_flash_attn_3( + q=query, + k=key, + v=value, + softmax_scale=scale, + causal=is_causal, + ) + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._FLASH_3_HUB, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _flash_attention_3_hub( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + scale: float | None = None, + is_causal: bool = False, + window_size: tuple[int, int] = (-1, -1), + softcap: float = 0.0, + deterministic: bool = False, + return_attn_probs: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for flash-attn 3.") + + func = _HUB_KERNELS_REGISTRY[AttentionBackendName._FLASH_3_HUB].kernel_fn + if _parallel_config is None: + out = func( + q=query, + k=key, + v=value, + softmax_scale=scale, + causal=is_causal, + qv=None, + q_descale=None, + k_descale=None, + v_descale=None, + window_size=window_size, + softcap=softcap, + num_splits=1, + pack_gqa=None, + deterministic=deterministic, + sm_margin=0, + return_attn_probs=return_attn_probs, + ) + return (out[0], out[1]) if return_attn_probs else out + + forward_op = functools.partial( + _flash_attention_3_hub_forward_op, + window_size=window_size, + softcap=softcap, + num_splits=1, + pack_gqa=None, + deterministic=deterministic, + sm_margin=0, + ) + backward_op = functools.partial( + _flash_attention_3_hub_backward_op, + window_size=window_size, + softcap=softcap, + num_splits=1, + pack_gqa=None, + deterministic=deterministic, + sm_margin=0, + ) + out = _templated_context_parallel_attention( + query, + key, + value, + None, + 0.0, + is_causal, + scale, + False, + return_attn_probs, + forward_op=forward_op, + backward_op=backward_op, + _parallel_config=_parallel_config, + ) + if return_attn_probs: + out, lse = out + return out, lse + + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._FLASH_3_VARLEN_HUB, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=False, +) +def _flash_attention_3_varlen_hub( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + scale: float | None = None, + is_causal: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + batch_size, seq_len_q, _, _ = query.shape + _, seq_len_kv, _, _ = key.shape + + if attn_mask is not None: + attn_mask = _normalize_attn_mask(attn_mask, batch_size, seq_len_kv) + + (_, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) = ( + _prepare_for_flash_attn_or_sage_varlen( + batch_size, seq_len_q, seq_len_kv, attn_mask=attn_mask, device=query.device + ) + ) + + key_valid, value_valid = [], [] + for b in range(batch_size): + valid_len = seqlens_k[b] + key_valid.append(key[b, :valid_len]) + value_valid.append(value[b, :valid_len]) + + query_packed = query.flatten(0, 1) + key_packed = torch.cat(key_valid, dim=0) + value_packed = torch.cat(value_valid, dim=0) + + func = _HUB_KERNELS_REGISTRY[AttentionBackendName._FLASH_3_VARLEN_HUB].kernel_fn + out, lse, *_ = func( + q=query_packed, + k=key_packed, + v=value_packed, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_q, + max_seqlen_k=max_seqlen_k, + softmax_scale=scale, + causal=is_causal, + ) + out = out.unflatten(0, (batch_size, -1)) + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._FLASH_VARLEN_3, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], +) +def _flash_varlen_attention_3( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + scale: float | None = None, + is_causal: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + batch_size, seq_len_q, _, _ = query.shape + _, seq_len_kv, _, _ = key.shape + + if attn_mask is not None: + attn_mask = _normalize_attn_mask(attn_mask, batch_size, seq_len_kv) + + (_, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) = ( + _prepare_for_flash_attn_or_sage_varlen( + batch_size, seq_len_q, seq_len_kv, attn_mask=attn_mask, device=query.device + ) + ) + + key_valid, value_valid = [], [] + for b in range(batch_size): + valid_len = seqlens_k[b] + key_valid.append(key[b, :valid_len]) + value_valid.append(value[b, :valid_len]) + + query_packed = query.flatten(0, 1) + key_packed = torch.cat(key_valid, dim=0) + value_packed = torch.cat(value_valid, dim=0) + + result = flash_attn_3_varlen_func( + q=query_packed, + k=key_packed, + v=value_packed, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_q, + max_seqlen_k=max_seqlen_k, + softmax_scale=scale, + causal=is_causal, + return_attn_probs=return_lse, + ) + if isinstance(result, tuple): + out, lse, *_ = result + else: + out = result + lse = None + out = out.unflatten(0, (batch_size, -1)) + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.AITER, + constraints=[_check_device_cuda, _check_qkv_dtype_bf16_or_fp16, _check_shape], +) +def _aiter_flash_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for aiter attention") + + if not return_lse and torch.is_grad_enabled(): + # aiter requires return_lse=True by assertion when gradients are enabled. + out, lse, *_ = aiter_flash_attn_func( + q=query, + k=key, + v=value, + dropout_p=dropout_p, + softmax_scale=scale, + causal=is_causal, + return_lse=True, + ) + else: + out = aiter_flash_attn_func( + q=query, + k=key, + v=value, + dropout_p=dropout_p, + softmax_scale=scale, + causal=is_causal, + return_lse=return_lse, + ) + if return_lse: + out, lse, *_ = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.FLEX, + constraints=[_check_attn_mask_or_causal, _check_device, _check_shape], +) +def _native_flex_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | "flex_attention.BlockMask" | None = None, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + # TODO: should we LRU cache the block mask creation? + score_mod = None + block_mask = None + batch_size, seq_len_q, num_heads, _ = query.shape + _, seq_len_kv, _, _ = key.shape + + if attn_mask is None or isinstance(attn_mask, flex_attention.BlockMask): + block_mask = attn_mask + elif is_causal: + block_mask = flex_attention.create_block_mask( + _flex_attention_causal_mask_mod, batch_size, num_heads, seq_len_q, seq_len_kv, query.device + ) + elif torch.is_tensor(attn_mask): + if attn_mask.ndim == 2: + attn_mask = attn_mask.view(attn_mask.size(0), 1, attn_mask.size(1), 1) + + attn_mask = attn_mask.expand(batch_size, num_heads, seq_len_q, seq_len_kv) + + if attn_mask.dtype == torch.bool: + # TODO: this probably does not work but verify! + def mask_mod(batch_idx, head_idx, q_idx, kv_idx): + return attn_mask[batch_idx, head_idx, q_idx, kv_idx] + + block_mask = flex_attention.create_block_mask( + mask_mod, batch_size, None, seq_len_q, seq_len_kv, query.device + ) + else: + + def score_mod(score, batch_idx, head_idx, q_idx, kv_idx): + return score + attn_mask[batch_idx, head_idx, q_idx, kv_idx] + else: + raise ValueError("Attention mask must be either None, a BlockMask, or a 2D/4D tensor.") + + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + out = flex_attention.flex_attention( + query=query, + key=key, + value=value, + score_mod=score_mod, + block_mask=block_mask, + scale=scale, + enable_gqa=enable_gqa, + return_lse=return_lse, + ) + out = out.permute(0, 2, 1, 3) + return out + + +def _prepare_additive_attn_mask( + attn_mask: torch.Tensor, target_dtype: torch.dtype, reshape_4d: bool = True +) -> torch.Tensor: + """ + Convert a 2D attention mask to an additive mask, optionally reshaping to 4D for SDPA. + + This helper is used by both native SDPA and xformers backends to handle both boolean and additive masks. + + Args: + attn_mask: 2D tensor [batch_size, seq_len_k] + - Boolean: True means attend, False means mask out + - Additive: 0.0 means attend, -inf means mask out + target_dtype: The dtype to convert the mask to (usually query.dtype) + reshape_4d: If True, reshape from [batch_size, seq_len_k] to [batch_size, 1, 1, seq_len_k] for broadcasting + + Returns: + Additive mask tensor where 0.0 means attend and -inf means mask out. Shape is [batch_size, seq_len_k] if + reshape_4d=False, or [batch_size, 1, 1, seq_len_k] if reshape_4d=True. + """ + # Check if the mask is boolean or already additive + if attn_mask.dtype == torch.bool: + # Convert boolean to additive: True -> 0.0, False -> -inf + attn_mask = torch.where(attn_mask, 0.0, float("-inf")) + # Convert to target dtype + attn_mask = attn_mask.to(dtype=target_dtype) + else: + # Already additive mask - just ensure correct dtype + attn_mask = attn_mask.to(dtype=target_dtype) + + # Optionally reshape to 4D for broadcasting in attention mechanisms + if reshape_4d: + batch_size, seq_len_k = attn_mask.shape + attn_mask = attn_mask.view(batch_size, 1, 1, seq_len_k) + + return attn_mask + + +@_AttentionBackendRegistry.register( + AttentionBackendName.NATIVE, + constraints=[_check_device, _check_shape], + supports_context_parallel=True, +) +def _native_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if return_lse: + raise ValueError("Native attention backend does not support setting `return_lse=True`.") + + # Reshape 2D mask to 4D for SDPA + # SDPA accepts both boolean masks (torch.bool) and additive masks (float) + if ( + attn_mask is not None + and attn_mask.ndim == 2 + and attn_mask.shape[0] == query.shape[0] + and attn_mask.shape[1] == key.shape[1] + ): + # Just reshape [batch_size, seq_len_k] -> [batch_size, 1, 1, seq_len_k] + # SDPA handles both boolean and additive masks correctly + attn_mask = attn_mask.unsqueeze(1).unsqueeze(1) + + if _parallel_config is None: + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + out = torch.nn.functional.scaled_dot_product_attention( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + dropout_p=dropout_p, + is_causal=is_causal, + scale=scale, + enable_gqa=enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + else: + out = _templated_context_parallel_attention( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op=_native_attention_forward_op, + backward_op=_native_attention_backward_op, + _parallel_config=_parallel_config, + ) + + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._NATIVE_CUDNN, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _native_cudnn_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + lse = None + if _parallel_config is None and not return_lse: + query, key, value = (x.permute(0, 2, 1, 3).contiguous() for x in (query, key, value)) + with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.CUDNN_ATTENTION): + out = torch.nn.functional.scaled_dot_product_attention( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + dropout_p=dropout_p, + is_causal=is_causal, + scale=scale, + enable_gqa=enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + else: + out = _templated_context_parallel_attention( + query, + key, + value, + attn_mask, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op=_cudnn_attention_forward_op, + backward_op=_cudnn_attention_backward_op, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._NATIVE_EFFICIENT, + constraints=[_check_device, _check_shape], +) +def _native_efficient_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if return_lse: + raise ValueError("Native efficient attention backend does not support setting `return_lse=True`.") + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.EFFICIENT_ATTENTION): + out = torch.nn.functional.scaled_dot_product_attention( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + dropout_p=dropout_p, + is_causal=is_causal, + scale=scale, + enable_gqa=enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._NATIVE_FLASH, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _native_flash_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for aiter attention") + + lse = None + if _parallel_config is None and not return_lse: + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.FLASH_ATTENTION): + out = torch.nn.functional.scaled_dot_product_attention( + query=query, + key=key, + value=value, + attn_mask=None, # not supported + dropout_p=dropout_p, + is_causal=is_causal, + scale=scale, + enable_gqa=enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + else: + out = _templated_context_parallel_attention( + query, + key, + value, + None, + dropout_p, + is_causal, + scale, + enable_gqa, + return_lse, + forward_op=_native_flash_attention_forward_op, + backward_op=_native_flash_attention_backward_op, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._NATIVE_MATH, + constraints=[_check_device, _check_shape], +) +def _native_math_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if return_lse: + raise ValueError("Native math attention backend does not support setting `return_lse=True`.") + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH): + out = torch.nn.functional.scaled_dot_product_attention( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + dropout_p=dropout_p, + is_causal=is_causal, + scale=scale, + enable_gqa=enable_gqa, + ) + out = out.permute(0, 2, 1, 3) + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._NATIVE_NPU, + constraints=[_check_device, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _native_npu_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if return_lse: + raise ValueError("NPU attention backend does not support setting `return_lse=True`.") + if _parallel_config is None: + attn_mask = _maybe_modify_attn_mask_npu(query, key, attn_mask) + + out = npu_fusion_attention( + query, + key, + value, + query.size(2), # num_heads + atten_mask=attn_mask, + input_layout="BSND", + pse=None, + scale=1.0 / math.sqrt(query.shape[-1]) if scale is None else scale, + pre_tockens=65536, + next_tockens=65536, + keep_prob=1.0 - dropout_p, + sync=False, + inner_precise=0, + )[0] + else: + out = _templated_context_parallel_attention( + query, + key, + value, + attn_mask, + dropout_p, + None, + scale, + None, + return_lse, + forward_op=_npu_attention_forward_op, + backward_op=_npu_attention_backward_op, + _parallel_config=_parallel_config, + ) + return out + + +# Reference: https://github.com/pytorch/xla/blob/06c5533de6588f6b90aa1655d9850bcf733b90b4/torch_xla/experimental/custom_kernel.py#L853 +@_AttentionBackendRegistry.register( + AttentionBackendName._NATIVE_XLA, + constraints=[_check_device, _check_shape], +) +def _native_xla_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for XLA attention") + if return_lse: + raise ValueError("XLA attention backend does not support setting `return_lse=True`.") + query, key, value = (x.permute(0, 2, 1, 3) for x in (query, key, value)) + query = query / math.sqrt(query.shape[-1]) + out = xla_flash_attention( + q=query, + k=key, + v=value, + causal=is_causal, + ) + out = out.permute(0, 2, 1, 3) + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.SAGE, + constraints=[_check_device_cuda, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _sage_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for sage attention") + lse = None + if _parallel_config is None: + out = sageattn( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + if return_lse: + out, lse, *_ = out + else: + out = _templated_context_parallel_attention( + query, + key, + value, + None, + 0.0, + is_causal, + scale, + False, + return_lse, + forward_op=_sage_attention_forward_op, + backward_op=_sage_attention_backward_op, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.SAGE_HUB, + constraints=[_check_device_cuda, _check_qkv_dtype_bf16_or_fp16, _check_shape], + supports_context_parallel=True, +) +def _sage_attention_hub( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for sage attention") + lse = None + func = _HUB_KERNELS_REGISTRY[AttentionBackendName.SAGE_HUB].kernel_fn + if _parallel_config is None: + out = func( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + if return_lse: + out, lse, *_ = out + else: + out = _templated_context_parallel_attention( + query, + key, + value, + None, + 0.0, + is_causal, + scale, + False, + return_lse, + forward_op=_sage_attention_hub_forward_op, + backward_op=_sage_attention_backward_op, + _parallel_config=_parallel_config, + ) + if return_lse: + out, lse = out + + return (out, lse) if return_lse else out + + +@_AttentionBackendRegistry.register( + AttentionBackendName.SAGE_VARLEN, + constraints=[_check_device_cuda, _check_qkv_dtype_bf16_or_fp16, _check_shape], +) +def _sage_varlen_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if return_lse: + raise ValueError("Sage varlen backend does not support setting `return_lse=True`.") + + batch_size, seq_len_q, _, _ = query.shape + _, seq_len_kv, _, _ = key.shape + + if attn_mask is not None: + attn_mask = _normalize_attn_mask(attn_mask, batch_size, seq_len_kv) + + (_, seqlens_k), (cu_seqlens_q, cu_seqlens_k), (max_seqlen_q, max_seqlen_k) = ( + _prepare_for_flash_attn_or_sage_varlen( + batch_size, seq_len_q, seq_len_kv, attn_mask=attn_mask, device=query.device + ) + ) + + key_valid, value_valid = [], [] + for b in range(batch_size): + valid_len = seqlens_k[b] + key_valid.append(key[b, :valid_len]) + value_valid.append(value[b, :valid_len]) + + query_packed = query.flatten(0, 1) + key_packed = torch.cat(key_valid, dim=0) + value_packed = torch.cat(value_valid, dim=0) + + out = sageattn_varlen( + q=query_packed, + k=key_packed, + v=value_packed, + cu_seqlens_q=cu_seqlens_q, + cu_seqlens_k=cu_seqlens_k, + max_seqlen_q=max_seqlen_q, + max_seqlen_k=max_seqlen_k, + is_causal=is_causal, + sm_scale=scale, + ) + out = out.unflatten(0, (batch_size, -1)) + + return out + + +@_AttentionBackendRegistry.register( + AttentionBackendName._SAGE_QK_INT8_PV_FP8_CUDA, + constraints=[_check_device_cuda_atleast_smXY(9, 0), _check_shape], +) +def _sage_qk_int8_pv_fp8_cuda_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for sage attention") + return sageattn_qk_int8_pv_fp8_cuda( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + + +@_AttentionBackendRegistry.register( + AttentionBackendName._SAGE_QK_INT8_PV_FP8_CUDA_SM90, + constraints=[_check_device_cuda_atleast_smXY(9, 0), _check_shape], +) +def _sage_qk_int8_pv_fp8_cuda_sm90_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for sage attention") + return sageattn_qk_int8_pv_fp8_cuda_sm90( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + + +@_AttentionBackendRegistry.register( + AttentionBackendName._SAGE_QK_INT8_PV_FP16_CUDA, + constraints=[_check_device_cuda_atleast_smXY(8, 0), _check_shape], +) +def _sage_qk_int8_pv_fp16_cuda_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for sage attention") + return sageattn_qk_int8_pv_fp16_cuda( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + + +@_AttentionBackendRegistry.register( + AttentionBackendName._SAGE_QK_INT8_PV_FP16_TRITON, + constraints=[_check_device_cuda_atleast_smXY(8, 0), _check_shape], +) +def _sage_qk_int8_pv_fp16_triton_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + is_causal: bool = False, + scale: float | None = None, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if attn_mask is not None: + raise ValueError("`attn_mask` is not supported for sage attention") + return sageattn_qk_int8_pv_fp16_triton( + q=query, + k=key, + v=value, + tensor_layout="NHD", + is_causal=is_causal, + sm_scale=scale, + return_lse=return_lse, + ) + + +@_AttentionBackendRegistry.register( + AttentionBackendName.XFORMERS, + constraints=[_check_attn_mask_or_causal, _check_device, _check_shape], +) +def _xformers_attention( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attn_mask: torch.Tensor | None = None, + dropout_p: float = 0.0, + is_causal: bool = False, + scale: float | None = None, + enable_gqa: bool = False, + return_lse: bool = False, + _parallel_config: "ParallelConfig" | None = None, +) -> torch.Tensor: + if return_lse: + raise ValueError("xformers attention backend does not support setting `return_lse=True`.") + + batch_size, seq_len_q, num_heads_q, _ = query.shape + _, seq_len_kv, num_heads_kv, _ = key.shape + + if is_causal: + attn_mask = xops.LowerTriangularMask() + elif attn_mask is not None: + if attn_mask.ndim == 2: + # Convert 2D mask to 4D for xformers + # Mask can be boolean (True=attend, False=mask) or additive (0.0=attend, -inf=mask) + # xformers requires 4D additive masks [batch, heads, seq_q, seq_k] + # Need memory alignment - create larger tensor and slice for alignment + original_seq_len = attn_mask.size(1) + aligned_seq_len = ((original_seq_len + 7) // 8) * 8 # Round up to multiple of 8 + + # Create aligned 4D tensor and slice to ensure proper memory layout + aligned_mask = torch.zeros( + (batch_size, num_heads_q, seq_len_q, aligned_seq_len), + dtype=query.dtype, + device=query.device, + ) + # Convert to 4D additive mask (handles both boolean and additive inputs) + mask_additive = _prepare_additive_attn_mask( + attn_mask, target_dtype=query.dtype + ) # [batch, 1, 1, seq_len_k] + # Broadcast to [batch, heads, seq_q, seq_len_k] + aligned_mask[:, :, :, :original_seq_len] = mask_additive + # Mask out the padding (already -inf from zeros -> where with default) + aligned_mask[:, :, :, original_seq_len:] = float("-inf") + + # Slice to actual size with proper alignment + attn_mask = aligned_mask[:, :, :, :seq_len_kv] + elif attn_mask.ndim != 4: + raise ValueError("Only 2D and 4D attention masks are supported for xformers attention.") + elif attn_mask.ndim == 4: + attn_mask = attn_mask.expand(batch_size, num_heads_q, seq_len_q, seq_len_kv).type_as(query) + + if enable_gqa: + if num_heads_q % num_heads_kv != 0: + raise ValueError("Number of heads in query must be divisible by number of heads in key/value.") + num_heads_per_group = num_heads_q // num_heads_kv + query = query.unflatten(2, (num_heads_kv, -1)) + key = key.unflatten(2, (num_heads_kv, -1)).expand(-1, -1, -1, num_heads_per_group, -1) + value = value.unflatten(2, (num_heads_kv, -1)).expand(-1, -1, -1, num_heads_per_group, -1) + + out = xops.memory_efficient_attention(query, key, value, attn_mask, dropout_p, scale) + + if enable_gqa: + out = out.flatten(2, 3) + + return out diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..1bde62e5c6665653f1803ecc0b45e9e227ca8743 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_flax.py @@ -0,0 +1,524 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import math + +import flax.linen as nn +import jax +import jax.numpy as jnp + +from ..utils import logging + + +logger = logging.get_logger(__name__) + + +def _query_chunk_attention(query, key, value, precision, key_chunk_size: int = 4096): + """Multi-head dot product attention with a limited number of queries.""" + num_kv, num_heads, k_features = key.shape[-3:] + v_features = value.shape[-1] + key_chunk_size = min(key_chunk_size, num_kv) + query = query / jnp.sqrt(k_features) + + @functools.partial(jax.checkpoint, prevent_cse=False) + def summarize_chunk(query, key, value): + attn_weights = jnp.einsum("...qhd,...khd->...qhk", query, key, precision=precision) + + max_score = jnp.max(attn_weights, axis=-1, keepdims=True) + max_score = jax.lax.stop_gradient(max_score) + exp_weights = jnp.exp(attn_weights - max_score) + + exp_values = jnp.einsum("...vhf,...qhv->...qhf", value, exp_weights, precision=precision) + max_score = jnp.einsum("...qhk->...qh", max_score) + + return (exp_values, exp_weights.sum(axis=-1), max_score) + + def chunk_scanner(chunk_idx): + # julienne key array + key_chunk = jax.lax.dynamic_slice( + operand=key, + start_indices=[0] * (key.ndim - 3) + [chunk_idx, 0, 0], # [...,k,h,d] + slice_sizes=list(key.shape[:-3]) + [key_chunk_size, num_heads, k_features], # [...,k,h,d] + ) + + # julienne value array + value_chunk = jax.lax.dynamic_slice( + operand=value, + start_indices=[0] * (value.ndim - 3) + [chunk_idx, 0, 0], # [...,v,h,d] + slice_sizes=list(value.shape[:-3]) + [key_chunk_size, num_heads, v_features], # [...,v,h,d] + ) + + return summarize_chunk(query, key_chunk, value_chunk) + + chunk_values, chunk_weights, chunk_max = jax.lax.map(f=chunk_scanner, xs=jnp.arange(0, num_kv, key_chunk_size)) + + global_max = jnp.max(chunk_max, axis=0, keepdims=True) + max_diffs = jnp.exp(chunk_max - global_max) + + chunk_values *= jnp.expand_dims(max_diffs, axis=-1) + chunk_weights *= max_diffs + + all_values = chunk_values.sum(axis=0) + all_weights = jnp.expand_dims(chunk_weights, -1).sum(axis=0) + + return all_values / all_weights + + +def jax_memory_efficient_attention( + query, key, value, precision=jax.lax.Precision.HIGHEST, query_chunk_size: int = 1024, key_chunk_size: int = 4096 +): + r""" + Flax Memory-efficient multi-head dot product attention. https://huggingface.co/papers/2112.05682v2 + https://github.com/AminRezaei0x443/memory-efficient-attention + + Args: + query (`jnp.ndarray`): (batch..., query_length, head, query_key_depth_per_head) + key (`jnp.ndarray`): (batch..., key_value_length, head, query_key_depth_per_head) + value (`jnp.ndarray`): (batch..., key_value_length, head, value_depth_per_head) + precision (`jax.lax.Precision`, *optional*, defaults to `jax.lax.Precision.HIGHEST`): + numerical precision for computation + query_chunk_size (`int`, *optional*, defaults to 1024): + chunk size to divide query array value must divide query_length equally without remainder + key_chunk_size (`int`, *optional*, defaults to 4096): + chunk size to divide key and value array value must divide key_value_length equally without remainder + + Returns: + (`jnp.ndarray`) with shape of (batch..., query_length, head, value_depth_per_head) + """ + num_q, num_heads, q_features = query.shape[-3:] + + def chunk_scanner(chunk_idx, _): + # julienne query array + query_chunk = jax.lax.dynamic_slice( + operand=query, + start_indices=([0] * (query.ndim - 3)) + [chunk_idx, 0, 0], # [...,q,h,d] + slice_sizes=list(query.shape[:-3]) + [min(query_chunk_size, num_q), num_heads, q_features], # [...,q,h,d] + ) + + return ( + chunk_idx + query_chunk_size, # unused ignore it + _query_chunk_attention( + query=query_chunk, key=key, value=value, precision=precision, key_chunk_size=key_chunk_size + ), + ) + + _, res = jax.lax.scan( + f=chunk_scanner, + init=0, + xs=None, + length=math.ceil(num_q / query_chunk_size), # start counter # stop counter + ) + + return jnp.concatenate(res, axis=-3) # fuse the chunked result back + + +class FlaxAttention(nn.Module): + r""" + A Flax multi-head attention module as described in: https://huggingface.co/papers/1706.03762 + + Parameters: + query_dim (:obj:`int`): + Input hidden states dimension + heads (:obj:`int`, *optional*, defaults to 8): + Number of heads + dim_head (:obj:`int`, *optional*, defaults to 64): + Hidden states dimension inside each head + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + use_memory_efficient_attention (`bool`, *optional*, defaults to `False`): + enable memory efficient attention https://huggingface.co/papers/2112.05682 + split_head_dim (`bool`, *optional*, defaults to `False`): + Whether to split the head dimension into a new axis for the self-attention computation. In most cases, + enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL. + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + + """ + + query_dim: int + heads: int = 8 + dim_head: int = 64 + dropout: float = 0.0 + use_memory_efficient_attention: bool = False + split_head_dim: bool = False + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + inner_dim = self.dim_head * self.heads + self.scale = self.dim_head**-0.5 + + # Weights were exported with old names {to_q, to_k, to_v, to_out} + self.query = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_q") + self.key = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_k") + self.value = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_v") + + self.proj_attn = nn.Dense(self.query_dim, dtype=self.dtype, name="to_out_0") + self.dropout_layer = nn.Dropout(rate=self.dropout) + + def reshape_heads_to_batch_dim(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape(batch_size, seq_len, head_size, dim // head_size) + tensor = jnp.transpose(tensor, (0, 2, 1, 3)) + tensor = tensor.reshape(batch_size * head_size, seq_len, dim // head_size) + return tensor + + def reshape_batch_dim_to_heads(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape(batch_size // head_size, head_size, seq_len, dim) + tensor = jnp.transpose(tensor, (0, 2, 1, 3)) + tensor = tensor.reshape(batch_size // head_size, seq_len, dim * head_size) + return tensor + + def __call__(self, hidden_states, context=None, deterministic=True): + context = hidden_states if context is None else context + + query_proj = self.query(hidden_states) + key_proj = self.key(context) + value_proj = self.value(context) + + if self.split_head_dim: + b = hidden_states.shape[0] + query_states = jnp.reshape(query_proj, (b, -1, self.heads, self.dim_head)) + key_states = jnp.reshape(key_proj, (b, -1, self.heads, self.dim_head)) + value_states = jnp.reshape(value_proj, (b, -1, self.heads, self.dim_head)) + else: + query_states = self.reshape_heads_to_batch_dim(query_proj) + key_states = self.reshape_heads_to_batch_dim(key_proj) + value_states = self.reshape_heads_to_batch_dim(value_proj) + + if self.use_memory_efficient_attention: + query_states = query_states.transpose(1, 0, 2) + key_states = key_states.transpose(1, 0, 2) + value_states = value_states.transpose(1, 0, 2) + + # this if statement create a chunk size for each layer of the unet + # the chunk size is equal to the query_length dimension of the deepest layer of the unet + + flatten_latent_dim = query_states.shape[-3] + if flatten_latent_dim % 64 == 0: + query_chunk_size = int(flatten_latent_dim / 64) + elif flatten_latent_dim % 16 == 0: + query_chunk_size = int(flatten_latent_dim / 16) + elif flatten_latent_dim % 4 == 0: + query_chunk_size = int(flatten_latent_dim / 4) + else: + query_chunk_size = int(flatten_latent_dim) + + hidden_states = jax_memory_efficient_attention( + query_states, key_states, value_states, query_chunk_size=query_chunk_size, key_chunk_size=4096 * 4 + ) + hidden_states = hidden_states.transpose(1, 0, 2) + hidden_states = self.reshape_batch_dim_to_heads(hidden_states) + else: + # compute attentions + if self.split_head_dim: + attention_scores = jnp.einsum("b t n h, b f n h -> b n f t", key_states, query_states) + else: + attention_scores = jnp.einsum("b i d, b j d->b i j", query_states, key_states) + + attention_scores = attention_scores * self.scale + attention_probs = nn.softmax(attention_scores, axis=-1 if self.split_head_dim else 2) + + # attend to values + if self.split_head_dim: + hidden_states = jnp.einsum("b n f t, b t n h -> b f n h", attention_probs, value_states) + b = hidden_states.shape[0] + hidden_states = jnp.reshape(hidden_states, (b, -1, self.heads * self.dim_head)) + else: + hidden_states = jnp.einsum("b i j, b j d -> b i d", attention_probs, value_states) + hidden_states = self.reshape_batch_dim_to_heads(hidden_states) + + hidden_states = self.proj_attn(hidden_states) + return self.dropout_layer(hidden_states, deterministic=deterministic) + + +class FlaxBasicTransformerBlock(nn.Module): + r""" + A Flax transformer block layer with `GLU` (Gated Linear Unit) activation function as described in: + https://huggingface.co/papers/1706.03762 + + + Parameters: + dim (:obj:`int`): + Inner hidden states dimension + n_heads (:obj:`int`): + Number of heads + d_head (:obj:`int`): + Hidden states dimension inside each head + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + only_cross_attention (`bool`, defaults to `False`): + Whether to only apply cross attention. + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + use_memory_efficient_attention (`bool`, *optional*, defaults to `False`): + enable memory efficient attention https://huggingface.co/papers/2112.05682 + split_head_dim (`bool`, *optional*, defaults to `False`): + Whether to split the head dimension into a new axis for the self-attention computation. In most cases, + enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL. + """ + + dim: int + n_heads: int + d_head: int + dropout: float = 0.0 + only_cross_attention: bool = False + dtype: jnp.dtype = jnp.float32 + use_memory_efficient_attention: bool = False + split_head_dim: bool = False + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + # self attention (or cross_attention if only_cross_attention is True) + self.attn1 = FlaxAttention( + self.dim, + self.n_heads, + self.d_head, + self.dropout, + self.use_memory_efficient_attention, + self.split_head_dim, + dtype=self.dtype, + ) + # cross attention + self.attn2 = FlaxAttention( + self.dim, + self.n_heads, + self.d_head, + self.dropout, + self.use_memory_efficient_attention, + self.split_head_dim, + dtype=self.dtype, + ) + self.ff = FlaxFeedForward(dim=self.dim, dropout=self.dropout, dtype=self.dtype) + self.norm1 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype) + self.norm2 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype) + self.norm3 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype) + self.dropout_layer = nn.Dropout(rate=self.dropout) + + def __call__(self, hidden_states, context, deterministic=True): + # self attention + residual = hidden_states + if self.only_cross_attention: + hidden_states = self.attn1(self.norm1(hidden_states), context, deterministic=deterministic) + else: + hidden_states = self.attn1(self.norm1(hidden_states), deterministic=deterministic) + hidden_states = hidden_states + residual + + # cross attention + residual = hidden_states + hidden_states = self.attn2(self.norm2(hidden_states), context, deterministic=deterministic) + hidden_states = hidden_states + residual + + # feed forward + residual = hidden_states + hidden_states = self.ff(self.norm3(hidden_states), deterministic=deterministic) + hidden_states = hidden_states + residual + + return self.dropout_layer(hidden_states, deterministic=deterministic) + + +class FlaxTransformer2DModel(nn.Module): + r""" + A Spatial Transformer layer with Gated Linear Unit (GLU) activation function as described in: + https://huggingface.co/papers/1506.02025 + + + Parameters: + in_channels (:obj:`int`): + Input number of channels + n_heads (:obj:`int`): + Number of heads + d_head (:obj:`int`): + Hidden states dimension inside each head + depth (:obj:`int`, *optional*, defaults to 1): + Number of transformers block + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + use_linear_projection (`bool`, defaults to `False`): tbd + only_cross_attention (`bool`, defaults to `False`): tbd + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + use_memory_efficient_attention (`bool`, *optional*, defaults to `False`): + enable memory efficient attention https://huggingface.co/papers/2112.05682 + split_head_dim (`bool`, *optional*, defaults to `False`): + Whether to split the head dimension into a new axis for the self-attention computation. In most cases, + enabling this flag should speed up the computation for Stable Diffusion 2.x and Stable Diffusion XL. + """ + + in_channels: int + n_heads: int + d_head: int + depth: int = 1 + dropout: float = 0.0 + use_linear_projection: bool = False + only_cross_attention: bool = False + dtype: jnp.dtype = jnp.float32 + use_memory_efficient_attention: bool = False + split_head_dim: bool = False + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.norm = nn.GroupNorm(num_groups=32, epsilon=1e-5) + + inner_dim = self.n_heads * self.d_head + if self.use_linear_projection: + self.proj_in = nn.Dense(inner_dim, dtype=self.dtype) + else: + self.proj_in = nn.Conv( + inner_dim, + kernel_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype=self.dtype, + ) + + self.transformer_blocks = [ + FlaxBasicTransformerBlock( + inner_dim, + self.n_heads, + self.d_head, + dropout=self.dropout, + only_cross_attention=self.only_cross_attention, + dtype=self.dtype, + use_memory_efficient_attention=self.use_memory_efficient_attention, + split_head_dim=self.split_head_dim, + ) + for _ in range(self.depth) + ] + + if self.use_linear_projection: + self.proj_out = nn.Dense(inner_dim, dtype=self.dtype) + else: + self.proj_out = nn.Conv( + inner_dim, + kernel_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype=self.dtype, + ) + + self.dropout_layer = nn.Dropout(rate=self.dropout) + + def __call__(self, hidden_states, context, deterministic=True): + batch, height, width, channels = hidden_states.shape + residual = hidden_states + hidden_states = self.norm(hidden_states) + if self.use_linear_projection: + hidden_states = hidden_states.reshape(batch, height * width, channels) + hidden_states = self.proj_in(hidden_states) + else: + hidden_states = self.proj_in(hidden_states) + hidden_states = hidden_states.reshape(batch, height * width, channels) + + for transformer_block in self.transformer_blocks: + hidden_states = transformer_block(hidden_states, context, deterministic=deterministic) + + if self.use_linear_projection: + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.reshape(batch, height, width, channels) + else: + hidden_states = hidden_states.reshape(batch, height, width, channels) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states + residual + return self.dropout_layer(hidden_states, deterministic=deterministic) + + +class FlaxFeedForward(nn.Module): + r""" + Flax module that encapsulates two Linear layers separated by a non-linearity. It is the counterpart of PyTorch's + [`FeedForward`] class, with the following simplifications: + - The activation function is currently hardcoded to a gated linear unit from: + https://huggingface.co/papers/2002.05202 + - `dim_out` is equal to `dim`. + - The number of hidden dimensions is hardcoded to `dim * 4` in [`FlaxGELU`]. + + Parameters: + dim (:obj:`int`): + Inner hidden states dimension + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + dim: int + dropout: float = 0.0 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + # The second linear layer needs to be called + # net_2 for now to match the index of the Sequential layer + self.net_0 = FlaxGEGLU(self.dim, self.dropout, self.dtype) + self.net_2 = nn.Dense(self.dim, dtype=self.dtype) + + def __call__(self, hidden_states, deterministic=True): + hidden_states = self.net_0(hidden_states, deterministic=deterministic) + hidden_states = self.net_2(hidden_states) + return hidden_states + + +class FlaxGEGLU(nn.Module): + r""" + Flax implementation of a Linear layer followed by the variant of the gated linear unit activation function from + https://huggingface.co/papers/2002.05202. + + Parameters: + dim (:obj:`int`): + Input hidden states dimension + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + dim: int + dropout: float = 0.0 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + inner_dim = self.dim * 4 + self.proj = nn.Dense(inner_dim * 2, dtype=self.dtype) + self.dropout_layer = nn.Dropout(rate=self.dropout) + + def __call__(self, hidden_states, deterministic=True): + hidden_states = self.proj(hidden_states) + hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2) + return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_processor.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..f83a1753dac554abdcd4cf43e50ca19e71f1adc9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/attention_processor.py @@ -0,0 +1,5679 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import inspect +import math +from typing import Callable + +import torch +import torch.nn.functional as F +from torch import nn + +from ..image_processor import IPAdapterMaskProcessor +from ..utils import deprecate, is_torch_xla_available, logging +from ..utils.import_utils import is_torch_npu_available, is_torch_xla_version, is_xformers_available +from ..utils.torch_utils import is_torch_version, maybe_allow_in_graph + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +if is_torch_npu_available(): + import torch_npu + +if is_xformers_available(): + import xformers + import xformers.ops +else: + xformers = None + +if is_torch_xla_available(): + # flash attention pallas kernel is introduced in the torch_xla 2.3 release. + if is_torch_xla_version(">", "2.2"): + from torch_xla.experimental.custom_kernel import flash_attention + from torch_xla.runtime import is_spmd + XLA_AVAILABLE = True +else: + XLA_AVAILABLE = False + + +@maybe_allow_in_graph +class Attention(nn.Module): + r""" + A cross attention layer. + + Parameters: + query_dim (`int`): + The number of channels in the query. + cross_attention_dim (`int`, *optional*): + The number of channels in the encoder_hidden_states. If not given, defaults to `query_dim`. + heads (`int`, *optional*, defaults to 8): + The number of heads to use for multi-head attention. + kv_heads (`int`, *optional*, defaults to `None`): + The number of key and value heads to use for multi-head attention. Defaults to `heads`. If + `kv_heads=heads`, the model will use Multi Head Attention (MHA), if `kv_heads=1` the model will use Multi + Query Attention (MQA) otherwise GQA is used. + dim_head (`int`, *optional*, defaults to 64): + The number of channels in each head. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + bias (`bool`, *optional*, defaults to False): + Set to `True` for the query, key, and value linear layers to contain a bias parameter. + upcast_attention (`bool`, *optional*, defaults to False): + Set to `True` to upcast the attention computation to `float32`. + upcast_softmax (`bool`, *optional*, defaults to False): + Set to `True` to upcast the softmax computation to `float32`. + cross_attention_norm (`str`, *optional*, defaults to `None`): + The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`. + cross_attention_norm_num_groups (`int`, *optional*, defaults to 32): + The number of groups to use for the group norm in the cross attention. + added_kv_proj_dim (`int`, *optional*, defaults to `None`): + The number of channels to use for the added key and value projections. If `None`, no projection is used. + norm_num_groups (`int`, *optional*, defaults to `None`): + The number of groups to use for the group norm in the attention. + spatial_norm_dim (`int`, *optional*, defaults to `None`): + The number of channels to use for the spatial normalization. + out_bias (`bool`, *optional*, defaults to `True`): + Set to `True` to use a bias in the output linear layer. + scale_qk (`bool`, *optional*, defaults to `True`): + Set to `True` to scale the query and key by `1 / sqrt(dim_head)`. + only_cross_attention (`bool`, *optional*, defaults to `False`): + Set to `True` to only use cross attention and not added_kv_proj_dim. Can only be set to `True` if + `added_kv_proj_dim` is not `None`. + eps (`float`, *optional*, defaults to 1e-5): + An additional value added to the denominator in group normalization that is used for numerical stability. + rescale_output_factor (`float`, *optional*, defaults to 1.0): + A factor to rescale the output by dividing it with this value. + residual_connection (`bool`, *optional*, defaults to `False`): + Set to `True` to add the residual connection to the output. + _from_deprecated_attn_block (`bool`, *optional*, defaults to `False`): + Set to `True` if the attention block is loaded from a deprecated state dict. + processor (`AttnProcessor`, *optional*, defaults to `None`): + The attention processor to use. If `None`, defaults to `AttnProcessor2_0` if `torch 2.x` is used and + `AttnProcessor` otherwise. + """ + + def __init__( + self, + query_dim: int, + cross_attention_dim: int | None = None, + heads: int = 8, + kv_heads: int | None = None, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + upcast_attention: bool = False, + upcast_softmax: bool = False, + cross_attention_norm: str | None = None, + cross_attention_norm_num_groups: int = 32, + qk_norm: str | None = None, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + norm_num_groups: int | None = None, + spatial_norm_dim: int | None = None, + out_bias: bool = True, + scale_qk: bool = True, + only_cross_attention: bool = False, + eps: float = 1e-5, + rescale_output_factor: float = 1.0, + residual_connection: bool = False, + _from_deprecated_attn_block: bool = False, + processor: "AttnProcessor" | None = None, + out_dim: int = None, + out_context_dim: int = None, + context_pre_only=None, + pre_only=False, + elementwise_affine: bool = True, + is_causal: bool = False, + ): + super().__init__() + + # To prevent circular import. + from .normalization import FP32LayerNorm, LpNorm, RMSNorm + + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.inner_kv_dim = self.inner_dim if kv_heads is None else dim_head * kv_heads + self.query_dim = query_dim + self.use_bias = bias + self.is_cross_attention = cross_attention_dim is not None + self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.rescale_output_factor = rescale_output_factor + self.residual_connection = residual_connection + self.dropout = dropout + self.fused_projections = False + self.out_dim = out_dim if out_dim is not None else query_dim + self.out_context_dim = out_context_dim if out_context_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.is_causal = is_causal + + # we make use of this private variable to know whether this class is loaded + # with an deprecated state dict so that we can convert it on the fly + self._from_deprecated_attn_block = _from_deprecated_attn_block + + self.scale_qk = scale_qk + self.scale = dim_head**-0.5 if self.scale_qk else 1.0 + + self.heads = out_dim // dim_head if out_dim is not None else heads + # for slice_size > 0 the attention score computation + # is split across the batch axis to save memory + # You can set slice_size with `set_attention_slice` + self.sliceable_head_dim = heads + + self.added_kv_proj_dim = added_kv_proj_dim + self.only_cross_attention = only_cross_attention + + if self.added_kv_proj_dim is None and self.only_cross_attention: + raise ValueError( + "`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`." + ) + + if norm_num_groups is not None: + self.group_norm = nn.GroupNorm(num_channels=query_dim, num_groups=norm_num_groups, eps=eps, affine=True) + else: + self.group_norm = None + + if spatial_norm_dim is not None: + self.spatial_norm = SpatialNorm(f_channels=query_dim, zq_channels=spatial_norm_dim) + else: + self.spatial_norm = None + + if qk_norm is None: + self.norm_q = None + self.norm_k = None + elif qk_norm == "layer_norm": + self.norm_q = nn.LayerNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = nn.LayerNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + elif qk_norm == "fp32_layer_norm": + self.norm_q = FP32LayerNorm(dim_head, elementwise_affine=False, bias=False, eps=eps) + self.norm_k = FP32LayerNorm(dim_head, elementwise_affine=False, bias=False, eps=eps) + elif qk_norm == "layer_norm_across_heads": + # Lumina applies qk norm across all heads + self.norm_q = nn.LayerNorm(dim_head * heads, eps=eps) + self.norm_k = nn.LayerNorm(dim_head * kv_heads, eps=eps) + elif qk_norm == "rms_norm": + self.norm_q = RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + elif qk_norm == "rms_norm_across_heads": + # LTX applies qk norm across all heads + self.norm_q = RMSNorm(dim_head * heads, eps=eps) + self.norm_k = RMSNorm(dim_head * kv_heads, eps=eps) + elif qk_norm == "l2": + self.norm_q = LpNorm(p=2, dim=-1, eps=eps) + self.norm_k = LpNorm(p=2, dim=-1, eps=eps) + else: + raise ValueError( + f"unknown qk_norm: {qk_norm}. Should be one of None, 'layer_norm', 'fp32_layer_norm', 'layer_norm_across_heads', 'rms_norm', 'rms_norm_across_heads', 'l2'." + ) + + if cross_attention_norm is None: + self.norm_cross = None + elif cross_attention_norm == "layer_norm": + self.norm_cross = nn.LayerNorm(self.cross_attention_dim) + elif cross_attention_norm == "group_norm": + if self.added_kv_proj_dim is not None: + # The given `encoder_hidden_states` are initially of shape + # (batch_size, seq_len, added_kv_proj_dim) before being projected + # to (batch_size, seq_len, cross_attention_dim). The norm is applied + # before the projection, so we need to use `added_kv_proj_dim` as + # the number of channels for the group norm. + norm_cross_num_channels = added_kv_proj_dim + else: + norm_cross_num_channels = self.cross_attention_dim + + self.norm_cross = nn.GroupNorm( + num_channels=norm_cross_num_channels, num_groups=cross_attention_norm_num_groups, eps=1e-5, affine=True + ) + else: + raise ValueError( + f"unknown cross_attention_norm: {cross_attention_norm}. Should be None, 'layer_norm' or 'group_norm'" + ) + + self.to_q = nn.Linear(query_dim, self.inner_dim, bias=bias) + + if not self.only_cross_attention: + # only relevant for the `AddedKVProcessor` classes + self.to_k = nn.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias) + self.to_v = nn.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias) + else: + self.to_k = None + self.to_v = None + + self.added_proj_bias = added_proj_bias + if self.added_kv_proj_dim is not None: + self.add_k_proj = nn.Linear(added_kv_proj_dim, self.inner_kv_dim, bias=added_proj_bias) + self.add_v_proj = nn.Linear(added_kv_proj_dim, self.inner_kv_dim, bias=added_proj_bias) + if self.context_pre_only is not None: + self.add_q_proj = nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + else: + self.add_q_proj = None + self.add_k_proj = None + self.add_v_proj = None + + if not self.pre_only: + self.to_out = nn.ModuleList([]) + self.to_out.append(nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(nn.Dropout(dropout)) + else: + self.to_out = None + + if self.context_pre_only is not None and not self.context_pre_only: + self.to_add_out = nn.Linear(self.inner_dim, self.out_context_dim, bias=out_bias) + else: + self.to_add_out = None + + if qk_norm is not None and added_kv_proj_dim is not None: + if qk_norm == "layer_norm": + self.norm_added_q = nn.LayerNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_added_k = nn.LayerNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + elif qk_norm == "fp32_layer_norm": + self.norm_added_q = FP32LayerNorm(dim_head, elementwise_affine=False, bias=False, eps=eps) + self.norm_added_k = FP32LayerNorm(dim_head, elementwise_affine=False, bias=False, eps=eps) + elif qk_norm == "rms_norm": + self.norm_added_q = RMSNorm(dim_head, eps=eps) + self.norm_added_k = RMSNorm(dim_head, eps=eps) + elif qk_norm == "rms_norm_across_heads": + # Wan applies qk norm across all heads + # Wan also doesn't apply a q norm + self.norm_added_q = None + self.norm_added_k = RMSNorm(dim_head * kv_heads, eps=eps) + else: + raise ValueError( + f"unknown qk_norm: {qk_norm}. Should be one of `None,'layer_norm','fp32_layer_norm','rms_norm'`" + ) + else: + self.norm_added_q = None + self.norm_added_k = None + + # set attention processor + # We use the AttnProcessor2_0 by default when torch 2.x is used which uses + # torch.nn.functional.scaled_dot_product_attention for native Flash/memory_efficient_attention + # but only if it has the default `scale` argument. TODO remove scale_qk check when we move to torch 2.1 + if processor is None: + processor = ( + AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") and self.scale_qk else AttnProcessor() + ) + self.set_processor(processor) + + def set_use_xla_flash_attention( + self, + use_xla_flash_attention: bool, + partition_spec: tuple[str | None, ...] | None = None, + is_flux=False, + ) -> None: + r""" + Set whether to use xla flash attention from `torch_xla` or not. + + Args: + use_xla_flash_attention (`bool`): + Whether to use pallas flash attention kernel from `torch_xla` or not. + partition_spec (`tuple[]`, *optional*): + Specify the partition specification if using SPMD. Otherwise None. + """ + if use_xla_flash_attention: + if not is_torch_xla_available: + raise "torch_xla is not available" + elif is_torch_xla_version("<", "2.3"): + raise "flash attention pallas kernel is supported from torch_xla version 2.3" + elif is_spmd() and is_torch_xla_version("<", "2.4"): + raise "flash attention pallas kernel using SPMD is supported from torch_xla version 2.4" + else: + if is_flux: + processor = XLAFluxFlashAttnProcessor2_0(partition_spec) + else: + processor = XLAFlashAttnProcessor2_0(partition_spec) + else: + processor = ( + AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") and self.scale_qk else AttnProcessor() + ) + self.set_processor(processor) + + def set_use_npu_flash_attention(self, use_npu_flash_attention: bool) -> None: + r""" + Set whether to use npu flash attention from `torch_npu` or not. + + """ + if use_npu_flash_attention: + processor = AttnProcessorNPU() + else: + # set attention processor + # We use the AttnProcessor2_0 by default when torch 2.x is used which uses + # torch.nn.functional.scaled_dot_product_attention for native Flash/memory_efficient_attention + # but only if it has the default `scale` argument. TODO remove scale_qk check when we move to torch 2.1 + processor = ( + AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") and self.scale_qk else AttnProcessor() + ) + self.set_processor(processor) + + def set_use_memory_efficient_attention_xformers( + self, use_memory_efficient_attention_xformers: bool, attention_op: Callable | None = None + ) -> None: + r""" + Set whether to use memory efficient attention from `xformers` or not. + + Args: + use_memory_efficient_attention_xformers (`bool`): + Whether to use memory efficient attention from `xformers` or not. + attention_op (`Callable`, *optional*): + The attention operation to use. Defaults to `None` which uses the default attention operation from + `xformers`. + """ + is_custom_diffusion = hasattr(self, "processor") and isinstance( + self.processor, + (CustomDiffusionAttnProcessor, CustomDiffusionXFormersAttnProcessor, CustomDiffusionAttnProcessor2_0), + ) + is_added_kv_processor = hasattr(self, "processor") and isinstance( + self.processor, + ( + AttnAddedKVProcessor, + AttnAddedKVProcessor2_0, + SlicedAttnAddedKVProcessor, + XFormersAttnAddedKVProcessor, + ), + ) + is_ip_adapter = hasattr(self, "processor") and isinstance( + self.processor, + (IPAdapterAttnProcessor, IPAdapterAttnProcessor2_0, IPAdapterXFormersAttnProcessor), + ) + is_joint_processor = hasattr(self, "processor") and isinstance( + self.processor, + ( + JointAttnProcessor2_0, + XFormersJointAttnProcessor, + ), + ) + + if use_memory_efficient_attention_xformers: + if is_added_kv_processor and is_custom_diffusion: + raise NotImplementedError( + f"Memory efficient attention is currently not supported for custom diffusion for attention processor type {self.processor}" + ) + if not is_xformers_available(): + raise ModuleNotFoundError( + ( + "Refer to https://github.com/facebookresearch/xformers for more information on how to install" + " xformers" + ), + name="xformers", + ) + elif not torch.cuda.is_available(): + raise ValueError( + "torch.cuda.is_available() should be True but is False. xformers' memory efficient attention is" + " only available for GPU " + ) + else: + try: + # Make sure we can run the memory efficient attention + dtype = None + if attention_op is not None: + op_fw, op_bw = attention_op + dtype, *_ = op_fw.SUPPORTED_DTYPES + q = torch.randn((1, 2, 40), device="cuda", dtype=dtype) + _ = xformers.ops.memory_efficient_attention(q, q, q) + except Exception as e: + raise e + + if is_custom_diffusion: + processor = CustomDiffusionXFormersAttnProcessor( + train_kv=self.processor.train_kv, + train_q_out=self.processor.train_q_out, + hidden_size=self.processor.hidden_size, + cross_attention_dim=self.processor.cross_attention_dim, + attention_op=attention_op, + ) + processor.load_state_dict(self.processor.state_dict()) + if hasattr(self.processor, "to_k_custom_diffusion"): + processor.to(self.processor.to_k_custom_diffusion.weight.device) + elif is_added_kv_processor: + # TODO(Patrick, Suraj, William) - currently xformers doesn't work for UnCLIP + # which uses this type of cross attention ONLY because the attention mask of format + # [0, ..., -10.000, ..., 0, ...,] is not supported + # throw warning + logger.info( + "Memory efficient attention with `xformers` might currently not work correctly if an attention mask is required for the attention operation." + ) + processor = XFormersAttnAddedKVProcessor(attention_op=attention_op) + elif is_ip_adapter: + processor = IPAdapterXFormersAttnProcessor( + hidden_size=self.processor.hidden_size, + cross_attention_dim=self.processor.cross_attention_dim, + num_tokens=self.processor.num_tokens, + scale=self.processor.scale, + attention_op=attention_op, + ) + processor.load_state_dict(self.processor.state_dict()) + if hasattr(self.processor, "to_k_ip"): + processor.to( + device=self.processor.to_k_ip[0].weight.device, dtype=self.processor.to_k_ip[0].weight.dtype + ) + elif is_joint_processor: + processor = XFormersJointAttnProcessor(attention_op=attention_op) + else: + processor = XFormersAttnProcessor(attention_op=attention_op) + else: + if is_custom_diffusion: + attn_processor_class = ( + CustomDiffusionAttnProcessor2_0 + if hasattr(F, "scaled_dot_product_attention") + else CustomDiffusionAttnProcessor + ) + processor = attn_processor_class( + train_kv=self.processor.train_kv, + train_q_out=self.processor.train_q_out, + hidden_size=self.processor.hidden_size, + cross_attention_dim=self.processor.cross_attention_dim, + ) + processor.load_state_dict(self.processor.state_dict()) + if hasattr(self.processor, "to_k_custom_diffusion"): + processor.to(self.processor.to_k_custom_diffusion.weight.device) + elif is_ip_adapter: + processor = IPAdapterAttnProcessor2_0( + hidden_size=self.processor.hidden_size, + cross_attention_dim=self.processor.cross_attention_dim, + num_tokens=self.processor.num_tokens, + scale=self.processor.scale, + ) + processor.load_state_dict(self.processor.state_dict()) + if hasattr(self.processor, "to_k_ip"): + processor.to( + device=self.processor.to_k_ip[0].weight.device, dtype=self.processor.to_k_ip[0].weight.dtype + ) + else: + # set attention processor + # We use the AttnProcessor2_0 by default when torch 2.x is used which uses + # torch.nn.functional.scaled_dot_product_attention for native Flash/memory_efficient_attention + # but only if it has the default `scale` argument. TODO remove scale_qk check when we move to torch 2.1 + processor = ( + AttnProcessor2_0() + if hasattr(F, "scaled_dot_product_attention") and self.scale_qk + else AttnProcessor() + ) + + self.set_processor(processor) + + def set_attention_slice(self, slice_size: int) -> None: + r""" + Set the slice size for attention computation. + + Args: + slice_size (`int`): + The slice size for attention computation. + """ + if slice_size is not None and slice_size > self.sliceable_head_dim: + raise ValueError(f"slice_size {slice_size} has to be smaller or equal to {self.sliceable_head_dim}.") + + if slice_size is not None and self.added_kv_proj_dim is not None: + processor = SlicedAttnAddedKVProcessor(slice_size) + elif slice_size is not None: + processor = SlicedAttnProcessor(slice_size) + elif self.added_kv_proj_dim is not None: + processor = AttnAddedKVProcessor() + else: + # set attention processor + # We use the AttnProcessor2_0 by default when torch 2.x is used which uses + # torch.nn.functional.scaled_dot_product_attention for native Flash/memory_efficient_attention + # but only if it has the default `scale` argument. TODO remove scale_qk check when we move to torch 2.1 + processor = ( + AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") and self.scale_qk else AttnProcessor() + ) + + self.set_processor(processor) + + def set_processor(self, processor: "AttnProcessor") -> None: + r""" + Set the attention processor to use. + + Args: + processor (`AttnProcessor`): + The attention processor to use. + """ + # if current processor is in `self._modules` and if passed `processor` is not, we need to + # pop `processor` from `self._modules` + if ( + hasattr(self, "processor") + and isinstance(self.processor, torch.nn.Module) + and not isinstance(processor, torch.nn.Module) + ): + logger.info(f"You are removing possibly trained weights of {self.processor} with {processor}") + self._modules.pop("processor") + + self.processor = processor + + def get_processor(self, return_deprecated_lora: bool = False) -> "AttentionProcessor": + r""" + Get the attention processor in use. + + Args: + return_deprecated_lora (`bool`, *optional*, defaults to `False`): + Set to `True` to return the deprecated LoRA attention processor. + + Returns: + "AttentionProcessor": The attention processor in use. + """ + if not return_deprecated_lora: + return self.processor + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + **cross_attention_kwargs, + ) -> torch.Tensor: + r""" + The forward method of the `Attention` class. + + Args: + hidden_states (`torch.Tensor`): + The hidden states of the query. + encoder_hidden_states (`torch.Tensor`, *optional*): + The hidden states of the encoder. + attention_mask (`torch.Tensor`, *optional*): + The attention mask to use. If `None`, no mask is applied. + **cross_attention_kwargs: + Additional keyword arguments to pass along to the cross attention. + + Returns: + `torch.Tensor`: The output of the attention layer. + """ + # The `Attention` class can call different attention processors / attention functions + # here we simply pass along all tensors to the selected processor class + # For standard processors that are defined here, `**cross_attention_kwargs` is empty + + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {"ip_adapter_masks", "ip_hidden_states"} + unused_kwargs = [ + k for k, _ in cross_attention_kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters + ] + if len(unused_kwargs) > 0: + logger.warning( + f"cross_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + cross_attention_kwargs = {k: w for k, w in cross_attention_kwargs.items() if k in attn_parameters} + + return self.processor( + self, + hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + **cross_attention_kwargs, + ) + + def batch_to_head_dim(self, tensor: torch.Tensor) -> torch.Tensor: + r""" + Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`. `heads` + is the number of heads initialized while constructing the `Attention` class. + + Args: + tensor (`torch.Tensor`): The tensor to reshape. + + Returns: + `torch.Tensor`: The reshaped tensor. + """ + head_size = self.heads + batch_size, seq_len, dim = tensor.shape + tensor = tensor.reshape(batch_size // head_size, head_size, seq_len, dim) + tensor = tensor.permute(0, 2, 1, 3).reshape(batch_size // head_size, seq_len, dim * head_size) + return tensor + + def head_to_batch_dim(self, tensor: torch.Tensor, out_dim: int = 3) -> torch.Tensor: + r""" + Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size, seq_len, heads, dim // heads]` `heads` is + the number of heads initialized while constructing the `Attention` class. + + Args: + tensor (`torch.Tensor`): The tensor to reshape. + out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor. If `3`, the tensor is + reshaped to `[batch_size * heads, seq_len, dim // heads]`. + + Returns: + `torch.Tensor`: The reshaped tensor. + """ + head_size = self.heads + if tensor.ndim == 3: + batch_size, seq_len, dim = tensor.shape + extra_dim = 1 + else: + batch_size, extra_dim, seq_len, dim = tensor.shape + tensor = tensor.reshape(batch_size, seq_len * extra_dim, head_size, dim // head_size) + tensor = tensor.permute(0, 2, 1, 3) + + if out_dim == 3: + tensor = tensor.reshape(batch_size * head_size, seq_len * extra_dim, dim // head_size) + + return tensor + + def get_attention_scores( + self, query: torch.Tensor, key: torch.Tensor, attention_mask: torch.Tensor | None = None + ) -> torch.Tensor: + r""" + Compute the attention scores. + + Args: + query (`torch.Tensor`): The query tensor. + key (`torch.Tensor`): The key tensor. + attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied. + + Returns: + `torch.Tensor`: The attention probabilities/scores. + """ + dtype = query.dtype + if self.upcast_attention: + query = query.float() + key = key.float() + + if attention_mask is None: + baddbmm_input = torch.empty( + query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device + ) + beta = 0 + else: + baddbmm_input = attention_mask + beta = 1 + + attention_scores = torch.baddbmm( + baddbmm_input, + query, + key.transpose(-1, -2), + beta=beta, + alpha=self.scale, + ) + del baddbmm_input + + if self.upcast_softmax: + attention_scores = attention_scores.float() + + attention_probs = attention_scores.softmax(dim=-1) + del attention_scores + + attention_probs = attention_probs.to(dtype) + + return attention_probs + + def prepare_attention_mask( + self, attention_mask: torch.Tensor, target_length: int, batch_size: int, out_dim: int = 3 + ) -> torch.Tensor: + r""" + Prepare the attention mask for the attention computation. + + Args: + attention_mask (`torch.Tensor`): + The attention mask to prepare. + target_length (`int`): + The target length of the attention mask. This is the length of the attention mask after padding. + batch_size (`int`): + The batch size, which is used to repeat the attention mask. + out_dim (`int`, *optional*, defaults to `3`): + The output dimension of the attention mask. Can be either `3` or `4`. + + Returns: + `torch.Tensor`: The prepared attention mask. + """ + head_size = self.heads + if attention_mask is None: + return attention_mask + + current_length: int = attention_mask.shape[-1] + if current_length != target_length: + if attention_mask.device.type == "mps": + # HACK: MPS: Does not support padding by greater than dimension of input tensor. + # Instead, we can manually construct the padding tensor. + padding_shape = (attention_mask.shape[0], attention_mask.shape[1], target_length) + padding = torch.zeros(padding_shape, dtype=attention_mask.dtype, device=attention_mask.device) + attention_mask = torch.cat([attention_mask, padding], dim=2) + else: + # TODO: for pipelines such as stable-diffusion, padding cross-attn mask: + # we want to instead pad by (0, remaining_length), where remaining_length is: + # remaining_length: int = target_length - current_length + # TODO: re-enable tests/models/test_models_unet_2d_condition.py#test_model_xattn_padding + attention_mask = F.pad(attention_mask, (0, target_length), value=0.0) + + if out_dim == 3: + if attention_mask.shape[0] < batch_size * head_size: + attention_mask = attention_mask.repeat_interleave( + head_size, dim=0, output_size=attention_mask.shape[0] * head_size + ) + elif out_dim == 4: + attention_mask = attention_mask.unsqueeze(1) + attention_mask = attention_mask.repeat_interleave( + head_size, dim=1, output_size=attention_mask.shape[1] * head_size + ) + + return attention_mask + + def norm_encoder_hidden_states(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor: + r""" + Normalize the encoder hidden states. Requires `self.norm_cross` to be specified when constructing the + `Attention` class. + + Args: + encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder. + + Returns: + `torch.Tensor`: The normalized encoder hidden states. + """ + assert self.norm_cross is not None, "self.norm_cross must be defined to call self.norm_encoder_hidden_states" + + if isinstance(self.norm_cross, nn.LayerNorm): + encoder_hidden_states = self.norm_cross(encoder_hidden_states) + elif isinstance(self.norm_cross, nn.GroupNorm): + # Group norm norms along the channels dimension and expects + # input to be in the shape of (N, C, *). In this case, we want + # to norm along the hidden dimension, so we need to move + # (batch_size, sequence_length, hidden_size) -> + # (batch_size, hidden_size, sequence_length) + encoder_hidden_states = encoder_hidden_states.transpose(1, 2) + encoder_hidden_states = self.norm_cross(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.transpose(1, 2) + else: + assert False + + return encoder_hidden_states + + @torch.no_grad() + def fuse_projections(self, fuse=True): + device = self.to_q.weight.data.device + dtype = self.to_q.weight.data.dtype + + if not self.is_cross_attention: + # fetch weight matrices. + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + in_features = concatenated_weights.shape[1] + out_features = concatenated_weights.shape[0] + + # create a new single projection layer and copy over the weights. + self.to_qkv = nn.Linear(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) + self.to_qkv.weight.copy_(concatenated_weights) + if self.use_bias: + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + self.to_qkv.bias.copy_(concatenated_bias) + + else: + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + in_features = concatenated_weights.shape[1] + out_features = concatenated_weights.shape[0] + + self.to_kv = nn.Linear(in_features, out_features, bias=self.use_bias, device=device, dtype=dtype) + self.to_kv.weight.copy_(concatenated_weights) + if self.use_bias: + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + self.to_kv.bias.copy_(concatenated_bias) + + # handle added projections for SD3 and others. + if ( + getattr(self, "add_q_proj", None) is not None + and getattr(self, "add_k_proj", None) is not None + and getattr(self, "add_v_proj", None) is not None + ): + concatenated_weights = torch.cat( + [self.add_q_proj.weight.data, self.add_k_proj.weight.data, self.add_v_proj.weight.data] + ) + in_features = concatenated_weights.shape[1] + out_features = concatenated_weights.shape[0] + + self.to_added_qkv = nn.Linear( + in_features, out_features, bias=self.added_proj_bias, device=device, dtype=dtype + ) + self.to_added_qkv.weight.copy_(concatenated_weights) + if self.added_proj_bias: + concatenated_bias = torch.cat( + [self.add_q_proj.bias.data, self.add_k_proj.bias.data, self.add_v_proj.bias.data] + ) + self.to_added_qkv.bias.copy_(concatenated_bias) + + self.fused_projections = fuse + + +class SanaMultiscaleAttentionProjection(nn.Module): + def __init__( + self, + in_channels: int, + num_attention_heads: int, + kernel_size: int, + ) -> None: + super().__init__() + + channels = 3 * in_channels + self.proj_in = nn.Conv2d( + channels, + channels, + kernel_size, + padding=kernel_size // 2, + groups=channels, + bias=False, + ) + self.proj_out = nn.Conv2d(channels, channels, 1, 1, 0, groups=3 * num_attention_heads, bias=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.proj_in(hidden_states) + hidden_states = self.proj_out(hidden_states) + return hidden_states + + +class SanaMultiscaleLinearAttention(nn.Module): + r"""Lightweight multi-scale linear attention""" + + def __init__( + self, + in_channels: int, + out_channels: int, + num_attention_heads: int | None = None, + attention_head_dim: int = 8, + mult: float = 1.0, + norm_type: str = "batch_norm", + kernel_sizes: tuple[int, ...] = (5,), + eps: float = 1e-15, + residual_connection: bool = False, + ): + super().__init__() + + # To prevent circular import + from .normalization import get_normalization + + self.eps = eps + self.attention_head_dim = attention_head_dim + self.norm_type = norm_type + self.residual_connection = residual_connection + + num_attention_heads = ( + int(in_channels // attention_head_dim * mult) if num_attention_heads is None else num_attention_heads + ) + inner_dim = num_attention_heads * attention_head_dim + + self.to_q = nn.Linear(in_channels, inner_dim, bias=False) + self.to_k = nn.Linear(in_channels, inner_dim, bias=False) + self.to_v = nn.Linear(in_channels, inner_dim, bias=False) + + self.to_qkv_multiscale = nn.ModuleList() + for kernel_size in kernel_sizes: + self.to_qkv_multiscale.append( + SanaMultiscaleAttentionProjection(inner_dim, num_attention_heads, kernel_size) + ) + + self.nonlinearity = nn.ReLU() + self.to_out = nn.Linear(inner_dim * (1 + len(kernel_sizes)), out_channels, bias=False) + self.norm_out = get_normalization(norm_type, num_features=out_channels) + + self.processor = SanaMultiscaleAttnProcessor2_0() + + def apply_linear_attention(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor: + value = F.pad(value, (0, 0, 0, 1), mode="constant", value=1) # Adds padding + scores = torch.matmul(value, key.transpose(-1, -2)) + hidden_states = torch.matmul(scores, query) + + hidden_states = hidden_states.to(dtype=torch.float32) + hidden_states = hidden_states[:, :, :-1] / (hidden_states[:, :, -1:] + self.eps) + return hidden_states + + def apply_quadratic_attention(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor: + scores = torch.matmul(key.transpose(-1, -2), query) + scores = scores.to(dtype=torch.float32) + scores = scores / (torch.sum(scores, dim=2, keepdim=True) + self.eps) + hidden_states = torch.matmul(value, scores.to(value.dtype)) + return hidden_states + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + return self.processor(self, hidden_states) + + +class MochiAttention(nn.Module): + def __init__( + self, + query_dim: int, + added_kv_proj_dim: int, + processor: "MochiAttnProcessor2_0", + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_proj_bias: bool = True, + out_dim: int | None = None, + out_context_dim: int | None = None, + out_bias: bool = True, + context_pre_only: bool = False, + eps: float = 1e-5, + ): + super().__init__() + from .normalization import MochiRMSNorm + + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.out_dim = out_dim if out_dim is not None else query_dim + self.out_context_dim = out_context_dim if out_context_dim else query_dim + self.context_pre_only = context_pre_only + + self.heads = out_dim // dim_head if out_dim is not None else heads + + self.norm_q = MochiRMSNorm(dim_head, eps, True) + self.norm_k = MochiRMSNorm(dim_head, eps, True) + self.norm_added_q = MochiRMSNorm(dim_head, eps, True) + self.norm_added_k = MochiRMSNorm(dim_head, eps, True) + + self.to_q = nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = nn.Linear(query_dim, self.inner_dim, bias=bias) + + self.add_k_proj = nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + if self.context_pre_only is not None: + self.add_q_proj = nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + + self.to_out = nn.ModuleList([]) + self.to_out.append(nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(nn.Dropout(dropout)) + + if not self.context_pre_only: + self.to_add_out = nn.Linear(self.inner_dim, self.out_context_dim, bias=out_bias) + + self.processor = processor + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + **kwargs, + ): + return self.processor( + self, + hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + **kwargs, + ) + + +class MochiAttnProcessor2_0: + """Attention processor used in Mochi.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("MochiAttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: "MochiAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + encoder_query = encoder_query.unflatten(2, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(2, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(2, (attn.heads, -1)) + + if attn.norm_added_q is not None: + encoder_query = attn.norm_added_q(encoder_query) + if attn.norm_added_k is not None: + encoder_key = attn.norm_added_k(encoder_key) + + if image_rotary_emb is not None: + + def apply_rotary_emb(x, freqs_cos, freqs_sin): + x_even = x[..., 0::2].float() + x_odd = x[..., 1::2].float() + + cos = (x_even * freqs_cos - x_odd * freqs_sin).to(x.dtype) + sin = (x_even * freqs_sin + x_odd * freqs_cos).to(x.dtype) + + return torch.stack([cos, sin], dim=-1).flatten(-2) + + query = apply_rotary_emb(query, *image_rotary_emb) + key = apply_rotary_emb(key, *image_rotary_emb) + + query, key, value = query.transpose(1, 2), key.transpose(1, 2), value.transpose(1, 2) + encoder_query, encoder_key, encoder_value = ( + encoder_query.transpose(1, 2), + encoder_key.transpose(1, 2), + encoder_value.transpose(1, 2), + ) + + sequence_length = query.size(2) + encoder_sequence_length = encoder_query.size(2) + total_length = sequence_length + encoder_sequence_length + + batch_size, heads, _, dim = query.shape + attn_outputs = [] + for idx in range(batch_size): + mask = attention_mask[idx][None, :] + valid_prompt_token_indices = torch.nonzero(mask.flatten(), as_tuple=False).flatten() + + valid_encoder_query = encoder_query[idx : idx + 1, :, valid_prompt_token_indices, :] + valid_encoder_key = encoder_key[idx : idx + 1, :, valid_prompt_token_indices, :] + valid_encoder_value = encoder_value[idx : idx + 1, :, valid_prompt_token_indices, :] + + valid_query = torch.cat([query[idx : idx + 1], valid_encoder_query], dim=2) + valid_key = torch.cat([key[idx : idx + 1], valid_encoder_key], dim=2) + valid_value = torch.cat([value[idx : idx + 1], valid_encoder_value], dim=2) + + attn_output = F.scaled_dot_product_attention( + valid_query, valid_key, valid_value, dropout_p=0.0, is_causal=False + ) + valid_sequence_length = attn_output.size(2) + attn_output = F.pad(attn_output, (0, 0, 0, total_length - valid_sequence_length)) + attn_outputs.append(attn_output) + + hidden_states = torch.cat(attn_outputs, dim=0) + hidden_states = hidden_states.transpose(1, 2).flatten(2, 3) + + hidden_states, encoder_hidden_states = hidden_states.split_with_sizes( + (sequence_length, encoder_sequence_length), dim=1 + ) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if hasattr(attn, "to_add_out"): + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + + +class AttnProcessor: + r""" + Default processor for performing attention-related computations. + """ + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = attn.head_to_batch_dim(query) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + + attention_probs = attn.get_attention_scores(query, key, attention_mask) + hidden_states = torch.bmm(attention_probs, value) + hidden_states = attn.batch_to_head_dim(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class CustomDiffusionAttnProcessor(nn.Module): + r""" + Processor for implementing attention for the Custom Diffusion method. + + Args: + train_kv (`bool`, defaults to `True`): + Whether to newly train the key and value matrices corresponding to the text features. + train_q_out (`bool`, defaults to `True`): + Whether to newly train query matrices corresponding to the latent image features. + hidden_size (`int`, *optional*, defaults to `None`): + The hidden size of the attention layer. + cross_attention_dim (`int`, *optional*, defaults to `None`): + The number of channels in the `encoder_hidden_states`. + out_bias (`bool`, defaults to `True`): + Whether to include the bias parameter in `train_q_out`. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + """ + + def __init__( + self, + train_kv: bool = True, + train_q_out: bool = True, + hidden_size: int | None = None, + cross_attention_dim: int | None = None, + out_bias: bool = True, + dropout: float = 0.0, + ): + super().__init__() + self.train_kv = train_kv + self.train_q_out = train_q_out + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + + # `_custom_diffusion` id for easy serialization and loading. + if self.train_kv: + self.to_k_custom_diffusion = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) + self.to_v_custom_diffusion = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) + if self.train_q_out: + self.to_q_custom_diffusion = nn.Linear(hidden_size, hidden_size, bias=False) + self.to_out_custom_diffusion = nn.ModuleList([]) + self.to_out_custom_diffusion.append(nn.Linear(hidden_size, hidden_size, bias=out_bias)) + self.to_out_custom_diffusion.append(nn.Dropout(dropout)) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = hidden_states.shape + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + if self.train_q_out: + query = self.to_q_custom_diffusion(hidden_states).to(attn.to_q.weight.dtype) + else: + query = attn.to_q(hidden_states.to(attn.to_q.weight.dtype)) + + if encoder_hidden_states is None: + crossattn = False + encoder_hidden_states = hidden_states + else: + crossattn = True + if attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + if self.train_kv: + key = self.to_k_custom_diffusion(encoder_hidden_states.to(self.to_k_custom_diffusion.weight.dtype)) + value = self.to_v_custom_diffusion(encoder_hidden_states.to(self.to_v_custom_diffusion.weight.dtype)) + key = key.to(attn.to_q.weight.dtype) + value = value.to(attn.to_q.weight.dtype) + else: + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if crossattn: + detach = torch.ones_like(key) + detach[:, :1, :] = detach[:, :1, :] * 0.0 + key = detach * key + (1 - detach) * key.detach() + value = detach * value + (1 - detach) * value.detach() + + query = attn.head_to_batch_dim(query) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + + attention_probs = attn.get_attention_scores(query, key, attention_mask) + hidden_states = torch.bmm(attention_probs, value) + hidden_states = attn.batch_to_head_dim(hidden_states) + + if self.train_q_out: + # linear proj + hidden_states = self.to_out_custom_diffusion[0](hidden_states) + # dropout + hidden_states = self.to_out_custom_diffusion[1](hidden_states) + else: + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + + +class AttnAddedKVProcessor: + r""" + Processor for performing attention-related computations with extra learnable key and value matrices for the text + encoder. + """ + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + + hidden_states = hidden_states.view(hidden_states.shape[0], hidden_states.shape[1], -1).transpose(1, 2) + batch_size, sequence_length, _ = hidden_states.shape + + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + query = attn.head_to_batch_dim(query) + + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.head_to_batch_dim(encoder_hidden_states_key_proj) + encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj) + + if not attn.only_cross_attention: + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + key = torch.cat([encoder_hidden_states_key_proj, key], dim=1) + value = torch.cat([encoder_hidden_states_value_proj, value], dim=1) + else: + key = encoder_hidden_states_key_proj + value = encoder_hidden_states_value_proj + + attention_probs = attn.get_attention_scores(query, key, attention_mask) + hidden_states = torch.bmm(attention_probs, value) + hidden_states = attn.batch_to_head_dim(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + hidden_states = hidden_states.transpose(-1, -2).reshape(residual.shape) + hidden_states = hidden_states + residual + + return hidden_states + + +class AttnAddedKVProcessor2_0: + r""" + Processor for performing scaled dot-product attention (enabled by default if you're using PyTorch 2.0), with extra + learnable key and value matrices for the text encoder. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "AttnAddedKVProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + + hidden_states = hidden_states.view(hidden_states.shape[0], hidden_states.shape[1], -1).transpose(1, 2) + batch_size, sequence_length, _ = hidden_states.shape + + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size, out_dim=4) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + query = attn.head_to_batch_dim(query, out_dim=4) + + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.head_to_batch_dim(encoder_hidden_states_key_proj, out_dim=4) + encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj, out_dim=4) + + if not attn.only_cross_attention: + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + key = attn.head_to_batch_dim(key, out_dim=4) + value = attn.head_to_batch_dim(value, out_dim=4) + key = torch.cat([encoder_hidden_states_key_proj, key], dim=2) + value = torch.cat([encoder_hidden_states_value_proj, value], dim=2) + else: + key = encoder_hidden_states_key_proj + value = encoder_hidden_states_value_proj + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, residual.shape[1]) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + hidden_states = hidden_states.transpose(-1, -2).reshape(residual.shape) + hidden_states = hidden_states + residual + + return hidden_states + + +class JointAttnProcessor2_0: + """Attention processor used typically in processing the SD3-like self-attention projections.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("JointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + residual = hidden_states + + batch_size = hidden_states.shape[0] + + # `sample` projections. + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # `context` projections. + if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + + if attn.norm_added_q is not None: + encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj) + if attn.norm_added_k is not None: + encoder_hidden_states_key_proj = attn.norm_added_k(encoder_hidden_states_key_proj) + + query = torch.cat([query, encoder_hidden_states_query_proj], dim=2) + key = torch.cat([key, encoder_hidden_states_key_proj], dim=2) + value = torch.cat([value, encoder_hidden_states_value_proj], dim=2) + + hidden_states = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0, is_causal=False) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + # Split the attention outputs. + hidden_states, encoder_hidden_states = ( + hidden_states[:, : residual.shape[1]], + hidden_states[:, residual.shape[1] :], + ) + if not attn.context_pre_only: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class PAGJointAttnProcessor2_0: + """Attention processor used typically in processing the SD3-like self-attention projections.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "PAGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + ) -> torch.FloatTensor: + residual = hidden_states + + input_ndim = hidden_states.ndim + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + context_input_ndim = encoder_hidden_states.ndim + if context_input_ndim == 4: + batch_size, channel, height, width = encoder_hidden_states.shape + encoder_hidden_states = encoder_hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + # store the length of image patch sequences to create a mask that prevents interaction between patches + # similar to making the self-attention map an identity matrix + identity_block_size = hidden_states.shape[1] + + # chunk + hidden_states_org, hidden_states_ptb = hidden_states.chunk(2) + encoder_hidden_states_org, encoder_hidden_states_ptb = encoder_hidden_states.chunk(2) + + ################## original path ################## + batch_size = encoder_hidden_states_org.shape[0] + + # `sample` projections. + query_org = attn.to_q(hidden_states_org) + key_org = attn.to_k(hidden_states_org) + value_org = attn.to_v(hidden_states_org) + + # `context` projections. + encoder_hidden_states_org_query_proj = attn.add_q_proj(encoder_hidden_states_org) + encoder_hidden_states_org_key_proj = attn.add_k_proj(encoder_hidden_states_org) + encoder_hidden_states_org_value_proj = attn.add_v_proj(encoder_hidden_states_org) + + # attention + query_org = torch.cat([query_org, encoder_hidden_states_org_query_proj], dim=1) + key_org = torch.cat([key_org, encoder_hidden_states_org_key_proj], dim=1) + value_org = torch.cat([value_org, encoder_hidden_states_org_value_proj], dim=1) + + inner_dim = key_org.shape[-1] + head_dim = inner_dim // attn.heads + query_org = query_org.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key_org = key_org.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value_org = value_org.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + hidden_states_org = F.scaled_dot_product_attention( + query_org, key_org, value_org, dropout_p=0.0, is_causal=False + ) + hidden_states_org = hidden_states_org.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_org = hidden_states_org.to(query_org.dtype) + + # Split the attention outputs. + hidden_states_org, encoder_hidden_states_org = ( + hidden_states_org[:, : residual.shape[1]], + hidden_states_org[:, residual.shape[1] :], + ) + + # linear proj + hidden_states_org = attn.to_out[0](hidden_states_org) + # dropout + hidden_states_org = attn.to_out[1](hidden_states_org) + if not attn.context_pre_only: + encoder_hidden_states_org = attn.to_add_out(encoder_hidden_states_org) + + if input_ndim == 4: + hidden_states_org = hidden_states_org.transpose(-1, -2).reshape(batch_size, channel, height, width) + if context_input_ndim == 4: + encoder_hidden_states_org = encoder_hidden_states_org.transpose(-1, -2).reshape( + batch_size, channel, height, width + ) + + ################## perturbed path ################## + + batch_size = encoder_hidden_states_ptb.shape[0] + + # `sample` projections. + query_ptb = attn.to_q(hidden_states_ptb) + key_ptb = attn.to_k(hidden_states_ptb) + value_ptb = attn.to_v(hidden_states_ptb) + + # `context` projections. + encoder_hidden_states_ptb_query_proj = attn.add_q_proj(encoder_hidden_states_ptb) + encoder_hidden_states_ptb_key_proj = attn.add_k_proj(encoder_hidden_states_ptb) + encoder_hidden_states_ptb_value_proj = attn.add_v_proj(encoder_hidden_states_ptb) + + # attention + query_ptb = torch.cat([query_ptb, encoder_hidden_states_ptb_query_proj], dim=1) + key_ptb = torch.cat([key_ptb, encoder_hidden_states_ptb_key_proj], dim=1) + value_ptb = torch.cat([value_ptb, encoder_hidden_states_ptb_value_proj], dim=1) + + inner_dim = key_ptb.shape[-1] + head_dim = inner_dim // attn.heads + query_ptb = query_ptb.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key_ptb = key_ptb.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value_ptb = value_ptb.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # create a full mask with all entries set to 0 + seq_len = query_ptb.size(2) + full_mask = torch.zeros((seq_len, seq_len), device=query_ptb.device, dtype=query_ptb.dtype) + + # set the attention value between image patches to -inf + full_mask[:identity_block_size, :identity_block_size] = float("-inf") + + # set the diagonal of the attention value between image patches to 0 + full_mask[:identity_block_size, :identity_block_size].fill_diagonal_(0) + + # expand the mask to match the attention weights shape + full_mask = full_mask.unsqueeze(0).unsqueeze(0) # Add batch and num_heads dimensions + + hidden_states_ptb = F.scaled_dot_product_attention( + query_ptb, key_ptb, value_ptb, attn_mask=full_mask, dropout_p=0.0, is_causal=False + ) + hidden_states_ptb = hidden_states_ptb.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_ptb = hidden_states_ptb.to(query_ptb.dtype) + + # split the attention outputs. + hidden_states_ptb, encoder_hidden_states_ptb = ( + hidden_states_ptb[:, : residual.shape[1]], + hidden_states_ptb[:, residual.shape[1] :], + ) + + # linear proj + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + # dropout + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + if not attn.context_pre_only: + encoder_hidden_states_ptb = attn.to_add_out(encoder_hidden_states_ptb) + + if input_ndim == 4: + hidden_states_ptb = hidden_states_ptb.transpose(-1, -2).reshape(batch_size, channel, height, width) + if context_input_ndim == 4: + encoder_hidden_states_ptb = encoder_hidden_states_ptb.transpose(-1, -2).reshape( + batch_size, channel, height, width + ) + + ################ concat ############### + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + encoder_hidden_states = torch.cat([encoder_hidden_states_org, encoder_hidden_states_ptb]) + + return hidden_states, encoder_hidden_states + + +class PAGCFGJointAttnProcessor2_0: + """Attention processor used typically in processing the SD3-like self-attention projections.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "PAGCFGJointAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + residual = hidden_states + + input_ndim = hidden_states.ndim + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + context_input_ndim = encoder_hidden_states.ndim + if context_input_ndim == 4: + batch_size, channel, height, width = encoder_hidden_states.shape + encoder_hidden_states = encoder_hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + identity_block_size = hidden_states.shape[ + 1 + ] # patch embeddings width * height (correspond to self-attention map width or height) + + # chunk + hidden_states_uncond, hidden_states_org, hidden_states_ptb = hidden_states.chunk(3) + hidden_states_org = torch.cat([hidden_states_uncond, hidden_states_org]) + + ( + encoder_hidden_states_uncond, + encoder_hidden_states_org, + encoder_hidden_states_ptb, + ) = encoder_hidden_states.chunk(3) + encoder_hidden_states_org = torch.cat([encoder_hidden_states_uncond, encoder_hidden_states_org]) + + ################## original path ################## + batch_size = encoder_hidden_states_org.shape[0] + + # `sample` projections. + query_org = attn.to_q(hidden_states_org) + key_org = attn.to_k(hidden_states_org) + value_org = attn.to_v(hidden_states_org) + + # `context` projections. + encoder_hidden_states_org_query_proj = attn.add_q_proj(encoder_hidden_states_org) + encoder_hidden_states_org_key_proj = attn.add_k_proj(encoder_hidden_states_org) + encoder_hidden_states_org_value_proj = attn.add_v_proj(encoder_hidden_states_org) + + # attention + query_org = torch.cat([query_org, encoder_hidden_states_org_query_proj], dim=1) + key_org = torch.cat([key_org, encoder_hidden_states_org_key_proj], dim=1) + value_org = torch.cat([value_org, encoder_hidden_states_org_value_proj], dim=1) + + inner_dim = key_org.shape[-1] + head_dim = inner_dim // attn.heads + query_org = query_org.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key_org = key_org.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value_org = value_org.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + hidden_states_org = F.scaled_dot_product_attention( + query_org, key_org, value_org, dropout_p=0.0, is_causal=False + ) + hidden_states_org = hidden_states_org.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_org = hidden_states_org.to(query_org.dtype) + + # Split the attention outputs. + hidden_states_org, encoder_hidden_states_org = ( + hidden_states_org[:, : residual.shape[1]], + hidden_states_org[:, residual.shape[1] :], + ) + + # linear proj + hidden_states_org = attn.to_out[0](hidden_states_org) + # dropout + hidden_states_org = attn.to_out[1](hidden_states_org) + if not attn.context_pre_only: + encoder_hidden_states_org = attn.to_add_out(encoder_hidden_states_org) + + if input_ndim == 4: + hidden_states_org = hidden_states_org.transpose(-1, -2).reshape(batch_size, channel, height, width) + if context_input_ndim == 4: + encoder_hidden_states_org = encoder_hidden_states_org.transpose(-1, -2).reshape( + batch_size, channel, height, width + ) + + ################## perturbed path ################## + + batch_size = encoder_hidden_states_ptb.shape[0] + + # `sample` projections. + query_ptb = attn.to_q(hidden_states_ptb) + key_ptb = attn.to_k(hidden_states_ptb) + value_ptb = attn.to_v(hidden_states_ptb) + + # `context` projections. + encoder_hidden_states_ptb_query_proj = attn.add_q_proj(encoder_hidden_states_ptb) + encoder_hidden_states_ptb_key_proj = attn.add_k_proj(encoder_hidden_states_ptb) + encoder_hidden_states_ptb_value_proj = attn.add_v_proj(encoder_hidden_states_ptb) + + # attention + query_ptb = torch.cat([query_ptb, encoder_hidden_states_ptb_query_proj], dim=1) + key_ptb = torch.cat([key_ptb, encoder_hidden_states_ptb_key_proj], dim=1) + value_ptb = torch.cat([value_ptb, encoder_hidden_states_ptb_value_proj], dim=1) + + inner_dim = key_ptb.shape[-1] + head_dim = inner_dim // attn.heads + query_ptb = query_ptb.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key_ptb = key_ptb.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value_ptb = value_ptb.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # create a full mask with all entries set to 0 + seq_len = query_ptb.size(2) + full_mask = torch.zeros((seq_len, seq_len), device=query_ptb.device, dtype=query_ptb.dtype) + + # set the attention value between image patches to -inf + full_mask[:identity_block_size, :identity_block_size] = float("-inf") + + # set the diagonal of the attention value between image patches to 0 + full_mask[:identity_block_size, :identity_block_size].fill_diagonal_(0) + + # expand the mask to match the attention weights shape + full_mask = full_mask.unsqueeze(0).unsqueeze(0) # Add batch and num_heads dimensions + + hidden_states_ptb = F.scaled_dot_product_attention( + query_ptb, key_ptb, value_ptb, attn_mask=full_mask, dropout_p=0.0, is_causal=False + ) + hidden_states_ptb = hidden_states_ptb.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_ptb = hidden_states_ptb.to(query_ptb.dtype) + + # split the attention outputs. + hidden_states_ptb, encoder_hidden_states_ptb = ( + hidden_states_ptb[:, : residual.shape[1]], + hidden_states_ptb[:, residual.shape[1] :], + ) + + # linear proj + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + # dropout + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + if not attn.context_pre_only: + encoder_hidden_states_ptb = attn.to_add_out(encoder_hidden_states_ptb) + + if input_ndim == 4: + hidden_states_ptb = hidden_states_ptb.transpose(-1, -2).reshape(batch_size, channel, height, width) + if context_input_ndim == 4: + encoder_hidden_states_ptb = encoder_hidden_states_ptb.transpose(-1, -2).reshape( + batch_size, channel, height, width + ) + + ################ concat ############### + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + encoder_hidden_states = torch.cat([encoder_hidden_states_org, encoder_hidden_states_ptb]) + + return hidden_states, encoder_hidden_states + + +class FusedJointAttnProcessor2_0: + """Attention processor used typically in processing the SD3-like self-attention projections.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + residual = hidden_states + + input_ndim = hidden_states.ndim + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + context_input_ndim = encoder_hidden_states.ndim + if context_input_ndim == 4: + batch_size, channel, height, width = encoder_hidden_states.shape + encoder_hidden_states = encoder_hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size = encoder_hidden_states.shape[0] + + # `sample` projections. + qkv = attn.to_qkv(hidden_states) + split_size = qkv.shape[-1] // 3 + query, key, value = torch.split(qkv, split_size, dim=-1) + + # `context` projections. + encoder_qkv = attn.to_added_qkv(encoder_hidden_states) + split_size = encoder_qkv.shape[-1] // 3 + ( + encoder_hidden_states_query_proj, + encoder_hidden_states_key_proj, + encoder_hidden_states_value_proj, + ) = torch.split(encoder_qkv, split_size, dim=-1) + + # attention + query = torch.cat([query, encoder_hidden_states_query_proj], dim=1) + key = torch.cat([key, encoder_hidden_states_key_proj], dim=1) + value = torch.cat([value, encoder_hidden_states_value_proj], dim=1) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + hidden_states = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0, is_causal=False) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # Split the attention outputs. + hidden_states, encoder_hidden_states = ( + hidden_states[:, : residual.shape[1]], + hidden_states[:, residual.shape[1] :], + ) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + if not attn.context_pre_only: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + if context_input_ndim == 4: + encoder_hidden_states = encoder_hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + return hidden_states, encoder_hidden_states + + +class XFormersJointAttnProcessor: + r""" + Processor for implementing memory efficient attention using xFormers. + + Args: + attention_op (`Callable`, *optional*, defaults to `None`): + The base + [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to + use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best + operator. + """ + + def __init__(self, attention_op: Callable | None = None): + self.attention_op = attention_op + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + residual = hidden_states + + # `sample` projections. + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = attn.head_to_batch_dim(query).contiguous() + key = attn.head_to_batch_dim(key).contiguous() + value = attn.head_to_batch_dim(value).contiguous() + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # `context` projections. + if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + encoder_hidden_states_query_proj = attn.head_to_batch_dim(encoder_hidden_states_query_proj).contiguous() + encoder_hidden_states_key_proj = attn.head_to_batch_dim(encoder_hidden_states_key_proj).contiguous() + encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj).contiguous() + + if attn.norm_added_q is not None: + encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj) + if attn.norm_added_k is not None: + encoder_hidden_states_key_proj = attn.norm_added_k(encoder_hidden_states_key_proj) + + query = torch.cat([query, encoder_hidden_states_query_proj], dim=1) + key = torch.cat([key, encoder_hidden_states_key_proj], dim=1) + value = torch.cat([value, encoder_hidden_states_value_proj], dim=1) + + hidden_states = xformers.ops.memory_efficient_attention( + query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale + ) + hidden_states = hidden_states.to(query.dtype) + hidden_states = attn.batch_to_head_dim(hidden_states) + + if encoder_hidden_states is not None: + # Split the attention outputs. + hidden_states, encoder_hidden_states = ( + hidden_states[:, : residual.shape[1]], + hidden_states[:, residual.shape[1] :], + ) + if not attn.context_pre_only: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class AllegroAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the Allegro model. It applies a normalization layer and rotary embedding on the query and key vector. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "AllegroAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # Apply RoPE if needed + if image_rotary_emb is not None and not attn.is_cross_attention: + from .embeddings import apply_rotary_emb_allegro + + query = apply_rotary_emb_allegro(query, image_rotary_emb[0], image_rotary_emb[1]) + key = apply_rotary_emb_allegro(key, image_rotary_emb[0], image_rotary_emb[1]) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class AuraFlowAttnProcessor2_0: + """Attention processor used typically in processing Aura Flow.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention") and is_torch_version("<", "2.1"): + raise ImportError( + "AuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. " + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + batch_size = hidden_states.shape[0] + + # `sample` projections. + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + # `context` projections. + if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + # Reshape. + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + query = query.view(batch_size, -1, attn.heads, head_dim) + key = key.view(batch_size, -1, attn.heads, head_dim) + value = value.view(batch_size, -1, attn.heads, head_dim) + + # Apply QK norm. + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Concatenate the projections. + if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view( + batch_size, -1, attn.heads, head_dim + ) + encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view(batch_size, -1, attn.heads, head_dim) + encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view( + batch_size, -1, attn.heads, head_dim + ) + + if attn.norm_added_q is not None: + encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj) + if attn.norm_added_k is not None: + encoder_hidden_states_key_proj = attn.norm_added_q(encoder_hidden_states_key_proj) + + query = torch.cat([encoder_hidden_states_query_proj, query], dim=1) + key = torch.cat([encoder_hidden_states_key_proj, key], dim=1) + value = torch.cat([encoder_hidden_states_value_proj, value], dim=1) + + query = query.transpose(1, 2) + key = key.transpose(1, 2) + value = value.transpose(1, 2) + + # Attention. + hidden_states = F.scaled_dot_product_attention( + query, key, value, dropout_p=0.0, scale=attn.scale, is_causal=False + ) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # Split the attention outputs. + if encoder_hidden_states is not None: + hidden_states, encoder_hidden_states = ( + hidden_states[:, encoder_hidden_states.shape[1] :], + hidden_states[:, : encoder_hidden_states.shape[1]], + ) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + if encoder_hidden_states is not None: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class FusedAuraFlowAttnProcessor2_0: + """Attention processor used typically in processing Aura Flow with fused projections.""" + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention") and is_torch_version("<", "2.1"): + raise ImportError( + "FusedAuraFlowAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to at least 2.1 or above as we use `scale` in `F.scaled_dot_product_attention()`. " + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + batch_size = hidden_states.shape[0] + + # `sample` projections. + qkv = attn.to_qkv(hidden_states) + split_size = qkv.shape[-1] // 3 + query, key, value = torch.split(qkv, split_size, dim=-1) + + # `context` projections. + if encoder_hidden_states is not None: + encoder_qkv = attn.to_added_qkv(encoder_hidden_states) + split_size = encoder_qkv.shape[-1] // 3 + ( + encoder_hidden_states_query_proj, + encoder_hidden_states_key_proj, + encoder_hidden_states_value_proj, + ) = torch.split(encoder_qkv, split_size, dim=-1) + + # Reshape. + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + query = query.view(batch_size, -1, attn.heads, head_dim) + key = key.view(batch_size, -1, attn.heads, head_dim) + value = value.view(batch_size, -1, attn.heads, head_dim) + + # Apply QK norm. + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Concatenate the projections. + if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view( + batch_size, -1, attn.heads, head_dim + ) + encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view(batch_size, -1, attn.heads, head_dim) + encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view( + batch_size, -1, attn.heads, head_dim + ) + + if attn.norm_added_q is not None: + encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj) + if attn.norm_added_k is not None: + encoder_hidden_states_key_proj = attn.norm_added_q(encoder_hidden_states_key_proj) + + query = torch.cat([encoder_hidden_states_query_proj, query], dim=1) + key = torch.cat([encoder_hidden_states_key_proj, key], dim=1) + value = torch.cat([encoder_hidden_states_value_proj, value], dim=1) + + query = query.transpose(1, 2) + key = key.transpose(1, 2) + value = value.transpose(1, 2) + + # Attention. + hidden_states = F.scaled_dot_product_attention( + query, key, value, dropout_p=0.0, scale=attn.scale, is_causal=False + ) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # Split the attention outputs. + if encoder_hidden_states is not None: + hidden_states, encoder_hidden_states = ( + hidden_states[:, encoder_hidden_states.shape[1] :], + hidden_states[:, : encoder_hidden_states.shape[1]], + ) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + if encoder_hidden_states is not None: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class CogVideoXAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on + query and key vectors, but does not include spatial normalization. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("CogVideoXAttnProcessor requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + text_seq_length = encoder_hidden_states.size(1) + + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + batch_size, sequence_length, _ = hidden_states.shape + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + from .embeddings import apply_rotary_emb + + query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) + if not attn.is_cross_attention: + key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) + + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + return hidden_states, encoder_hidden_states + + +class FusedCogVideoXAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on + query and key vectors, but does not include spatial normalization. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("CogVideoXAttnProcessor requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + text_seq_length = encoder_hidden_states.size(1) + + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + qkv = attn.to_qkv(hidden_states) + split_size = qkv.shape[-1] // 3 + query, key, value = torch.split(qkv, split_size, dim=-1) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + from .embeddings import apply_rotary_emb + + query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) + if not attn.is_cross_attention: + key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) + + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + return hidden_states, encoder_hidden_states + + +class XFormersAttnAddedKVProcessor: + r""" + Processor for implementing memory efficient attention using xFormers. + + Args: + attention_op (`Callable`, *optional*, defaults to `None`): + The base + [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to + use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best + operator. + """ + + def __init__(self, attention_op: Callable | None = None): + self.attention_op = attention_op + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + hidden_states = hidden_states.view(hidden_states.shape[0], hidden_states.shape[1], -1).transpose(1, 2) + batch_size, sequence_length, _ = hidden_states.shape + + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + query = attn.head_to_batch_dim(query) + + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.head_to_batch_dim(encoder_hidden_states_key_proj) + encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj) + + if not attn.only_cross_attention: + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + key = torch.cat([encoder_hidden_states_key_proj, key], dim=1) + value = torch.cat([encoder_hidden_states_value_proj, value], dim=1) + else: + key = encoder_hidden_states_key_proj + value = encoder_hidden_states_value_proj + + hidden_states = xformers.ops.memory_efficient_attention( + query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale + ) + hidden_states = hidden_states.to(query.dtype) + hidden_states = attn.batch_to_head_dim(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + hidden_states = hidden_states.transpose(-1, -2).reshape(residual.shape) + hidden_states = hidden_states + residual + + return hidden_states + + +class XFormersAttnProcessor: + r""" + Processor for implementing memory efficient attention using xFormers. + + Args: + attention_op (`Callable`, *optional*, defaults to `None`): + The base + [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to + use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best + operator. + """ + + def __init__(self, attention_op: Callable | None = None): + self.attention_op = attention_op + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, key_tokens, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + attention_mask = attn.prepare_attention_mask(attention_mask, key_tokens, batch_size) + if attention_mask is not None: + # expand our mask's singleton query_tokens dimension: + # [batch*heads, 1, key_tokens] -> + # [batch*heads, query_tokens, key_tokens] + # so that it can be added as a bias onto the attention scores that xformers computes: + # [batch*heads, query_tokens, key_tokens] + # we do this explicitly because xformers doesn't broadcast the singleton dimension for us. + _, query_tokens, _ = hidden_states.shape + attention_mask = attention_mask.expand(-1, query_tokens, -1) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = attn.head_to_batch_dim(query).contiguous() + key = attn.head_to_batch_dim(key).contiguous() + value = attn.head_to_batch_dim(value).contiguous() + + hidden_states = xformers.ops.memory_efficient_attention( + query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale + ) + hidden_states = hidden_states.to(query.dtype) + hidden_states = attn.batch_to_head_dim(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class AttnProcessorNPU: + r""" + Processor for implementing flash attention using torch_npu. Torch_npu supports only fp16 and bf16 data types. If + fp32 is used, F.scaled_dot_product_attention will be used for computation, but the acceleration effect on NPU is + not significant. + + """ + + def __init__(self): + if not is_torch_npu_available(): + raise ImportError("AttnProcessorNPU requires torch_npu extensions and is supported only on npu devices.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + attention_mask = attention_mask.repeat(1, 1, hidden_states.shape[1], 1) + if attention_mask.dtype == torch.bool: + attention_mask = torch.logical_not(attention_mask.bool()) + else: + attention_mask = attention_mask.bool() + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + if query.dtype in (torch.float16, torch.bfloat16): + hidden_states = torch_npu.npu_fusion_attention( + query, + key, + value, + attn.heads, + input_layout="BNSD", + pse=None, + atten_mask=attention_mask, + scale=1.0 / math.sqrt(query.shape[-1]), + pre_tockens=65536, + next_tockens=65536, + keep_prob=1.0, + sync=False, + inner_precise=0, + )[0] + else: + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class AttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class XLAFlashAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention with pallas flash attention kernel if using `torch_xla`. + """ + + def __init__(self, partition_spec: tuple[str | None, ...] | None = None): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "XLAFlashAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + if is_torch_xla_version("<", "2.3"): + raise ImportError("XLA flash attention requires torch_xla version >= 2.3.") + if is_spmd() and is_torch_xla_version("<", "2.4"): + raise ImportError("SPMD support for XLA flash attention needs torch_xla version >= 2.4.") + self.partition_spec = partition_spec + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + if all(tensor.shape[2] >= 4096 for tensor in [query, key, value]): + if attention_mask is not None: + attention_mask = attention_mask.view(batch_size, 1, 1, attention_mask.shape[-1]) + # Convert mask to float and replace 0s with -inf and 1s with 0 + attention_mask = ( + attention_mask.float() + .masked_fill(attention_mask == 0, float("-inf")) + .masked_fill(attention_mask == 1, float(0.0)) + ) + + # Apply attention mask to key + key = key + attention_mask + query /= math.sqrt(query.shape[3]) + partition_spec = self.partition_spec if is_spmd() else None + hidden_states = flash_attention(query, key, value, causal=False, partition_spec=partition_spec) + else: + logger.warning( + "Unable to use the flash attention pallas kernel API call due to QKV sequence length < 4096." + ) + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class MochiVaeAttnProcessor2_0: + r""" + Attention processor used in Mochi VAE. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + is_single_frame = hidden_states.shape[1] == 1 + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if is_single_frame: + hidden_states = attn.to_v(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + return hidden_states + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=attn.is_causal + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class StableAudioAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the Stable Audio model. It applies rotary embedding on query and key vector, and allows MHA, GQA or MQA. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "StableAudioAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def apply_partial_rotary_emb( + self, + x: torch.Tensor, + freqs_cis: tuple[torch.Tensor], + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + rot_dim = freqs_cis[0].shape[-1] + x_to_rotate, x_unrotated = x[..., :rot_dim], x[..., rot_dim:] + + x_rotated = apply_rotary_emb(x_to_rotate, freqs_cis, use_real=True, use_real_unbind_dim=-2) + + out = torch.cat((x_rotated, x_unrotated), dim=-1) + return out + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + residual = hidden_states + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + head_dim = query.shape[-1] // attn.heads + kv_heads = key.shape[-1] // head_dim + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2) + + if kv_heads != attn.heads: + # if GQA or MQA, repeat the key/value heads to reach the number of query heads. + heads_per_kv_head = attn.heads // kv_heads + key = torch.repeat_interleave(key, heads_per_kv_head, dim=1, output_size=key.shape[1] * heads_per_kv_head) + value = torch.repeat_interleave( + value, heads_per_kv_head, dim=1, output_size=value.shape[1] * heads_per_kv_head + ) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if rotary_emb is not None: + query_dtype = query.dtype + key_dtype = key.dtype + query = query.to(torch.float32) + key = key.to(torch.float32) + + rot_dim = rotary_emb[0].shape[-1] + query_to_rotate, query_unrotated = query[..., :rot_dim], query[..., rot_dim:] + query_rotated = apply_rotary_emb(query_to_rotate, rotary_emb, use_real=True, use_real_unbind_dim=-2) + + query = torch.cat((query_rotated, query_unrotated), dim=-1) + + if not attn.is_cross_attention: + key_to_rotate, key_unrotated = key[..., :rot_dim], key[..., rot_dim:] + key_rotated = apply_rotary_emb(key_to_rotate, rotary_emb, use_real=True, use_real_unbind_dim=-2) + + key = torch.cat((key_rotated, key_unrotated), dim=-1) + + query = query.to(query_dtype) + key = key.to(key_dtype) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class HunyuanAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on query and key vector. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb) + if not attn.is_cross_attention: + key = apply_rotary_emb(key, image_rotary_emb) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class FusedHunyuanAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0) with fused + projection layers. This is used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on + query and key vector. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "FusedHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + if encoder_hidden_states is None: + qkv = attn.to_qkv(hidden_states) + split_size = qkv.shape[-1] // 3 + query, key, value = torch.split(qkv, split_size, dim=-1) + else: + if attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + query = attn.to_q(hidden_states) + + kv = attn.to_kv(encoder_hidden_states) + split_size = kv.shape[-1] // 2 + key, value = torch.split(kv, split_size, dim=-1) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb) + if not attn.is_cross_attention: + key = apply_rotary_emb(key, image_rotary_emb) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class PAGHunyuanAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the HunyuanDiT model. It applies a normalization layer and rotary embedding on query and key vector. This + variant of the processor employs [Pertubed Attention Guidance](https://huggingface.co/papers/2403.17377). + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "PAGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + # chunk + hidden_states_org, hidden_states_ptb = hidden_states.chunk(2) + + # 1. Original Path + batch_size, sequence_length, _ = ( + hidden_states_org.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states_org = attn.group_norm(hidden_states_org.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states_org) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states_org + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb) + if not attn.is_cross_attention: + key = apply_rotary_emb(key, image_rotary_emb) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states_org = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states_org = hidden_states_org.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_org = hidden_states_org.to(query.dtype) + + # linear proj + hidden_states_org = attn.to_out[0](hidden_states_org) + # dropout + hidden_states_org = attn.to_out[1](hidden_states_org) + + if input_ndim == 4: + hidden_states_org = hidden_states_org.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # 2. Perturbed Path + if attn.group_norm is not None: + hidden_states_ptb = attn.group_norm(hidden_states_ptb.transpose(1, 2)).transpose(1, 2) + + hidden_states_ptb = attn.to_v(hidden_states_ptb) + hidden_states_ptb = hidden_states_ptb.to(query.dtype) + + # linear proj + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + # dropout + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + + if input_ndim == 4: + hidden_states_ptb = hidden_states_ptb.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # cat + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class PAGCFGHunyuanAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the HunyuanDiT model. It applies a normalization layer and rotary embedding on query and key vector. This + variant of the processor employs [Pertubed Attention Guidance](https://huggingface.co/papers/2403.17377). + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "PAGCFGHunyuanAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + # chunk + hidden_states_uncond, hidden_states_org, hidden_states_ptb = hidden_states.chunk(3) + hidden_states_org = torch.cat([hidden_states_uncond, hidden_states_org]) + + # 1. Original Path + batch_size, sequence_length, _ = ( + hidden_states_org.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states_org = attn.group_norm(hidden_states_org.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states_org) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states_org + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb) + if not attn.is_cross_attention: + key = apply_rotary_emb(key, image_rotary_emb) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states_org = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states_org = hidden_states_org.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_org = hidden_states_org.to(query.dtype) + + # linear proj + hidden_states_org = attn.to_out[0](hidden_states_org) + # dropout + hidden_states_org = attn.to_out[1](hidden_states_org) + + if input_ndim == 4: + hidden_states_org = hidden_states_org.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # 2. Perturbed Path + if attn.group_norm is not None: + hidden_states_ptb = attn.group_norm(hidden_states_ptb.transpose(1, 2)).transpose(1, 2) + + hidden_states_ptb = attn.to_v(hidden_states_ptb) + hidden_states_ptb = hidden_states_ptb.to(query.dtype) + + # linear proj + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + # dropout + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + + if input_ndim == 4: + hidden_states_ptb = hidden_states_ptb.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # cat + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class LuminaAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the LuminaNextDiT model. It applies a s normalization layer and rotary embedding on query and key vector. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + query_rotary_emb: torch.Tensor | None = None, + key_rotary_emb: torch.Tensor | None = None, + base_sequence_length: int | None = None, + ) -> torch.Tensor: + from .embeddings import apply_rotary_emb + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = hidden_states.shape + + # Get Query-Key-Value Pair + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query_dim = query.shape[-1] + inner_dim = key.shape[-1] + head_dim = query_dim // attn.heads + dtype = query.dtype + + # Get key-value heads + kv_heads = inner_dim // head_dim + + # Apply Query-Key Norm if needed + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + query = query.view(batch_size, -1, attn.heads, head_dim) + + key = key.view(batch_size, -1, kv_heads, head_dim) + value = value.view(batch_size, -1, kv_heads, head_dim) + + # Apply RoPE if needed + if query_rotary_emb is not None: + query = apply_rotary_emb(query, query_rotary_emb, use_real=False) + if key_rotary_emb is not None: + key = apply_rotary_emb(key, key_rotary_emb, use_real=False) + + query, key = query.to(dtype), key.to(dtype) + + # Apply proportional attention if true + if key_rotary_emb is None: + softmax_scale = None + else: + if base_sequence_length is not None: + softmax_scale = math.sqrt(math.log(sequence_length, base_sequence_length)) * attn.scale + else: + softmax_scale = attn.scale + + # perform Grouped-qurey Attention (GQA) + n_rep = attn.heads // kv_heads + if n_rep >= 1: + key = key.unsqueeze(3).repeat(1, 1, 1, n_rep, 1).flatten(2, 3) + value = value.unsqueeze(3).repeat(1, 1, 1, n_rep, 1).flatten(2, 3) + + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.bool().view(batch_size, 1, 1, -1) + attention_mask = attention_mask.expand(-1, attn.heads, sequence_length, -1) + + query = query.transpose(1, 2) + key = key.transpose(1, 2) + value = value.transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, scale=softmax_scale + ) + hidden_states = hidden_states.transpose(1, 2).to(dtype) + + return hidden_states + + +class FusedAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). It uses + fused projection layers. For self-attention modules, all projection matrices (i.e., query, key, value) are fused. + For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is currently 🧪 experimental in nature and can change in future. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "FusedAttnProcessor2_0 requires at least PyTorch 2.0, to use it. Please upgrade PyTorch to > 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + if encoder_hidden_states is None: + qkv = attn.to_qkv(hidden_states) + split_size = qkv.shape[-1] // 3 + query, key, value = torch.split(qkv, split_size, dim=-1) + else: + if attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + query = attn.to_q(hidden_states) + + kv = attn.to_kv(encoder_hidden_states) + split_size = kv.shape[-1] // 2 + key, value = torch.split(kv, split_size, dim=-1) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class CustomDiffusionXFormersAttnProcessor(nn.Module): + r""" + Processor for implementing memory efficient attention using xFormers for the Custom Diffusion method. + + Args: + train_kv (`bool`, defaults to `True`): + Whether to newly train the key and value matrices corresponding to the text features. + train_q_out (`bool`, defaults to `True`): + Whether to newly train query matrices corresponding to the latent image features. + hidden_size (`int`, *optional*, defaults to `None`): + The hidden size of the attention layer. + cross_attention_dim (`int`, *optional*, defaults to `None`): + The number of channels in the `encoder_hidden_states`. + out_bias (`bool`, defaults to `True`): + Whether to include the bias parameter in `train_q_out`. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + attention_op (`Callable`, *optional*, defaults to `None`): + The base + [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to use + as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best operator. + """ + + def __init__( + self, + train_kv: bool = True, + train_q_out: bool = False, + hidden_size: int | None = None, + cross_attention_dim: int | None = None, + out_bias: bool = True, + dropout: float = 0.0, + attention_op: Callable | None = None, + ): + super().__init__() + self.train_kv = train_kv + self.train_q_out = train_q_out + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + self.attention_op = attention_op + + # `_custom_diffusion` id for easy serialization and loading. + if self.train_kv: + self.to_k_custom_diffusion = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) + self.to_v_custom_diffusion = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) + if self.train_q_out: + self.to_q_custom_diffusion = nn.Linear(hidden_size, hidden_size, bias=False) + self.to_out_custom_diffusion = nn.ModuleList([]) + self.to_out_custom_diffusion.append(nn.Linear(hidden_size, hidden_size, bias=out_bias)) + self.to_out_custom_diffusion.append(nn.Dropout(dropout)) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if self.train_q_out: + query = self.to_q_custom_diffusion(hidden_states).to(attn.to_q.weight.dtype) + else: + query = attn.to_q(hidden_states.to(attn.to_q.weight.dtype)) + + if encoder_hidden_states is None: + crossattn = False + encoder_hidden_states = hidden_states + else: + crossattn = True + if attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + if self.train_kv: + key = self.to_k_custom_diffusion(encoder_hidden_states.to(self.to_k_custom_diffusion.weight.dtype)) + value = self.to_v_custom_diffusion(encoder_hidden_states.to(self.to_v_custom_diffusion.weight.dtype)) + key = key.to(attn.to_q.weight.dtype) + value = value.to(attn.to_q.weight.dtype) + else: + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if crossattn: + detach = torch.ones_like(key) + detach[:, :1, :] = detach[:, :1, :] * 0.0 + key = detach * key + (1 - detach) * key.detach() + value = detach * value + (1 - detach) * value.detach() + + query = attn.head_to_batch_dim(query).contiguous() + key = attn.head_to_batch_dim(key).contiguous() + value = attn.head_to_batch_dim(value).contiguous() + + hidden_states = xformers.ops.memory_efficient_attention( + query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale + ) + hidden_states = hidden_states.to(query.dtype) + hidden_states = attn.batch_to_head_dim(hidden_states) + + if self.train_q_out: + # linear proj + hidden_states = self.to_out_custom_diffusion[0](hidden_states) + # dropout + hidden_states = self.to_out_custom_diffusion[1](hidden_states) + else: + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + + +class CustomDiffusionAttnProcessor2_0(nn.Module): + r""" + Processor for implementing attention for the Custom Diffusion method using PyTorch 2.0’s memory-efficient scaled + dot-product attention. + + Args: + train_kv (`bool`, defaults to `True`): + Whether to newly train the key and value matrices corresponding to the text features. + train_q_out (`bool`, defaults to `True`): + Whether to newly train query matrices corresponding to the latent image features. + hidden_size (`int`, *optional*, defaults to `None`): + The hidden size of the attention layer. + cross_attention_dim (`int`, *optional*, defaults to `None`): + The number of channels in the `encoder_hidden_states`. + out_bias (`bool`, defaults to `True`): + Whether to include the bias parameter in `train_q_out`. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + """ + + def __init__( + self, + train_kv: bool = True, + train_q_out: bool = True, + hidden_size: int | None = None, + cross_attention_dim: int | None = None, + out_bias: bool = True, + dropout: float = 0.0, + ): + super().__init__() + self.train_kv = train_kv + self.train_q_out = train_q_out + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + + # `_custom_diffusion` id for easy serialization and loading. + if self.train_kv: + self.to_k_custom_diffusion = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) + self.to_v_custom_diffusion = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) + if self.train_q_out: + self.to_q_custom_diffusion = nn.Linear(hidden_size, hidden_size, bias=False) + self.to_out_custom_diffusion = nn.ModuleList([]) + self.to_out_custom_diffusion.append(nn.Linear(hidden_size, hidden_size, bias=out_bias)) + self.to_out_custom_diffusion.append(nn.Dropout(dropout)) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = hidden_states.shape + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + if self.train_q_out: + query = self.to_q_custom_diffusion(hidden_states) + else: + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + crossattn = False + encoder_hidden_states = hidden_states + else: + crossattn = True + if attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + if self.train_kv: + key = self.to_k_custom_diffusion(encoder_hidden_states.to(self.to_k_custom_diffusion.weight.dtype)) + value = self.to_v_custom_diffusion(encoder_hidden_states.to(self.to_v_custom_diffusion.weight.dtype)) + key = key.to(attn.to_q.weight.dtype) + value = value.to(attn.to_q.weight.dtype) + + else: + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if crossattn: + detach = torch.ones_like(key) + detach[:, :1, :] = detach[:, :1, :] * 0.0 + key = detach * key + (1 - detach) * key.detach() + value = detach * value + (1 - detach) * value.detach() + + inner_dim = hidden_states.shape[-1] + + head_dim = inner_dim // attn.heads + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + if self.train_q_out: + # linear proj + hidden_states = self.to_out_custom_diffusion[0](hidden_states) + # dropout + hidden_states = self.to_out_custom_diffusion[1](hidden_states) + else: + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + + +class SlicedAttnProcessor: + r""" + Processor for implementing sliced attention. + + Args: + slice_size (`int`, *optional*): + The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and + `attention_head_dim` must be a multiple of the `slice_size`. + """ + + def __init__(self, slice_size: int): + self.slice_size = slice_size + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + dim = query.shape[-1] + query = attn.head_to_batch_dim(query) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + + batch_size_attention, query_tokens, _ = query.shape + hidden_states = torch.zeros( + (batch_size_attention, query_tokens, dim // attn.heads), device=query.device, dtype=query.dtype + ) + + for i in range((batch_size_attention - 1) // self.slice_size + 1): + start_idx = i * self.slice_size + end_idx = (i + 1) * self.slice_size + + query_slice = query[start_idx:end_idx] + key_slice = key[start_idx:end_idx] + attn_mask_slice = attention_mask[start_idx:end_idx] if attention_mask is not None else None + + attn_slice = attn.get_attention_scores(query_slice, key_slice, attn_mask_slice) + + attn_slice = torch.bmm(attn_slice, value[start_idx:end_idx]) + + hidden_states[start_idx:end_idx] = attn_slice + + hidden_states = attn.batch_to_head_dim(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class SlicedAttnAddedKVProcessor: + r""" + Processor for implementing sliced attention with extra learnable key and value matrices for the text encoder. + + Args: + slice_size (`int`, *optional*): + The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and + `attention_head_dim` must be a multiple of the `slice_size`. + """ + + def __init__(self, slice_size): + self.slice_size = slice_size + + def __call__( + self, + attn: "Attention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + hidden_states = hidden_states.view(hidden_states.shape[0], hidden_states.shape[1], -1).transpose(1, 2) + + batch_size, sequence_length, _ = hidden_states.shape + + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + dim = query.shape[-1] + query = attn.head_to_batch_dim(query) + + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + encoder_hidden_states_key_proj = attn.head_to_batch_dim(encoder_hidden_states_key_proj) + encoder_hidden_states_value_proj = attn.head_to_batch_dim(encoder_hidden_states_value_proj) + + if not attn.only_cross_attention: + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + key = torch.cat([encoder_hidden_states_key_proj, key], dim=1) + value = torch.cat([encoder_hidden_states_value_proj, value], dim=1) + else: + key = encoder_hidden_states_key_proj + value = encoder_hidden_states_value_proj + + batch_size_attention, query_tokens, _ = query.shape + hidden_states = torch.zeros( + (batch_size_attention, query_tokens, dim // attn.heads), device=query.device, dtype=query.dtype + ) + + for i in range((batch_size_attention - 1) // self.slice_size + 1): + start_idx = i * self.slice_size + end_idx = (i + 1) * self.slice_size + + query_slice = query[start_idx:end_idx] + key_slice = key[start_idx:end_idx] + attn_mask_slice = attention_mask[start_idx:end_idx] if attention_mask is not None else None + + attn_slice = attn.get_attention_scores(query_slice, key_slice, attn_mask_slice) + + attn_slice = torch.bmm(attn_slice, value[start_idx:end_idx]) + + hidden_states[start_idx:end_idx] = attn_slice + + hidden_states = attn.batch_to_head_dim(hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + hidden_states = hidden_states.transpose(-1, -2).reshape(residual.shape) + hidden_states = hidden_states + residual + + return hidden_states + + +class SpatialNorm(nn.Module): + """ + Spatially conditioned normalization as defined in https://huggingface.co/papers/2209.09002. + + Args: + f_channels (`int`): + The number of channels for input to group normalization layer, and output of the spatial norm layer. + zq_channels (`int`): + The number of channels for the quantized vector as described in the paper. + """ + + def __init__( + self, + f_channels: int, + zq_channels: int, + ): + super().__init__() + self.norm_layer = nn.GroupNorm(num_channels=f_channels, num_groups=32, eps=1e-6, affine=True) + self.conv_y = nn.Conv2d(zq_channels, f_channels, kernel_size=1, stride=1, padding=0) + self.conv_b = nn.Conv2d(zq_channels, f_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, f: torch.Tensor, zq: torch.Tensor) -> torch.Tensor: + f_size = f.shape[-2:] + zq = F.interpolate(zq, size=f_size, mode="nearest") + norm_f = self.norm_layer(f) + new_f = norm_f * self.conv_y(zq) + self.conv_b(zq) + return new_f + + +class IPAdapterAttnProcessor(nn.Module): + r""" + Attention processor for Multiple IP-Adapters. + + Args: + hidden_size (`int`): + The hidden size of the attention layer. + cross_attention_dim (`int`): + The number of channels in the `encoder_hidden_states`. + num_tokens (`int`, `tuple[int]` or `list[int]`, defaults to `(4,)`): + The context length of the image features. + scale (`float` or list[`float`], defaults to 1.0): + the weight scale of image prompt. + """ + + def __init__(self, hidden_size, cross_attention_dim=None, num_tokens=(4,), scale=1.0): + super().__init__() + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + + if not isinstance(num_tokens, (tuple, list)): + num_tokens = [num_tokens] + self.num_tokens = num_tokens + + if not isinstance(scale, list): + scale = [scale] * len(num_tokens) + if len(scale) != len(num_tokens): + raise ValueError("`scale` should be a list of integers with the same length as `num_tokens`.") + self.scale = scale + + self.to_k_ip = nn.ModuleList( + [nn.Linear(cross_attention_dim, hidden_size, bias=False) for _ in range(len(num_tokens))] + ) + self.to_v_ip = nn.ModuleList( + [nn.Linear(cross_attention_dim, hidden_size, bias=False) for _ in range(len(num_tokens))] + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + scale: float = 1.0, + ip_adapter_masks: torch.Tensor | None = None, + ): + residual = hidden_states + + # separate ip_hidden_states from encoder_hidden_states + if encoder_hidden_states is not None: + if isinstance(encoder_hidden_states, tuple): + encoder_hidden_states, ip_hidden_states = encoder_hidden_states + else: + deprecation_message = ( + "You have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release." + " Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning." + ) + deprecate("encoder_hidden_states not a tuple", "1.0.0", deprecation_message, standard_warn=False) + end_pos = encoder_hidden_states.shape[1] - self.num_tokens[0] + encoder_hidden_states, ip_hidden_states = ( + encoder_hidden_states[:, :end_pos, :], + [encoder_hidden_states[:, end_pos:, :]], + ) + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = attn.head_to_batch_dim(query) + key = attn.head_to_batch_dim(key) + value = attn.head_to_batch_dim(value) + + attention_probs = attn.get_attention_scores(query, key, attention_mask) + hidden_states = torch.bmm(attention_probs, value) + hidden_states = attn.batch_to_head_dim(hidden_states) + + if ip_adapter_masks is not None: + if not isinstance(ip_adapter_masks, list): + # for backward compatibility, we accept `ip_adapter_mask` as a tensor of shape [num_ip_adapter, 1, height, width] + ip_adapter_masks = list(ip_adapter_masks.unsqueeze(1)) + if not (len(ip_adapter_masks) == len(self.scale) == len(ip_hidden_states)): + raise ValueError( + f"Length of ip_adapter_masks array ({len(ip_adapter_masks)}) must match " + f"length of self.scale array ({len(self.scale)}) and number of ip_hidden_states " + f"({len(ip_hidden_states)})" + ) + else: + for index, (mask, scale, ip_state) in enumerate(zip(ip_adapter_masks, self.scale, ip_hidden_states)): + if mask is None: + continue + if not isinstance(mask, torch.Tensor) or mask.ndim != 4: + raise ValueError( + "Each element of the ip_adapter_masks array should be a tensor with shape " + "[1, num_images_for_ip_adapter, height, width]." + " Please use `IPAdapterMaskProcessor` to preprocess your mask" + ) + if mask.shape[1] != ip_state.shape[1]: + raise ValueError( + f"Number of masks ({mask.shape[1]}) does not match " + f"number of ip images ({ip_state.shape[1]}) at index {index}" + ) + if isinstance(scale, list) and not len(scale) == mask.shape[1]: + raise ValueError( + f"Number of masks ({mask.shape[1]}) does not match " + f"number of scales ({len(scale)}) at index {index}" + ) + else: + ip_adapter_masks = [None] * len(self.scale) + + # for ip-adapter + for current_ip_hidden_states, scale, to_k_ip, to_v_ip, mask in zip( + ip_hidden_states, self.scale, self.to_k_ip, self.to_v_ip, ip_adapter_masks + ): + skip = False + if isinstance(scale, list): + if all(s == 0 for s in scale): + skip = True + elif scale == 0: + skip = True + if not skip: + if mask is not None: + if not isinstance(scale, list): + scale = [scale] * mask.shape[1] + + current_num_images = mask.shape[1] + for i in range(current_num_images): + ip_key = to_k_ip(current_ip_hidden_states[:, i, :, :]) + ip_value = to_v_ip(current_ip_hidden_states[:, i, :, :]) + + ip_key = attn.head_to_batch_dim(ip_key) + ip_value = attn.head_to_batch_dim(ip_value) + + ip_attention_probs = attn.get_attention_scores(query, ip_key, None) + _current_ip_hidden_states = torch.bmm(ip_attention_probs, ip_value) + _current_ip_hidden_states = attn.batch_to_head_dim(_current_ip_hidden_states) + + mask_downsample = IPAdapterMaskProcessor.downsample( + mask[:, i, :, :], + batch_size, + _current_ip_hidden_states.shape[1], + _current_ip_hidden_states.shape[2], + ) + + mask_downsample = mask_downsample.to(dtype=query.dtype, device=query.device) + + hidden_states = hidden_states + scale[i] * (_current_ip_hidden_states * mask_downsample) + else: + ip_key = to_k_ip(current_ip_hidden_states) + ip_value = to_v_ip(current_ip_hidden_states) + + ip_key = attn.head_to_batch_dim(ip_key) + ip_value = attn.head_to_batch_dim(ip_value) + + ip_attention_probs = attn.get_attention_scores(query, ip_key, None) + current_ip_hidden_states = torch.bmm(ip_attention_probs, ip_value) + current_ip_hidden_states = attn.batch_to_head_dim(current_ip_hidden_states) + + hidden_states = hidden_states + scale * current_ip_hidden_states + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class IPAdapterAttnProcessor2_0(torch.nn.Module): + r""" + Attention processor for IP-Adapter for PyTorch 2.0. + + Args: + hidden_size (`int`): + The hidden size of the attention layer. + cross_attention_dim (`int`): + The number of channels in the `encoder_hidden_states`. + num_tokens (`int`, `tuple[int]` or `list[int]`, defaults to `(4,)`): + The context length of the image features. + scale (`float` or `list[float]`, defaults to 1.0): + the weight scale of image prompt. + """ + + def __init__(self, hidden_size, cross_attention_dim=None, num_tokens=(4,), scale=1.0): + super().__init__() + + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + f"{self.__class__.__name__} requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + + if not isinstance(num_tokens, (tuple, list)): + num_tokens = [num_tokens] + self.num_tokens = num_tokens + + if not isinstance(scale, list): + scale = [scale] * len(num_tokens) + if len(scale) != len(num_tokens): + raise ValueError("`scale` should be a list of integers with the same length as `num_tokens`.") + self.scale = scale + + self.to_k_ip = nn.ModuleList( + [nn.Linear(cross_attention_dim, hidden_size, bias=False) for _ in range(len(num_tokens))] + ) + self.to_v_ip = nn.ModuleList( + [nn.Linear(cross_attention_dim, hidden_size, bias=False) for _ in range(len(num_tokens))] + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + scale: float = 1.0, + ip_adapter_masks: torch.Tensor | None = None, + ): + residual = hidden_states + + # separate ip_hidden_states from encoder_hidden_states + if encoder_hidden_states is not None: + if isinstance(encoder_hidden_states, tuple): + encoder_hidden_states, ip_hidden_states = encoder_hidden_states + else: + deprecation_message = ( + "You have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release." + " Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning." + ) + deprecate("encoder_hidden_states not a tuple", "1.0.0", deprecation_message, standard_warn=False) + end_pos = encoder_hidden_states.shape[1] - self.num_tokens[0] + encoder_hidden_states, ip_hidden_states = ( + encoder_hidden_states[:, :end_pos, :], + [encoder_hidden_states[:, end_pos:, :]], + ) + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + if ip_adapter_masks is not None: + if not isinstance(ip_adapter_masks, list): + # for backward compatibility, we accept `ip_adapter_mask` as a tensor of shape [num_ip_adapter, 1, height, width] + ip_adapter_masks = list(ip_adapter_masks.unsqueeze(1)) + if not (len(ip_adapter_masks) == len(self.scale) == len(ip_hidden_states)): + raise ValueError( + f"Length of ip_adapter_masks array ({len(ip_adapter_masks)}) must match " + f"length of self.scale array ({len(self.scale)}) and number of ip_hidden_states " + f"({len(ip_hidden_states)})" + ) + else: + for index, (mask, scale, ip_state) in enumerate(zip(ip_adapter_masks, self.scale, ip_hidden_states)): + if mask is None: + continue + if not isinstance(mask, torch.Tensor) or mask.ndim != 4: + raise ValueError( + "Each element of the ip_adapter_masks array should be a tensor with shape " + "[1, num_images_for_ip_adapter, height, width]." + " Please use `IPAdapterMaskProcessor` to preprocess your mask" + ) + if mask.shape[1] != ip_state.shape[1]: + raise ValueError( + f"Number of masks ({mask.shape[1]}) does not match " + f"number of ip images ({ip_state.shape[1]}) at index {index}" + ) + if isinstance(scale, list) and not len(scale) == mask.shape[1]: + raise ValueError( + f"Number of masks ({mask.shape[1]}) does not match " + f"number of scales ({len(scale)}) at index {index}" + ) + else: + ip_adapter_masks = [None] * len(self.scale) + + # for ip-adapter + for current_ip_hidden_states, scale, to_k_ip, to_v_ip, mask in zip( + ip_hidden_states, self.scale, self.to_k_ip, self.to_v_ip, ip_adapter_masks + ): + skip = False + if isinstance(scale, list): + if all(s == 0 for s in scale): + skip = True + elif scale == 0: + skip = True + if not skip: + if mask is not None: + if not isinstance(scale, list): + scale = [scale] * mask.shape[1] + + current_num_images = mask.shape[1] + for i in range(current_num_images): + ip_key = to_k_ip(current_ip_hidden_states[:, i, :, :]) + ip_value = to_v_ip(current_ip_hidden_states[:, i, :, :]) + + ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + _current_ip_hidden_states = F.scaled_dot_product_attention( + query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False + ) + + _current_ip_hidden_states = _current_ip_hidden_states.transpose(1, 2).reshape( + batch_size, -1, attn.heads * head_dim + ) + _current_ip_hidden_states = _current_ip_hidden_states.to(query.dtype) + + mask_downsample = IPAdapterMaskProcessor.downsample( + mask[:, i, :, :], + batch_size, + _current_ip_hidden_states.shape[1], + _current_ip_hidden_states.shape[2], + ) + + mask_downsample = mask_downsample.to(dtype=query.dtype, device=query.device) + hidden_states = hidden_states + scale[i] * (_current_ip_hidden_states * mask_downsample) + else: + ip_key = to_k_ip(current_ip_hidden_states) + ip_value = to_v_ip(current_ip_hidden_states) + + ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + current_ip_hidden_states = F.scaled_dot_product_attention( + query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False + ) + + current_ip_hidden_states = current_ip_hidden_states.transpose(1, 2).reshape( + batch_size, -1, attn.heads * head_dim + ) + current_ip_hidden_states = current_ip_hidden_states.to(query.dtype) + + hidden_states = hidden_states + scale * current_ip_hidden_states + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class IPAdapterXFormersAttnProcessor(torch.nn.Module): + r""" + Attention processor for IP-Adapter using xFormers. + + Args: + hidden_size (`int`): + The hidden size of the attention layer. + cross_attention_dim (`int`): + The number of channels in the `encoder_hidden_states`. + num_tokens (`int`, `tuple[int]` or `list[int]`, defaults to `(4,)`): + The context length of the image features. + scale (`float` or `list[float]`, defaults to 1.0): + the weight scale of image prompt. + attention_op (`Callable`, *optional*, defaults to `None`): + The base + [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to + use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best + operator. + """ + + def __init__( + self, + hidden_size, + cross_attention_dim=None, + num_tokens=(4,), + scale=1.0, + attention_op: Callable | None = None, + ): + super().__init__() + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + self.attention_op = attention_op + + if not isinstance(num_tokens, (tuple, list)): + num_tokens = [num_tokens] + self.num_tokens = num_tokens + + if not isinstance(scale, list): + scale = [scale] * len(num_tokens) + if len(scale) != len(num_tokens): + raise ValueError("`scale` should be a list of integers with the same length as `num_tokens`.") + self.scale = scale + + self.to_k_ip = nn.ModuleList( + [nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) for _ in range(len(num_tokens))] + ) + self.to_v_ip = nn.ModuleList( + [nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) for _ in range(len(num_tokens))] + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor | None = None, + attention_mask: torch.FloatTensor | None = None, + temb: torch.FloatTensor | None = None, + scale: float = 1.0, + ip_adapter_masks: torch.FloatTensor | None = None, + ): + residual = hidden_states + + # separate ip_hidden_states from encoder_hidden_states + if encoder_hidden_states is not None: + if isinstance(encoder_hidden_states, tuple): + encoder_hidden_states, ip_hidden_states = encoder_hidden_states + else: + deprecation_message = ( + "You have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release." + " Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning." + ) + deprecate("encoder_hidden_states not a tuple", "1.0.0", deprecation_message, standard_warn=False) + end_pos = encoder_hidden_states.shape[1] - self.num_tokens[0] + encoder_hidden_states, ip_hidden_states = ( + encoder_hidden_states[:, :end_pos, :], + [encoder_hidden_states[:, end_pos:, :]], + ) + + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # expand our mask's singleton query_tokens dimension: + # [batch*heads, 1, key_tokens] -> + # [batch*heads, query_tokens, key_tokens] + # so that it can be added as a bias onto the attention scores that xformers computes: + # [batch*heads, query_tokens, key_tokens] + # we do this explicitly because xformers doesn't broadcast the singleton dimension for us. + _, query_tokens, _ = hidden_states.shape + attention_mask = attention_mask.expand(-1, query_tokens, -1) + + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + elif attn.norm_cross: + encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = attn.head_to_batch_dim(query).contiguous() + key = attn.head_to_batch_dim(key).contiguous() + value = attn.head_to_batch_dim(value).contiguous() + + hidden_states = xformers.ops.memory_efficient_attention( + query, key, value, attn_bias=attention_mask, op=self.attention_op + ) + hidden_states = hidden_states.to(query.dtype) + hidden_states = attn.batch_to_head_dim(hidden_states) + + if ip_hidden_states: + if ip_adapter_masks is not None: + if not isinstance(ip_adapter_masks, list): + # for backward compatibility, we accept `ip_adapter_mask` as a tensor of shape [num_ip_adapter, 1, height, width] + ip_adapter_masks = list(ip_adapter_masks.unsqueeze(1)) + if not (len(ip_adapter_masks) == len(self.scale) == len(ip_hidden_states)): + raise ValueError( + f"Length of ip_adapter_masks array ({len(ip_adapter_masks)}) must match " + f"length of self.scale array ({len(self.scale)}) and number of ip_hidden_states " + f"({len(ip_hidden_states)})" + ) + else: + for index, (mask, scale, ip_state) in enumerate( + zip(ip_adapter_masks, self.scale, ip_hidden_states) + ): + if mask is None: + continue + if not isinstance(mask, torch.Tensor) or mask.ndim != 4: + raise ValueError( + "Each element of the ip_adapter_masks array should be a tensor with shape " + "[1, num_images_for_ip_adapter, height, width]." + " Please use `IPAdapterMaskProcessor` to preprocess your mask" + ) + if mask.shape[1] != ip_state.shape[1]: + raise ValueError( + f"Number of masks ({mask.shape[1]}) does not match " + f"number of ip images ({ip_state.shape[1]}) at index {index}" + ) + if isinstance(scale, list) and not len(scale) == mask.shape[1]: + raise ValueError( + f"Number of masks ({mask.shape[1]}) does not match " + f"number of scales ({len(scale)}) at index {index}" + ) + else: + ip_adapter_masks = [None] * len(self.scale) + + # for ip-adapter + for current_ip_hidden_states, scale, to_k_ip, to_v_ip, mask in zip( + ip_hidden_states, self.scale, self.to_k_ip, self.to_v_ip, ip_adapter_masks + ): + skip = False + if isinstance(scale, list): + if all(s == 0 for s in scale): + skip = True + elif scale == 0: + skip = True + if not skip: + if mask is not None: + mask = mask.to(torch.float16) + if not isinstance(scale, list): + scale = [scale] * mask.shape[1] + + current_num_images = mask.shape[1] + for i in range(current_num_images): + ip_key = to_k_ip(current_ip_hidden_states[:, i, :, :]) + ip_value = to_v_ip(current_ip_hidden_states[:, i, :, :]) + + ip_key = attn.head_to_batch_dim(ip_key).contiguous() + ip_value = attn.head_to_batch_dim(ip_value).contiguous() + + _current_ip_hidden_states = xformers.ops.memory_efficient_attention( + query, ip_key, ip_value, op=self.attention_op + ) + _current_ip_hidden_states = _current_ip_hidden_states.to(query.dtype) + _current_ip_hidden_states = attn.batch_to_head_dim(_current_ip_hidden_states) + + mask_downsample = IPAdapterMaskProcessor.downsample( + mask[:, i, :, :], + batch_size, + _current_ip_hidden_states.shape[1], + _current_ip_hidden_states.shape[2], + ) + + mask_downsample = mask_downsample.to(dtype=query.dtype, device=query.device) + hidden_states = hidden_states + scale[i] * (_current_ip_hidden_states * mask_downsample) + else: + ip_key = to_k_ip(current_ip_hidden_states) + ip_value = to_v_ip(current_ip_hidden_states) + + ip_key = attn.head_to_batch_dim(ip_key).contiguous() + ip_value = attn.head_to_batch_dim(ip_value).contiguous() + + current_ip_hidden_states = xformers.ops.memory_efficient_attention( + query, ip_key, ip_value, op=self.attention_op + ) + current_ip_hidden_states = current_ip_hidden_states.to(query.dtype) + current_ip_hidden_states = attn.batch_to_head_dim(current_ip_hidden_states) + + hidden_states = hidden_states + scale * current_ip_hidden_states + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if input_ndim == 4: + hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class SD3IPAdapterJointAttnProcessor2_0(torch.nn.Module): + """ + Attention processor for IP-Adapter used typically in processing the SD3-like self-attention projections, with + additional image-based information and timestep embeddings. + + Args: + hidden_size (`int`): + The number of hidden channels. + ip_hidden_states_dim (`int`): + The image feature dimension. + head_dim (`int`): + The number of head channels. + timesteps_emb_dim (`int`, defaults to 1280): + The number of input channels for timestep embedding. + scale (`float`, defaults to 0.5): + IP-Adapter scale. + """ + + def __init__( + self, + hidden_size: int, + ip_hidden_states_dim: int, + head_dim: int, + timesteps_emb_dim: int = 1280, + scale: float = 0.5, + ): + super().__init__() + + # To prevent circular import + from .normalization import AdaLayerNorm, RMSNorm + + self.norm_ip = AdaLayerNorm(timesteps_emb_dim, output_dim=ip_hidden_states_dim * 2, norm_eps=1e-6, chunk_dim=1) + self.to_k_ip = nn.Linear(ip_hidden_states_dim, hidden_size, bias=False) + self.to_v_ip = nn.Linear(ip_hidden_states_dim, hidden_size, bias=False) + self.norm_q = RMSNorm(head_dim, 1e-6) + self.norm_k = RMSNorm(head_dim, 1e-6) + self.norm_ip_k = RMSNorm(head_dim, 1e-6) + self.scale = scale + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + ip_hidden_states: torch.FloatTensor = None, + temb: torch.FloatTensor = None, + ) -> torch.FloatTensor: + """ + Perform the attention computation, integrating image features (if provided) and timestep embeddings. + + If `ip_hidden_states` is `None`, this is equivalent to using JointAttnProcessor2_0. + + Args: + attn (`Attention`): + Attention instance. + hidden_states (`torch.FloatTensor`): + Input `hidden_states`. + encoder_hidden_states (`torch.FloatTensor`, *optional*): + The encoder hidden states. + attention_mask (`torch.FloatTensor`, *optional*): + Attention mask. + ip_hidden_states (`torch.FloatTensor`, *optional*): + Image embeddings. + temb (`torch.FloatTensor`, *optional*): + Timestep embeddings. + + Returns: + `torch.FloatTensor`: Output hidden states. + """ + residual = hidden_states + + batch_size = hidden_states.shape[0] + + # `sample` projections. + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + img_query = query + img_key = key + img_value = value + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # `context` projections. + if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + + if attn.norm_added_q is not None: + encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj) + if attn.norm_added_k is not None: + encoder_hidden_states_key_proj = attn.norm_added_k(encoder_hidden_states_key_proj) + + query = torch.cat([query, encoder_hidden_states_query_proj], dim=2) + key = torch.cat([key, encoder_hidden_states_key_proj], dim=2) + value = torch.cat([value, encoder_hidden_states_value_proj], dim=2) + + hidden_states = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0, is_causal=False) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + # Split the attention outputs. + hidden_states, encoder_hidden_states = ( + hidden_states[:, : residual.shape[1]], + hidden_states[:, residual.shape[1] :], + ) + if not attn.context_pre_only: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + # IP Adapter + if self.scale != 0 and ip_hidden_states is not None: + # Norm image features + norm_ip_hidden_states = self.norm_ip(ip_hidden_states, temb=temb) + + # To k and v + ip_key = self.to_k_ip(norm_ip_hidden_states) + ip_value = self.to_v_ip(norm_ip_hidden_states) + + # Reshape + ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # Norm + query = self.norm_q(img_query) + img_key = self.norm_k(img_key) + ip_key = self.norm_ip_k(ip_key) + + # cat img + key = torch.cat([img_key, ip_key], dim=2) + value = torch.cat([img_value, ip_value], dim=2) + + ip_hidden_states = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0, is_causal=False) + ip_hidden_states = ip_hidden_states.transpose(1, 2).view(batch_size, -1, attn.heads * head_dim) + ip_hidden_states = ip_hidden_states.to(query.dtype) + + hidden_states = hidden_states + ip_hidden_states * self.scale + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class PAGIdentitySelfAttnProcessor2_0: + r""" + Processor for implementing PAG using scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + PAG reference: https://huggingface.co/papers/2403.17377 + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "PAGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor | None = None, + attention_mask: torch.FloatTensor | None = None, + temb: torch.FloatTensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + # chunk + hidden_states_org, hidden_states_ptb = hidden_states.chunk(2) + + # original path + batch_size, sequence_length, _ = hidden_states_org.shape + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states_org = attn.group_norm(hidden_states_org.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states_org) + key = attn.to_k(hidden_states_org) + value = attn.to_v(hidden_states_org) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states_org = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + hidden_states_org = hidden_states_org.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_org = hidden_states_org.to(query.dtype) + + # linear proj + hidden_states_org = attn.to_out[0](hidden_states_org) + # dropout + hidden_states_org = attn.to_out[1](hidden_states_org) + + if input_ndim == 4: + hidden_states_org = hidden_states_org.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # perturbed path (identity attention) + batch_size, sequence_length, _ = hidden_states_ptb.shape + + if attn.group_norm is not None: + hidden_states_ptb = attn.group_norm(hidden_states_ptb.transpose(1, 2)).transpose(1, 2) + + hidden_states_ptb = attn.to_v(hidden_states_ptb) + hidden_states_ptb = hidden_states_ptb.to(query.dtype) + + # linear proj + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + # dropout + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + + if input_ndim == 4: + hidden_states_ptb = hidden_states_ptb.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # cat + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class PAGCFGIdentitySelfAttnProcessor2_0: + r""" + Processor for implementing PAG using scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + PAG reference: https://huggingface.co/papers/2403.17377 + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "PAGCFGIdentitySelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor | None = None, + attention_mask: torch.FloatTensor | None = None, + temb: torch.FloatTensor | None = None, + ) -> torch.Tensor: + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + + input_ndim = hidden_states.ndim + if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape + hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) + + # chunk + hidden_states_uncond, hidden_states_org, hidden_states_ptb = hidden_states.chunk(3) + hidden_states_org = torch.cat([hidden_states_uncond, hidden_states_org]) + + # original path + batch_size, sequence_length, _ = hidden_states_org.shape + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if attn.group_norm is not None: + hidden_states_org = attn.group_norm(hidden_states_org.transpose(1, 2)).transpose(1, 2) + + query = attn.to_q(hidden_states_org) + key = attn.to_k(hidden_states_org) + value = attn.to_v(hidden_states_org) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states_org = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states_org = hidden_states_org.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states_org = hidden_states_org.to(query.dtype) + + # linear proj + hidden_states_org = attn.to_out[0](hidden_states_org) + # dropout + hidden_states_org = attn.to_out[1](hidden_states_org) + + if input_ndim == 4: + hidden_states_org = hidden_states_org.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # perturbed path (identity attention) + batch_size, sequence_length, _ = hidden_states_ptb.shape + + if attn.group_norm is not None: + hidden_states_ptb = attn.group_norm(hidden_states_ptb.transpose(1, 2)).transpose(1, 2) + + value = attn.to_v(hidden_states_ptb) + hidden_states_ptb = value + hidden_states_ptb = hidden_states_ptb.to(query.dtype) + + # linear proj + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + # dropout + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + + if input_ndim == 4: + hidden_states_ptb = hidden_states_ptb.transpose(-1, -2).reshape(batch_size, channel, height, width) + + # cat + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class SanaMultiscaleAttnProcessor2_0: + r""" + Processor for implementing multiscale quadratic attention. + """ + + def __call__(self, attn: SanaMultiscaleLinearAttention, hidden_states: torch.Tensor) -> torch.Tensor: + height, width = hidden_states.shape[-2:] + if height * width > attn.attention_head_dim: + use_linear_attention = True + else: + use_linear_attention = False + + residual = hidden_states + + batch_size, _, height, width = list(hidden_states.size()) + original_dtype = hidden_states.dtype + + hidden_states = hidden_states.movedim(1, -1) + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + hidden_states = torch.cat([query, key, value], dim=3) + hidden_states = hidden_states.movedim(-1, 1) + + multi_scale_qkv = [hidden_states] + for block in attn.to_qkv_multiscale: + multi_scale_qkv.append(block(hidden_states)) + + hidden_states = torch.cat(multi_scale_qkv, dim=1) + + if use_linear_attention: + # for linear attention upcast hidden_states to float32 + hidden_states = hidden_states.to(dtype=torch.float32) + + hidden_states = hidden_states.reshape(batch_size, -1, 3 * attn.attention_head_dim, height * width) + + query, key, value = hidden_states.chunk(3, dim=2) + query = attn.nonlinearity(query) + key = attn.nonlinearity(key) + + if use_linear_attention: + hidden_states = attn.apply_linear_attention(query, key, value) + hidden_states = hidden_states.to(dtype=original_dtype) + else: + hidden_states = attn.apply_quadratic_attention(query, key, value) + + hidden_states = torch.reshape(hidden_states, (batch_size, -1, height, width)) + hidden_states = attn.to_out(hidden_states.movedim(1, -1)).movedim(-1, 1) + + if attn.norm_type == "rms_norm": + hidden_states = attn.norm_out(hidden_states.movedim(1, -1)).movedim(-1, 1) + else: + hidden_states = attn.norm_out(hidden_states) + + if attn.residual_connection: + hidden_states = hidden_states + residual + + return hidden_states + + +class LoRAAttnProcessor: + r""" + Processor for implementing attention with LoRA. + """ + + def __init__(self): + pass + + +class LoRAAttnProcessor2_0: + r""" + Processor for implementing attention with LoRA (enabled by default if you're using PyTorch 2.0). + """ + + def __init__(self): + pass + + +class LoRAXFormersAttnProcessor: + r""" + Processor for implementing attention with LoRA using xFormers. + """ + + def __init__(self): + pass + + +class LoRAAttnAddedKVProcessor: + r""" + Processor for implementing attention with LoRA with extra learnable key and value matrices for the text encoder. + """ + + def __init__(self): + pass + + +class SanaLinearAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product linear attention. + """ + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + original_dtype = hidden_states.dtype + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + query = query.transpose(1, 2).unflatten(1, (attn.heads, -1)) + key = key.transpose(1, 2).unflatten(1, (attn.heads, -1)).transpose(2, 3) + value = value.transpose(1, 2).unflatten(1, (attn.heads, -1)) + + query = F.relu(query) + key = F.relu(key) + + query, key, value = query.float(), key.float(), value.float() + + value = F.pad(value, (0, 0, 0, 1), mode="constant", value=1.0) + scores = torch.matmul(value, key) + hidden_states = torch.matmul(scores, query) + + hidden_states = hidden_states[:, :, :-1] / (hidden_states[:, :, -1:] + 1e-15) + hidden_states = hidden_states.flatten(1, 2).transpose(1, 2) + hidden_states = hidden_states.to(original_dtype) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + if original_dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + return hidden_states + + +class PAGCFGSanaLinearAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product linear attention. + """ + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + original_dtype = hidden_states.dtype + + hidden_states_uncond, hidden_states_org, hidden_states_ptb = hidden_states.chunk(3) + hidden_states_org = torch.cat([hidden_states_uncond, hidden_states_org]) + + query = attn.to_q(hidden_states_org) + key = attn.to_k(hidden_states_org) + value = attn.to_v(hidden_states_org) + + query = query.transpose(1, 2).unflatten(1, (attn.heads, -1)) + key = key.transpose(1, 2).unflatten(1, (attn.heads, -1)).transpose(2, 3) + value = value.transpose(1, 2).unflatten(1, (attn.heads, -1)) + + query = F.relu(query) + key = F.relu(key) + + query, key, value = query.float(), key.float(), value.float() + + value = F.pad(value, (0, 0, 0, 1), mode="constant", value=1.0) + scores = torch.matmul(value, key) + hidden_states_org = torch.matmul(scores, query) + + hidden_states_org = hidden_states_org[:, :, :-1] / (hidden_states_org[:, :, -1:] + 1e-15) + hidden_states_org = hidden_states_org.flatten(1, 2).transpose(1, 2) + hidden_states_org = hidden_states_org.to(original_dtype) + + hidden_states_org = attn.to_out[0](hidden_states_org) + hidden_states_org = attn.to_out[1](hidden_states_org) + + # perturbed path (identity attention) + hidden_states_ptb = attn.to_v(hidden_states_ptb).to(original_dtype) + + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + + if original_dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + return hidden_states + + +class PAGIdentitySanaLinearAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product linear attention. + """ + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + original_dtype = hidden_states.dtype + + hidden_states_org, hidden_states_ptb = hidden_states.chunk(2) + + query = attn.to_q(hidden_states_org) + key = attn.to_k(hidden_states_org) + value = attn.to_v(hidden_states_org) + + query = query.transpose(1, 2).unflatten(1, (attn.heads, -1)) + key = key.transpose(1, 2).unflatten(1, (attn.heads, -1)).transpose(2, 3) + value = value.transpose(1, 2).unflatten(1, (attn.heads, -1)) + + query = F.relu(query) + key = F.relu(key) + + query, key, value = query.float(), key.float(), value.float() + + value = F.pad(value, (0, 0, 0, 1), mode="constant", value=1.0) + scores = torch.matmul(value, key) + hidden_states_org = torch.matmul(scores, query) + + if hidden_states_org.dtype in [torch.float16, torch.bfloat16]: + hidden_states_org = hidden_states_org.float() + + hidden_states_org = hidden_states_org[:, :, :-1] / (hidden_states_org[:, :, -1:] + 1e-15) + hidden_states_org = hidden_states_org.flatten(1, 2).transpose(1, 2) + hidden_states_org = hidden_states_org.to(original_dtype) + + hidden_states_org = attn.to_out[0](hidden_states_org) + hidden_states_org = attn.to_out[1](hidden_states_org) + + # perturbed path (identity attention) + hidden_states_ptb = attn.to_v(hidden_states_ptb).to(original_dtype) + + hidden_states_ptb = attn.to_out[0](hidden_states_ptb) + hidden_states_ptb = attn.to_out[1](hidden_states_ptb) + + hidden_states = torch.cat([hidden_states_org, hidden_states_ptb]) + + if original_dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + return hidden_states + + +class FluxAttnProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = "`FluxAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxAttnProcessor`" + deprecate("FluxAttnProcessor2_0", "1.0.0", deprecation_message) + + from .transformers.transformer_flux import FluxAttnProcessor + + return FluxAttnProcessor(*args, **kwargs) + + +class FluxSingleAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + """ + + def __new__(cls, *args, **kwargs): + deprecation_message = "`FluxSingleAttnProcessor` is deprecated and will be removed in a future version. Please use `FluxAttnProcessorSDPA` instead." + deprecate("FluxSingleAttnProcessor2_0", "1.0.0", deprecation_message) + + from .transformers.transformer_flux import FluxAttnProcessor + + return FluxAttnProcessor(*args, **kwargs) + + +class FusedFluxAttnProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = "`FusedFluxAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxAttnProcessor`" + deprecate("FusedFluxAttnProcessor2_0", "1.0.0", deprecation_message) + + from .transformers.transformer_flux import FluxAttnProcessor + + return FluxAttnProcessor(*args, **kwargs) + + +class FluxIPAdapterJointAttnProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = "`FluxIPAdapterJointAttnProcessor2_0` is deprecated and this will be removed in a future version. Please use `FluxIPAdapterAttnProcessor`" + deprecate("FluxIPAdapterJointAttnProcessor2_0", "1.0.0", deprecation_message) + + from .transformers.transformer_flux import FluxIPAdapterAttnProcessor + + return FluxIPAdapterAttnProcessor(*args, **kwargs) + + +class FluxAttnProcessor2_0_NPU: + def __new__(cls, *args, **kwargs): + deprecation_message = ( + "FluxAttnProcessor2_0_NPU is deprecated and will be removed in a future version. An " + "alternative solution to use NPU Flash Attention will be provided in the future." + ) + deprecate("FluxAttnProcessor2_0_NPU", "1.0.0", deprecation_message, standard_warn=False) + + from .transformers.transformer_flux import FluxAttnProcessor + + processor = FluxAttnProcessor() + processor._attention_backend = "_native_npu" + return processor + + +class FusedFluxAttnProcessor2_0_NPU: + def __new__(self): + deprecation_message = ( + "FusedFluxAttnProcessor2_0_NPU is deprecated and will be removed in a future version. An " + "alternative solution to use NPU Flash Attention will be provided in the future." + ) + deprecate("FusedFluxAttnProcessor2_0_NPU", "1.0.0", deprecation_message, standard_warn=False) + + from .transformers.transformer_flux import FluxAttnProcessor + + processor = FluxAttnProcessor() + processor._attention_backend = "_fused_npu" + return processor + + +class XLAFluxFlashAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention with pallas flash attention kernel if using `torch_xla`. + """ + + def __new__(cls, *args, **kwargs): + deprecation_message = ( + "XLAFluxFlashAttnProcessor2_0 is deprecated and will be removed in diffusers 1.0.0. An " + "alternative solution to using XLA Flash Attention will be provided in the future." + ) + deprecate("XLAFluxFlashAttnProcessor2_0", "1.0.0", deprecation_message, standard_warn=False) + + if is_torch_xla_version("<", "2.3"): + raise ImportError("XLA flash attention requires torch_xla version >= 2.3.") + if is_spmd() and is_torch_xla_version("<", "2.4"): + raise ImportError("SPMD support for XLA flash attention needs torch_xla version >= 2.4.") + + from .transformers.transformer_flux import FluxAttnProcessor + + if len(args) > 0 or kwargs.get("partition_spec", None) is not None: + deprecation_message = ( + "partition_spec was not used in the processor implementation when it was added. Passing it " + "is a no-op and support for it will be removed." + ) + deprecate("partition_spec", "1.0.0", deprecation_message) + + processor = FluxAttnProcessor(*args, **kwargs) + processor._attention_backend = "_native_xla" + return processor + + +ADDED_KV_ATTENTION_PROCESSORS = ( + AttnAddedKVProcessor, + SlicedAttnAddedKVProcessor, + AttnAddedKVProcessor2_0, + XFormersAttnAddedKVProcessor, +) + +CROSS_ATTENTION_PROCESSORS = ( + AttnProcessor, + AttnProcessor2_0, + XFormersAttnProcessor, + SlicedAttnProcessor, + IPAdapterAttnProcessor, + IPAdapterAttnProcessor2_0, + FluxIPAdapterJointAttnProcessor2_0, +) + +AttentionProcessor = ( + AttnProcessor + | CustomDiffusionAttnProcessor + | AttnAddedKVProcessor + | AttnAddedKVProcessor2_0 + | JointAttnProcessor2_0 + | PAGJointAttnProcessor2_0 + | PAGCFGJointAttnProcessor2_0 + | FusedJointAttnProcessor2_0 + | AllegroAttnProcessor2_0 + | AuraFlowAttnProcessor2_0 + | FusedAuraFlowAttnProcessor2_0 + | FluxAttnProcessor2_0 + | FluxAttnProcessor2_0_NPU + | FusedFluxAttnProcessor2_0 + | FusedFluxAttnProcessor2_0_NPU + | CogVideoXAttnProcessor2_0 + | FusedCogVideoXAttnProcessor2_0 + | XFormersAttnAddedKVProcessor + | XFormersAttnProcessor + | XLAFlashAttnProcessor2_0 + | AttnProcessorNPU + | AttnProcessor2_0 + | MochiVaeAttnProcessor2_0 + | MochiAttnProcessor2_0 + | StableAudioAttnProcessor2_0 + | HunyuanAttnProcessor2_0 + | FusedHunyuanAttnProcessor2_0 + | PAGHunyuanAttnProcessor2_0 + | PAGCFGHunyuanAttnProcessor2_0 + | LuminaAttnProcessor2_0 + | FusedAttnProcessor2_0 + | CustomDiffusionXFormersAttnProcessor + | CustomDiffusionAttnProcessor2_0 + | SlicedAttnProcessor + | SlicedAttnAddedKVProcessor + | SanaLinearAttnProcessor2_0 + | PAGCFGSanaLinearAttnProcessor2_0 + | PAGIdentitySanaLinearAttnProcessor2_0 + | SanaMultiscaleLinearAttention + | SanaMultiscaleAttnProcessor2_0 + | SanaMultiscaleAttentionProjection + | IPAdapterAttnProcessor + | IPAdapterAttnProcessor2_0 + | IPAdapterXFormersAttnProcessor + | SD3IPAdapterJointAttnProcessor2_0 + | PAGIdentitySelfAttnProcessor2_0 + | PAGCFGIdentitySelfAttnProcessor2_0 + | LoRAAttnProcessor + | LoRAAttnProcessor2_0 + | LoRAXFormersAttnProcessor + | LoRAAttnAddedKVProcessor +) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/auto_model.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/auto_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1c001e23fe00a2712c67cfb74dab6a5b1c1fc32e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/auto_model.py @@ -0,0 +1,345 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from huggingface_hub.utils import validate_hf_hub_args + +from ..configuration_utils import ConfigMixin +from ..utils import DIFFUSERS_LOAD_ID_FIELDS, logging +from ..utils.dynamic_modules_utils import get_class_from_dynamic_module, resolve_trust_remote_code + + +logger = logging.get_logger(__name__) + + +class AutoModel(ConfigMixin): + config_name = "config.json" + + def __init__(self, *args, **kwargs): + raise EnvironmentError( + f"{self.__class__.__name__} is designed to be instantiated " + f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)`, " + f"`{self.__class__.__name__}.from_config(config)`, or " + f"`{self.__class__.__name__}.from_pipe(pipeline)` methods." + ) + + @classmethod + def from_config(cls, pretrained_model_name_or_path_or_dict: str | os.PathLike | dict | None = None, **kwargs): + r""" + Instantiate a model from a config dictionary or a pretrained model configuration file with random weights (no + pretrained weights are loaded). + + Parameters: + pretrained_model_name_or_path_or_dict (`str`, `os.PathLike`, or `dict`): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model + configuration hosted on the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing a model configuration + file. + - A config dictionary. + + cache_dir (`Union[str, os.PathLike]`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model configuration, overriding the cached version if + it exists. + proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only load local model configuration files or not. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. + trust_remote_code (`bool`, *optional*, defaults to `False`): + Whether to trust remote code. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + + Returns: + A model object instantiated from the config with random weights. + + Example: + + ```py + from diffusers import AutoModel + + model = AutoModel.from_config("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet") + ``` + """ + subfolder = kwargs.pop("subfolder", None) + trust_remote_code = kwargs.pop("trust_remote_code", False) + + hub_kwargs_names = [ + "cache_dir", + "force_download", + "local_files_only", + "proxies", + "revision", + "token", + ] + hub_kwargs = {name: kwargs.pop(name, None) for name in hub_kwargs_names} + + if pretrained_model_name_or_path_or_dict is None: + raise ValueError( + "Please provide a `pretrained_model_name_or_path_or_dict` as the first positional argument." + ) + + if isinstance(pretrained_model_name_or_path_or_dict, (str, os.PathLike)): + pretrained_model_name_or_path = pretrained_model_name_or_path_or_dict + config = cls.load_config(pretrained_model_name_or_path, subfolder=subfolder, **hub_kwargs) + else: + config = pretrained_model_name_or_path_or_dict + pretrained_model_name_or_path = config.get("_name_or_path", None) + + has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"] + trust_remote_code = resolve_trust_remote_code( + trust_remote_code, pretrained_model_name_or_path, has_remote_code + ) + + if has_remote_code and trust_remote_code: + class_ref = config["auto_map"][cls.__name__] + module_file, class_name = class_ref.split(".") + module_file = module_file + ".py" + model_cls = get_class_from_dynamic_module( + pretrained_model_name_or_path, + subfolder=subfolder, + module_file=module_file, + class_name=class_name, + **hub_kwargs, + ) + else: + if "_class_name" in config: + class_name = config["_class_name"] + library = "diffusers" + elif "model_type" in config: + class_name = "AutoModel" + library = "transformers" + else: + raise ValueError( + f"Couldn't find a model class associated with the config: {config}. Make sure the config " + "contains a `_class_name` or `model_type` key." + ) + + from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates + + model_cls, _ = get_class_obj_and_candidates( + library_name=library, + class_name=class_name, + importable_classes=ALL_IMPORTABLE_CLASSES, + pipelines=None, + is_pipeline_module=False, + ) + + if model_cls is None: + raise ValueError(f"AutoModel can't find a model linked to {class_name}.") + + return model_cls.from_config(config, **kwargs) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path: str | os.PathLike | None = None, **kwargs): + r""" + Instantiate a pretrained PyTorch model from a pretrained model configuration. + + The model is set in evaluation mode - `model.eval()` - by default, and dropout modules are deactivated. To + train the model, set it back in training mode with `model.train()`. + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`~ModelMixin.save_pretrained`]. + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info (`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + from_flax (`bool`, *optional*, defaults to `False`): + Load the model weights from a Flax checkpoint save file. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you're downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be defined for each + parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the + same device. Defaults to `None`, meaning that the model will be loaded on CPU. + + Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary device identifier for the maximum memory. Will default to the maximum memory available for + each GPU and the available CPU RAM if unset. + offload_folder (`str` or `os.PathLike`, *optional*): + The path to offload weights if `device_map` contains the value `"disk"`. + offload_state_dict (`bool`, *optional*): + If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if + the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True` + when there is some disk offload. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + variant (`str`, *optional*): + Load weights from a specified `variant` filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the `safetensors` weights are downloaded if they're available **and** if the + `safetensors` library is installed. If set to `True`, the model is forcibly loaded from `safetensors` + weights. If set to `False`, `safetensors` weights are not loaded. + disable_mmap ('bool', *optional*, defaults to 'False'): + Whether to disable mmap when loading a Safetensors model. This option can perform better when the model + is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well. + trust_remote_cocde (`bool`, *optional*, defaults to `False`): + Whether to trust remote code + + > [!TIP] > To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in + with `hf > auth login`. You can also activate the special > + ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use this method in a > + firewalled environment. + + Example: + + ```py + from diffusers import AutoModel + + unet = AutoModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet") + ``` + + If you get the error message below, you need to finetune the weights for your downstream task: + + ```bash + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + """ + subfolder = kwargs.pop("subfolder", None) + trust_remote_code = kwargs.pop("trust_remote_code", False) + + hub_kwargs_names = [ + "cache_dir", + "force_download", + "local_files_only", + "proxies", + "revision", + "token", + ] + hub_kwargs = {name: kwargs.pop(name, None) for name in hub_kwargs_names} + + # load_config_kwargs uses the same hub kwargs minus subfolder and resume_download + load_config_kwargs = {k: v for k, v in hub_kwargs.items() if k not in ["subfolder"]} + + library = None + orig_class_name = None + + # Always attempt to fetch model_index.json first + try: + cls.config_name = "model_index.json" + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + + if subfolder is not None and subfolder in config: + library, orig_class_name = config[subfolder] + load_config_kwargs.update({"subfolder": subfolder}) + + except EnvironmentError as e: + logger.debug(e) + + # Unable to load from model_index.json so fallback to loading from config + if library is None and orig_class_name is None: + cls.config_name = "config.json" + config = cls.load_config(pretrained_model_or_path, subfolder=subfolder, **load_config_kwargs) + + if "_class_name" in config: + # If we find a class name in the config, we can try to load the model as a diffusers model + orig_class_name = config["_class_name"] + library = "diffusers" + load_config_kwargs.update({"subfolder": subfolder}) + elif "model_type" in config: + orig_class_name = "AutoModel" + library = "transformers" + load_config_kwargs.update({"subfolder": "" if subfolder is None else subfolder}) + else: + raise ValueError(f"Couldn't find model associated with the config file at {pretrained_model_or_path}.") + + has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"] + trust_remote_code = resolve_trust_remote_code(trust_remote_code, pretrained_model_or_path, has_remote_code) + if not has_remote_code and trust_remote_code: + raise ValueError( + "Selected model repository does not happear to have any custom code or does not have a valid `config.json` file." + ) + + if has_remote_code and trust_remote_code: + class_ref = config["auto_map"][cls.__name__] + module_file, class_name = class_ref.split(".") + module_file = module_file + ".py" + model_cls = get_class_from_dynamic_module( + pretrained_model_or_path, + subfolder=subfolder, + module_file=module_file, + class_name=class_name, + **hub_kwargs, + ) + else: + from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates + + model_cls, _ = get_class_obj_and_candidates( + library_name=library, + class_name=orig_class_name, + importable_classes=ALL_IMPORTABLE_CLASSES, + pipelines=None, + is_pipeline_module=False, + ) + + if model_cls is None: + raise ValueError(f"AutoModel can't find a model linked to {orig_class_name}.") + + kwargs = {**load_config_kwargs, **kwargs} + model = model_cls.from_pretrained(pretrained_model_or_path, **kwargs) + + load_id_kwargs = {"pretrained_model_name_or_path": pretrained_model_or_path, **kwargs} + parts = [load_id_kwargs.get(field, "null") for field in DIFFUSERS_LOAD_ID_FIELDS] + load_id = "|".join("null" if p is None else p for p in parts) + model._diffusers_load_id = load_id + + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1da68775e682ef72a6b2a44d13fb39159059c324 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_asym_kl.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_asym_kl.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba02faa6b5fc03872655c98f1d55ece7edb37462 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_asym_kl.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_dc.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_dc.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2c124a092eacfbc28145ebd932cab48b5378949 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_dc.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..901778125fad9da880f65670d482587916d6f0f0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_allegro.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_allegro.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4725e2db9a0317635e388c86e0a8c59070a7f4a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_allegro.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cogvideox.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cogvideox.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4ed0f76e0812194e85a2632e2b5729ea4d85aa4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cogvideox.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cosmos.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cosmos.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d16097881ccf28148eea030614a75fa9c758bdd Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_cosmos.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_flux2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_flux2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd29d7ff456a6738eded233b0049457260bd70fd Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_flux2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuan_video.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuan_video.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a762986485df55c4dd8d9f47aa80392875ed144d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuan_video.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed5bf273f4fccecf5d6314538e77ff31eec65ddb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage_refiner.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage_refiner.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..803f10acd37d2ddef55836575f91923b1dd7aa0c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage_refiner.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanvideo15.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanvideo15.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d37274ecc8bc31999aa51fb0b7eefe10047bc1ef Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanvideo15.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30aea101a08be2a714becdc7302d97cdc396a720 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4dbe6d4a2a325f824570850ffe98b44f41ff6443 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2_audio.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2_audio.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..489048af54c8b644ca725ff56cecef81819cf1e2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2_audio.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_magvit.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_magvit.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..982a8539b8f24414affd88b2ac3eb3bb97b571c0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_magvit.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_mochi.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_mochi.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df39dc9cbb11e965905d7b2c729ac3946507d9eb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_mochi.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_qwenimage.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_qwenimage.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6171b87ab31200ed17df1514062bef2cb1021f2c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_qwenimage.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_temporal_decoder.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_temporal_decoder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfaaa94c5333f5c1bb151c042d3989c57f1cc88a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_temporal_decoder.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_wan.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_wan.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..535be143aa3911552e2a524ae6e710cf2d761c41 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_kl_wan.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_oobleck.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_oobleck.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d86c1c3489fb86f1623f34e2fbff5e867ce7f4aa Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_oobleck.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_rae.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_rae.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2efe708de8c172292828016d513e7b17b54104f8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_rae.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_tiny.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_tiny.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5346009fb3e754521f40ef7697ff4d1894d06acc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/autoencoder_tiny.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/consistency_decoder_vae.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/consistency_decoder_vae.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bbe61a7823ea75b1ceab52e84d17d7e37b7dbb47 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/consistency_decoder_vae.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/vae.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/vae.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..915f457b4a4912e9baf3f4a2d9607fdd799fa842 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/vae.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/vq_model.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/vq_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00c8b9d13be4edb936c3bb097e970d11633f39a1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/__pycache__/vq_model.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_cosmos.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_cosmos.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe1f62890be88b962240efa8bcb3abc2cfc9f02 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_cosmos.py @@ -0,0 +1,1090 @@ +# Copyright 2025 The NVIDIA Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import get_logger +from ...utils.accelerate_utils import apply_forward_hook +from ..modeling_outputs import AutoencoderKLOutput +from ..modeling_utils import ModelMixin +from .vae import AutoencoderMixin, DecoderOutput, IdentityDistribution + + +logger = get_logger(__name__) + + +# fmt: off +# These latents and means are from CV8x8x8-1.0. Each checkpoint has different values, but since this is the main VAE used, +# we will default to these values. +LATENTS_MEAN = [0.11362758, -0.0171717, 0.03071163, 0.02046862, 0.01931456, 0.02138567, 0.01999342, 0.02189187, 0.02011935, 0.01872694, 0.02168613, 0.02207148, 0.01986941, 0.01770413, 0.02067643, 0.02028245, 0.19125476, 0.04556972, 0.0595558, 0.05315534, 0.05496629, 0.05356264, 0.04856596, 0.05327453, 0.05410472, 0.05597149, 0.05524866, 0.05181874, 0.05071663, 0.05204537, 0.0564108, 0.05518042, 0.01306714, 0.03341161, 0.03847246, 0.02810185, 0.02790166, 0.02920026, 0.02823597, 0.02631033, 0.0278531, 0.02880507, 0.02977769, 0.03145441, 0.02888389, 0.03280773, 0.03484927, 0.03049198, -0.00197727, 0.07534957, 0.04963879, 0.05530893, 0.05410828, 0.05252541, 0.05029899, 0.05321025, 0.05149245, 0.0511921, 0.04643495, 0.04604527, 0.04631618, 0.04404101, 0.04403536, 0.04499495, -0.02994183, -0.04787003, -0.01064558, -0.01779824, -0.01490502, -0.02157517, -0.0204778, -0.02180816, -0.01945375, -0.02062863, -0.02192209, -0.02520639, -0.02246656, -0.02427533, -0.02683363, -0.02762006, 0.08019473, -0.13005368, -0.07568636, -0.06082374, -0.06036175, -0.05875364, -0.05921887, -0.05869788, -0.05273941, -0.052565, -0.05346428, -0.05456541, -0.053657, -0.05656897, -0.05728589, -0.05321847, 0.16718403, -0.00390146, 0.0379406, 0.0356561, 0.03554131, 0.03924074, 0.03873615, 0.04187329, 0.04226924, 0.04378717, 0.04684274, 0.05117614, 0.04547792, 0.05251586, 0.05048339, 0.04950784, 0.09564418, 0.0547128, 0.08183969, 0.07978633, 0.08076023, 0.08108605, 0.08011818, 0.07965573, 0.08187773, 0.08350263, 0.08101469, 0.0786941, 0.0774442, 0.07724521, 0.07830418, 0.07599796, -0.04987567, 0.05923908, -0.01058746, -0.01177603, -0.01116162, -0.01364149, -0.01546014, -0.0117213, -0.01780043, -0.01648314, -0.02100247, -0.02104417, -0.02482123, -0.02611689, -0.02561143, -0.02597336, -0.05364667, 0.08211684, 0.04686937, 0.04605641, 0.04304186, 0.0397355, 0.03686767, 0.04087112, 0.03704741, 0.03706401, 0.03120073, 0.03349091, 0.03319963, 0.03205781, 0.03195127, 0.03180481, 0.16427967, -0.11048453, -0.04595276, -0.04982893, -0.05213465, -0.04809378, -0.05080318, -0.04992863, -0.04493337, -0.0467619, -0.04884703, -0.04627892, -0.04913311, -0.04955709, -0.04533982, -0.04570218, -0.10612928, -0.05121198, -0.06761009, -0.07251801, -0.07265285, -0.07417855, -0.07202412, -0.07499027, -0.07625481, -0.07535747, -0.07638787, -0.07920305, -0.07596069, -0.07959418, -0.08265036, -0.07955471, -0.16888915, 0.0753242, 0.04062594, 0.03375093, 0.03337452, 0.03699376, 0.03651138, 0.03611023, 0.03555622, 0.03378554, 0.0300498, 0.03395559, 0.02941847, 0.03156432, 0.03431173, 0.03016853, -0.03415358, -0.01699573, -0.04029295, -0.04912157, -0.0498858, -0.04917918, -0.04918056, -0.0525189, -0.05325506, -0.05341973, -0.04983329, -0.04883146, -0.04985548, -0.04736718, -0.0462027, -0.04836091, 0.02055675, 0.03419799, -0.02907669, -0.04350509, -0.04156144, -0.04234421, -0.04446109, -0.04461774, -0.04882839, -0.04822346, -0.04502493, -0.0506244, -0.05146913, -0.04655267, -0.04862994, -0.04841615, 0.20312774, -0.07208502, -0.03635615, -0.03556088, -0.04246174, -0.04195838, -0.04293778, -0.04071276, -0.04240569, -0.04125213, -0.04395144, -0.03959096, -0.04044993, -0.04015875, -0.04088107, -0.03885176] +LATENTS_STD = [0.56700271, 0.65488982, 0.65589428, 0.66524369, 0.66619784, 0.6666382, 0.6720838, 0.66955978, 0.66928875, 0.67108786, 0.67092526, 0.67397463, 0.67894882, 0.67668313, 0.67769569, 0.67479557, 0.85245121, 0.8688373, 0.87348086, 0.88459337, 0.89135885, 0.8910504, 0.89714909, 0.89947474, 0.90201765, 0.90411824, 0.90692616, 0.90847772, 0.90648711, 0.91006982, 0.91033435, 0.90541548, 0.84960359, 0.85863352, 0.86895317, 0.88460612, 0.89245003, 0.89451706, 0.89931005, 0.90647358, 0.90338236, 0.90510076, 0.91008312, 0.90961218, 0.9123717, 0.91313171, 0.91435546, 0.91565102, 0.91877103, 0.85155135, 0.857804, 0.86998034, 0.87365264, 0.88161767, 0.88151032, 0.88758916, 0.89015514, 0.89245576, 0.89276224, 0.89450496, 0.90054202, 0.89994133, 0.90136105, 0.90114892, 0.77755755, 0.81456852, 0.81911844, 0.83137071, 0.83820474, 0.83890373, 0.84401101, 0.84425181, 0.84739357, 0.84798753, 0.85249585, 0.85114998, 0.85160935, 0.85626358, 0.85677862, 0.85641026, 0.69903517, 0.71697885, 0.71696913, 0.72583169, 0.72931731, 0.73254126, 0.73586977, 0.73734969, 0.73664582, 0.74084908, 0.74399322, 0.74471819, 0.74493188, 0.74824578, 0.75024873, 0.75274801, 0.8187142, 0.82251883, 0.82616025, 0.83164483, 0.84072375, 0.8396467, 0.84143305, 0.84880769, 0.8503468, 0.85196948, 0.85211051, 0.85386664, 0.85410017, 0.85439342, 0.85847849, 0.85385275, 0.67583984, 0.68259847, 0.69198853, 0.69928843, 0.70194328, 0.70467001, 0.70755547, 0.70917857, 0.71007699, 0.70963502, 0.71064079, 0.71027333, 0.71291167, 0.71537536, 0.71902508, 0.71604162, 0.72450989, 0.71979928, 0.72057378, 0.73035461, 0.73329622, 0.73660028, 0.73891461, 0.74279994, 0.74105692, 0.74002433, 0.74257588, 0.74416119, 0.74543899, 0.74694443, 0.74747062, 0.74586403, 0.90176988, 0.90990674, 0.91106802, 0.92163783, 0.92390233, 0.93056196, 0.93482202, 0.93642414, 0.93858379, 0.94064975, 0.94078934, 0.94325715, 0.94955301, 0.94814706, 0.95144123, 0.94923073, 0.49853548, 0.64968109, 0.6427654, 0.64966393, 0.6487664, 0.65203559, 0.6584242, 0.65351611, 0.65464371, 0.6574859, 0.65626335, 0.66123748, 0.66121179, 0.66077942, 0.66040152, 0.66474909, 0.61986589, 0.69138134, 0.6884557, 0.6955843, 0.69765401, 0.70015347, 0.70529598, 0.70468754, 0.70399523, 0.70479989, 0.70887572, 0.71126866, 0.7097227, 0.71249932, 0.71231949, 0.71175605, 0.35586974, 0.68723857, 0.68973219, 0.69958478, 0.6943453, 0.6995818, 0.70980215, 0.69899458, 0.70271689, 0.70095056, 0.69912851, 0.70522696, 0.70392174, 0.70916915, 0.70585734, 0.70373541, 0.98101336, 0.89024764, 0.89607251, 0.90678179, 0.91308665, 0.91812348, 0.91980827, 0.92480654, 0.92635667, 0.92887944, 0.93338072, 0.93468094, 0.93619436, 0.93906063, 0.94191772, 0.94471723, 0.83202779, 0.84106231, 0.84463632, 0.85829508, 0.86319661, 0.86751342, 0.86914337, 0.87085921, 0.87286359, 0.87537396, 0.87931138, 0.88054478, 0.8811838, 0.88872558, 0.88942474, 0.88934827, 0.44025335, 0.63061613, 0.63110614, 0.63601959, 0.6395812, 0.64104342, 0.65019929, 0.6502797, 0.64355946, 0.64657205, 0.64847094, 0.64728117, 0.64972943, 0.65162975, 0.65328044, 0.64914775] +_WAVELETS = { + "haar": torch.tensor([0.7071067811865476, 0.7071067811865476]), + "rearrange": torch.tensor([1.0, 1.0]), +} +# fmt: on + + +class CosmosCausalConv3d(nn.Conv3d): + def __init__( + self, + in_channels: int = 1, + out_channels: int = 1, + kernel_size: int | tuple[int, int, int] = (3, 3, 3), + dilation: int | tuple[int, int, int] = (1, 1, 1), + stride: int | tuple[int, int, int] = (1, 1, 1), + padding: int = 1, + pad_mode: str = "constant", + ) -> None: + kernel_size = (kernel_size, kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size + dilation = (dilation, dilation, dilation) if isinstance(dilation, int) else dilation + stride = (stride, stride, stride) if isinstance(stride, int) else stride + + _, height_kernel_size, width_kernel_size = kernel_size + assert height_kernel_size % 2 == 1 and width_kernel_size % 2 == 1 + + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + dilation=dilation, + ) + + self.pad_mode = pad_mode + self.temporal_pad = dilation[0] * (kernel_size[0] - 1) + (1 - stride[0]) + self.spatial_pad = (padding, padding, padding, padding) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states_prev = hidden_states[:, :, :1, ...].repeat(1, 1, self.temporal_pad, 1, 1) + hidden_states = torch.cat([hidden_states_prev, hidden_states], dim=2) + hidden_states = F.pad(hidden_states, (*self.spatial_pad, 0, 0), mode=self.pad_mode, value=0.0) + return super().forward(hidden_states) + + +class CosmosCausalGroupNorm(torch.nn.Module): + def __init__(self, in_channels: int, num_groups: int = 1): + super().__init__() + self.norm = nn.GroupNorm( + num_groups=num_groups, + num_channels=in_channels, + eps=1e-6, + affine=True, + ) + self.num_groups = num_groups + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.num_groups == 1: + batch_size = hidden_states.size(0) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) # [B, C, T, H, W] -> [B * T, C, H, W] + hidden_states = self.norm(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).permute( + 0, 2, 1, 3, 4 + ) # [B * T, C, H, W] -> [B, C, T, H, W] + else: + hidden_states = self.norm(hidden_states) + return hidden_states + + +class CosmosPatchEmbed3d(nn.Module): + def __init__(self, patch_size: int = 1, patch_method: str = "haar") -> None: + super().__init__() + + self.patch_size = patch_size + self.patch_method = patch_method + + wavelets = _WAVELETS.get(patch_method).clone() + arange = torch.arange(wavelets.shape[0]) + + self.register_buffer("wavelets", wavelets, persistent=False) + self.register_buffer("_arange", arange, persistent=False) + + def _dwt(self, hidden_states: torch.Tensor, mode: str = "reflect", rescale=False) -> torch.Tensor: + dtype = hidden_states.dtype + wavelets = self.wavelets + + n = wavelets.shape[0] + g = hidden_states.shape[1] + hl = wavelets.flip(0).reshape(1, 1, -1).repeat(g, 1, 1) + hh = (wavelets * ((-1) ** self._arange)).reshape(1, 1, -1).repeat(g, 1, 1) + hh = hh.to(dtype=dtype) + hl = hl.to(dtype=dtype) + + # Handles temporal axis + hidden_states = F.pad(hidden_states, pad=(max(0, n - 2), n - 1, n - 2, n - 1, n - 2, n - 1), mode=mode).to( + dtype + ) + xl = F.conv3d(hidden_states, hl.unsqueeze(3).unsqueeze(4), groups=g, stride=(2, 1, 1)) + xh = F.conv3d(hidden_states, hh.unsqueeze(3).unsqueeze(4), groups=g, stride=(2, 1, 1)) + + # Handles spatial axes + xll = F.conv3d(xl, hl.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xlh = F.conv3d(xl, hh.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xhl = F.conv3d(xh, hl.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xhh = F.conv3d(xh, hh.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + + xlll = F.conv3d(xll, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xllh = F.conv3d(xll, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xlhl = F.conv3d(xlh, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xlhh = F.conv3d(xlh, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhll = F.conv3d(xhl, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhlh = F.conv3d(xhl, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhhl = F.conv3d(xhh, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhhh = F.conv3d(xhh, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + + hidden_states = torch.cat([xlll, xllh, xlhl, xlhh, xhll, xhlh, xhhl, xhhh], dim=1) + if rescale: + hidden_states = hidden_states / 8**0.5 + return hidden_states + + def _haar(self, hidden_states: torch.Tensor) -> torch.Tensor: + xi, xv = torch.split(hidden_states, [1, hidden_states.shape[2] - 1], dim=2) + hidden_states = torch.cat([xi.repeat_interleave(self.patch_size, dim=2), xv], dim=2) + for _ in range(int(math.log2(self.patch_size))): + hidden_states = self._dwt(hidden_states, rescale=True) + return hidden_states + + def _arrange(self, hidden_states: torch.Tensor) -> torch.Tensor: + xi, xv = torch.split(hidden_states, [1, hidden_states.shape[2] - 1], dim=2) + hidden_states = torch.cat([xi.repeat_interleave(self.patch_size, dim=2), xv], dim=2) + + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p = self.patch_size + + hidden_states = hidden_states.reshape( + batch_size, num_channels, num_frames // p, p, height // p, p, width // p, p + ) + hidden_states = hidden_states.permute(0, 1, 3, 5, 7, 2, 4, 6).flatten(1, 4).contiguous() + return hidden_states + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.patch_method == "haar": + return self._haar(hidden_states) + elif self.patch_method == "rearrange": + return self._arrange(hidden_states) + else: + raise ValueError(f"Unsupported patch method: {self.patch_method}") + + +class CosmosUnpatcher3d(nn.Module): + def __init__(self, patch_size: int = 1, patch_method: str = "haar"): + super().__init__() + + self.patch_size = patch_size + self.patch_method = patch_method + + wavelets = _WAVELETS.get(patch_method).clone() + arange = torch.arange(wavelets.shape[0]) + + self.register_buffer("wavelets", wavelets, persistent=False) + self.register_buffer("_arange", arange, persistent=False) + + def _idwt(self, hidden_states: torch.Tensor, rescale: bool = False) -> torch.Tensor: + device = hidden_states.device + dtype = hidden_states.dtype + h = self.wavelets.to(device) + + g = hidden_states.shape[1] // 8 # split into 8 spatio-temporal filtered tesnors. + hl = h.flip([0]).reshape(1, 1, -1).repeat([g, 1, 1]) + hh = (h * ((-1) ** self._arange.to(device))).reshape(1, 1, -1).repeat(g, 1, 1) + hl = hl.to(dtype=dtype) + hh = hh.to(dtype=dtype) + + xlll, xllh, xlhl, xlhh, xhll, xhlh, xhhl, xhhh = torch.chunk(hidden_states, 8, dim=1) + + # Handle height transposed convolutions + xll = F.conv_transpose3d(xlll, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xll = F.conv_transpose3d(xllh, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xll + + xlh = F.conv_transpose3d(xlhl, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xlh = F.conv_transpose3d(xlhh, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xlh + + xhl = F.conv_transpose3d(xhll, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhl = F.conv_transpose3d(xhlh, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhl + + xhh = F.conv_transpose3d(xhhl, hl.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhh = F.conv_transpose3d(xhhh, hh.unsqueeze(2).unsqueeze(3), groups=g, stride=(1, 1, 2)) + xhh + + # Handles width transposed convolutions + xl = F.conv_transpose3d(xll, hl.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xl = F.conv_transpose3d(xlh, hh.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xl + xh = F.conv_transpose3d(xhl, hl.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xh = F.conv_transpose3d(xhh, hh.unsqueeze(2).unsqueeze(4), groups=g, stride=(1, 2, 1)) + xh + + # Handles time axis transposed convolutions + hidden_states = F.conv_transpose3d(xl, hl.unsqueeze(3).unsqueeze(4), groups=g, stride=(2, 1, 1)) + hidden_states = ( + F.conv_transpose3d(xh, hh.unsqueeze(3).unsqueeze(4), groups=g, stride=(2, 1, 1)) + hidden_states + ) + + if rescale: + hidden_states = hidden_states * 8**0.5 + + return hidden_states + + def _ihaar(self, hidden_states: torch.Tensor) -> torch.Tensor: + for _ in range(int(math.log2(self.patch_size))): + hidden_states = self._idwt(hidden_states, rescale=True) + hidden_states = hidden_states[:, :, self.patch_size - 1 :, ...] + return hidden_states + + def _irearrange(self, hidden_states: torch.Tensor) -> torch.Tensor: + p = self.patch_size + hidden_states = hidden_states.unflatten(1, (-1, p, p, p)) + hidden_states = hidden_states.permute(0, 1, 5, 2, 6, 3, 7, 4) + hidden_states = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + hidden_states = hidden_states[:, :, p - 1 :, ...] + return hidden_states + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.patch_method == "haar": + return self._ihaar(hidden_states) + elif self.patch_method == "rearrange": + return self._irearrange(hidden_states) + else: + raise ValueError("Unknown patch method: " + self.patch_method) + + +class CosmosConvProjection3d(nn.Module): + def __init__(self, in_channels: int, out_channels: int) -> None: + super().__init__() + + self.conv_s = CosmosCausalConv3d(in_channels, out_channels, kernel_size=(1, 3, 3), stride=1, padding=1) + self.conv_t = CosmosCausalConv3d(out_channels, out_channels, kernel_size=(3, 1, 1), stride=1, padding=0) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.conv_s(hidden_states) + hidden_states = self.conv_t(hidden_states) + return hidden_states + + +class CosmosResnetBlock3d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_groups: int = 1, + ) -> None: + super().__init__() + out_channels = out_channels or in_channels + + self.norm1 = CosmosCausalGroupNorm(in_channels, num_groups) + self.conv1 = CosmosConvProjection3d(in_channels, out_channels) + + self.norm2 = CosmosCausalGroupNorm(out_channels, num_groups) + self.dropout = nn.Dropout(dropout) + self.conv2 = CosmosConvProjection3d(out_channels, out_channels) + + if in_channels != out_channels: + self.conv_shortcut = CosmosCausalConv3d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + else: + self.conv_shortcut = nn.Identity() + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + residual = hidden_states + residual = self.conv_shortcut(residual) + + hidden_states = self.norm1(hidden_states) + hidden_states = F.silu(hidden_states) + hidden_states = self.conv1(hidden_states) + + hidden_states = self.norm2(hidden_states) + hidden_states = F.silu(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + return hidden_states + residual + + +class CosmosDownsample3d(nn.Module): + def __init__( + self, + in_channels: int, + spatial_downsample: bool = True, + temporal_downsample: bool = True, + ) -> None: + super().__init__() + + self.spatial_downsample = spatial_downsample + self.temporal_downsample = temporal_downsample + + self.conv1 = nn.Identity() + self.conv2 = nn.Identity() + self.conv3 = nn.Identity() + + if spatial_downsample: + self.conv1 = CosmosCausalConv3d( + in_channels, in_channels, kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=0 + ) + if temporal_downsample: + self.conv2 = CosmosCausalConv3d( + in_channels, in_channels, kernel_size=(3, 1, 1), stride=(2, 1, 1), padding=0 + ) + if spatial_downsample or temporal_downsample: + self.conv3 = CosmosCausalConv3d( + in_channels, in_channels, kernel_size=(1, 1, 1), stride=(1, 1, 1), padding=0 + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if not self.spatial_downsample and not self.temporal_downsample: + return hidden_states + + if self.spatial_downsample: + pad = (0, 1, 0, 1, 0, 0) + hidden_states = F.pad(hidden_states, pad, mode="constant", value=0) + conv_out = self.conv1(hidden_states) + pool_out = F.avg_pool3d(hidden_states, kernel_size=(1, 2, 2), stride=(1, 2, 2)) + hidden_states = conv_out + pool_out + + if self.temporal_downsample: + hidden_states = torch.cat([hidden_states[:, :, :1, ...], hidden_states], dim=2) + conv_out = self.conv2(hidden_states) + pool_out = F.avg_pool3d(hidden_states, kernel_size=(2, 1, 1), stride=(2, 1, 1)) + hidden_states = conv_out + pool_out + + hidden_states = self.conv3(hidden_states) + return hidden_states + + +class CosmosUpsample3d(nn.Module): + def __init__( + self, + in_channels: int, + spatial_upsample: bool = True, + temporal_upsample: bool = True, + ) -> None: + super().__init__() + + self.spatial_upsample = spatial_upsample + self.temporal_upsample = temporal_upsample + + self.conv1 = nn.Identity() + self.conv2 = nn.Identity() + self.conv3 = nn.Identity() + + if temporal_upsample: + self.conv1 = CosmosCausalConv3d( + in_channels, in_channels, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=0 + ) + if spatial_upsample: + self.conv2 = CosmosCausalConv3d( + in_channels, in_channels, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=1 + ) + if spatial_upsample or temporal_upsample: + self.conv3 = CosmosCausalConv3d( + in_channels, in_channels, kernel_size=(1, 1, 1), stride=(1, 1, 1), padding=0 + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if not self.spatial_upsample and not self.temporal_upsample: + return hidden_states + + if self.temporal_upsample: + num_frames = hidden_states.size(2) + time_factor = int(1.0 + 1.0 * (num_frames > 1)) + hidden_states = hidden_states.repeat_interleave(int(time_factor), dim=2) + hidden_states = hidden_states[..., time_factor - 1 :, :, :] + hidden_states = self.conv1(hidden_states) + hidden_states + + if self.spatial_upsample: + hidden_states = hidden_states.repeat_interleave(2, dim=3).repeat_interleave(2, dim=4) + hidden_states = self.conv2(hidden_states) + hidden_states + + hidden_states = self.conv3(hidden_states) + return hidden_states + + +class CosmosCausalAttention(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + num_groups: int = 1, + dropout: float = 0.0, + processor: "CosmosSpatialAttentionProcessor2_0" | "CosmosTemporalAttentionProcessor2_0" = None, + ) -> None: + super().__init__() + self.num_attention_heads = num_attention_heads + + self.norm = CosmosCausalGroupNorm(attention_head_dim, num_groups=num_groups) + self.to_q = CosmosCausalConv3d(attention_head_dim, attention_head_dim, kernel_size=1, stride=1, padding=0) + self.to_k = CosmosCausalConv3d(attention_head_dim, attention_head_dim, kernel_size=1, stride=1, padding=0) + self.to_v = CosmosCausalConv3d(attention_head_dim, attention_head_dim, kernel_size=1, stride=1, padding=0) + self.to_out = nn.ModuleList([]) + self.to_out.append( + CosmosCausalConv3d(attention_head_dim, attention_head_dim, kernel_size=1, stride=1, padding=0) + ) + self.to_out.append(nn.Dropout(dropout)) + + self.processor = processor + if self.processor is None: + raise ValueError("CosmosCausalAttention requires a processor.") + + def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor | None = None) -> torch.Tensor: + return self.processor(self, hidden_states=hidden_states, attention_mask=attention_mask) + + +class CosmosSpatialAttentionProcessor2_0: + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "CosmosSpatialAttentionProcessor2_0 requires PyTorch 2.0 or higher. To use it, please upgrade PyTorch." + ) + + def __call__( + self, attn: CosmosCausalAttention, hidden_states: torch.Tensor, attention_mask: torch.Tensor | None = None + ) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + residual = hidden_states + + hidden_states = attn.norm(hidden_states) + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + # [B, C, T, H, W] -> [B * T, H * W, C] + query = query.permute(0, 2, 3, 4, 1).flatten(2, 3).flatten(0, 1) + key = key.permute(0, 2, 3, 4, 1).flatten(2, 3).flatten(0, 1) + value = value.permute(0, 2, 3, 4, 1).flatten(2, 3).flatten(0, 1) + + # [B * T, H * W, C] -> [B * T, N, H * W, C // N] + query = query.unflatten(2, (attn.num_attention_heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.num_attention_heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.num_attention_heads, -1)).transpose(1, 2) + + hidden_states = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask) + hidden_states = hidden_states.transpose(1, 2).flatten(2, 3).type_as(query) + hidden_states = hidden_states.unflatten(1, (height, width)).unflatten(0, (batch_size, num_frames)) + hidden_states = hidden_states.permute(0, 4, 1, 2, 3) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + residual + + +class CosmosTemporalAttentionProcessor2_0: + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "CosmosSpatialAttentionProcessor2_0 requires PyTorch 2.0 or higher. To use it, please upgrade PyTorch." + ) + + def __call__( + self, attn: CosmosCausalAttention, hidden_states: torch.Tensor, attention_mask: torch.Tensor | None = None + ) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + residual = hidden_states + + hidden_states = attn.norm(hidden_states) + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + # [B, C, T, H, W] -> [B * T, H * W, C] + query = query.permute(0, 3, 4, 2, 1).flatten(0, 2) + key = key.permute(0, 3, 4, 2, 1).flatten(0, 2) + value = value.permute(0, 3, 4, 2, 1).flatten(0, 2) + + # [B * T, H * W, C] -> [B * T, N, H * W, C // N] + query = query.unflatten(2, (attn.num_attention_heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.num_attention_heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.num_attention_heads, -1)).transpose(1, 2) + + hidden_states = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask) + hidden_states = hidden_states.transpose(1, 2).flatten(2, 3).type_as(query) + hidden_states = hidden_states.unflatten(0, (batch_size, height, width)) + hidden_states = hidden_states.permute(0, 4, 3, 1, 2) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + residual + + +class CosmosDownBlock3d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + num_layers: int, + dropout: float, + use_attention: bool, + use_downsample: bool, + spatial_downsample: bool, + temporal_downsample: bool, + ) -> None: + super().__init__() + + resnets, attentions, temp_attentions = [], [], [] + in_channel, out_channel = in_channels, out_channels + + for _ in range(num_layers): + resnets.append(CosmosResnetBlock3d(in_channel, out_channel, dropout, num_groups=1)) + in_channel = out_channel + + if use_attention: + attentions.append( + CosmosCausalAttention( + num_attention_heads=1, + attention_head_dim=out_channel, + num_groups=1, + dropout=dropout, + processor=CosmosSpatialAttentionProcessor2_0(), + ) + ) + temp_attentions.append( + CosmosCausalAttention( + num_attention_heads=1, + attention_head_dim=out_channel, + num_groups=1, + dropout=dropout, + processor=CosmosTemporalAttentionProcessor2_0(), + ) + ) + else: + attentions.append(None) + temp_attentions.append(None) + + self.resnets = nn.ModuleList(resnets) + self.attentions = nn.ModuleList(attentions) + self.temp_attentions = nn.ModuleList(temp_attentions) + + self.downsamplers = None + if use_downsample: + self.downsamplers = nn.ModuleList([]) + self.downsamplers.append(CosmosDownsample3d(out_channel, spatial_downsample, temporal_downsample)) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + for resnet, attention, temp_attention in zip(self.resnets, self.attentions, self.temp_attentions): + hidden_states = resnet(hidden_states) + if attention is not None: + hidden_states = attention(hidden_states) + if temp_attention is not None: + num_frames = hidden_states.size(2) + attention_mask = torch.tril(hidden_states.new_ones(num_frames, num_frames)).bool() + hidden_states = temp_attention(hidden_states, attention_mask) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class CosmosMidBlock3d(nn.Module): + def __init__(self, in_channels: int, num_layers: int, dropout: float, num_groups: int = 1) -> None: + super().__init__() + + resnets, attentions, temp_attentions = [], [], [] + + resnets.append(CosmosResnetBlock3d(in_channels, in_channels, dropout, num_groups)) + for _ in range(num_layers): + attentions.append( + CosmosCausalAttention( + num_attention_heads=1, + attention_head_dim=in_channels, + num_groups=num_groups, + dropout=dropout, + processor=CosmosSpatialAttentionProcessor2_0(), + ) + ) + temp_attentions.append( + CosmosCausalAttention( + num_attention_heads=1, + attention_head_dim=in_channels, + num_groups=num_groups, + dropout=dropout, + processor=CosmosTemporalAttentionProcessor2_0(), + ) + ) + resnets.append(CosmosResnetBlock3d(in_channels, in_channels, dropout, num_groups)) + + self.resnets = nn.ModuleList(resnets) + self.attentions = nn.ModuleList(attentions) + self.temp_attentions = nn.ModuleList(temp_attentions) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.resnets[0](hidden_states) + + for attention, temp_attention, resnet in zip(self.attentions, self.temp_attentions, self.resnets[1:]): + num_frames = hidden_states.size(2) + attention_mask = torch.tril(hidden_states.new_ones(num_frames, num_frames)).bool() + + hidden_states = attention(hidden_states) + hidden_states = temp_attention(hidden_states, attention_mask) + hidden_states = resnet(hidden_states) + + return hidden_states + + +class CosmosUpBlock3d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + num_layers: int, + dropout: float, + use_attention: bool, + use_upsample: bool, + spatial_upsample: bool, + temporal_upsample: bool, + ) -> None: + super().__init__() + + resnets, attention, temp_attentions = [], [], [] + in_channel, out_channel = in_channels, out_channels + + for _ in range(num_layers): + resnets.append(CosmosResnetBlock3d(in_channel, out_channel, dropout, num_groups=1)) + in_channel = out_channel + + if use_attention: + attention.append( + CosmosCausalAttention( + num_attention_heads=1, + attention_head_dim=out_channel, + num_groups=1, + dropout=dropout, + processor=CosmosSpatialAttentionProcessor2_0(), + ) + ) + temp_attentions.append( + CosmosCausalAttention( + num_attention_heads=1, + attention_head_dim=out_channel, + num_groups=1, + dropout=dropout, + processor=CosmosTemporalAttentionProcessor2_0(), + ) + ) + else: + attention.append(None) + temp_attentions.append(None) + + self.resnets = nn.ModuleList(resnets) + self.attentions = nn.ModuleList(attention) + self.temp_attentions = nn.ModuleList(temp_attentions) + + self.upsamplers = None + if use_upsample: + self.upsamplers = nn.ModuleList([]) + self.upsamplers.append(CosmosUpsample3d(out_channel, spatial_upsample, temporal_upsample)) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + for resnet, attention, temp_attention in zip(self.resnets, self.attentions, self.temp_attentions): + hidden_states = resnet(hidden_states) + if attention is not None: + hidden_states = attention(hidden_states) + if temp_attention is not None: + num_frames = hidden_states.size(2) + attention_mask = torch.tril(hidden_states.new_ones(num_frames, num_frames)).bool() + hidden_states = temp_attention(hidden_states, attention_mask) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class CosmosEncoder3d(nn.Module): + def __init__( + self, + in_channels: int = 3, + out_channels: int = 16, + block_out_channels: tuple[int, ...] = (128, 256, 512, 512), + num_resnet_blocks: int = 2, + attention_resolutions: tuple[int, ...] = (32,), + resolution: int = 1024, + patch_size: int = 4, + patch_type: str = "haar", + dropout: float = 0.0, + spatial_compression_ratio: int = 8, + temporal_compression_ratio: int = 8, + ) -> None: + super().__init__() + inner_dim = in_channels * patch_size**3 + num_spatial_layers = int(math.log2(spatial_compression_ratio)) - int(math.log2(patch_size)) + num_temporal_layers = int(math.log2(temporal_compression_ratio)) - int(math.log2(patch_size)) + + # 1. Input patching & projection + self.patch_embed = CosmosPatchEmbed3d(patch_size, patch_type) + + self.conv_in = CosmosConvProjection3d(inner_dim, block_out_channels[0]) + + # 2. Down blocks + current_resolution = resolution // patch_size + down_blocks = [] + for i in range(len(block_out_channels) - 1): + in_channel = block_out_channels[i] + out_channel = block_out_channels[i + 1] + + use_attention = current_resolution in attention_resolutions + spatial_downsample = temporal_downsample = False + if i < len(block_out_channels) - 2: + use_downsample = True + spatial_downsample = i < num_spatial_layers + temporal_downsample = i < num_temporal_layers + current_resolution = current_resolution // 2 + else: + use_downsample = False + + down_blocks.append( + CosmosDownBlock3d( + in_channel, + out_channel, + num_resnet_blocks, + dropout, + use_attention, + use_downsample, + spatial_downsample, + temporal_downsample, + ) + ) + self.down_blocks = nn.ModuleList(down_blocks) + + # 3. Mid block + self.mid_block = CosmosMidBlock3d(block_out_channels[-1], num_layers=1, dropout=dropout, num_groups=1) + + # 4. Output norm & projection + self.norm_out = CosmosCausalGroupNorm(block_out_channels[-1], num_groups=1) + self.conv_out = CosmosConvProjection3d(block_out_channels[-1], out_channels) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.patch_embed(hidden_states) + hidden_states = self.conv_in(hidden_states) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.down_blocks: + hidden_states = self._gradient_checkpointing_func(block, hidden_states) + hidden_states = self._gradient_checkpointing_func(self.mid_block, hidden_states) + else: + for block in self.down_blocks: + hidden_states = block(hidden_states) + hidden_states = self.mid_block(hidden_states) + + hidden_states = self.norm_out(hidden_states) + hidden_states = F.silu(hidden_states) + hidden_states = self.conv_out(hidden_states) + return hidden_states + + +class CosmosDecoder3d(nn.Module): + def __init__( + self, + in_channels: int = 16, + out_channels: int = 3, + block_out_channels: tuple[int, ...] = (128, 256, 512, 512), + num_resnet_blocks: int = 2, + attention_resolutions: tuple[int, ...] = (32,), + resolution: int = 1024, + patch_size: int = 4, + patch_type: str = "haar", + dropout: float = 0.0, + spatial_compression_ratio: int = 8, + temporal_compression_ratio: int = 8, + ) -> None: + super().__init__() + inner_dim = out_channels * patch_size**3 + num_spatial_layers = int(math.log2(spatial_compression_ratio)) - int(math.log2(patch_size)) + num_temporal_layers = int(math.log2(temporal_compression_ratio)) - int(math.log2(patch_size)) + reversed_block_out_channels = list(reversed(block_out_channels)) + + # 1. Input projection + self.conv_in = CosmosConvProjection3d(in_channels, reversed_block_out_channels[0]) + + # 2. Mid block + self.mid_block = CosmosMidBlock3d(reversed_block_out_channels[0], num_layers=1, dropout=dropout, num_groups=1) + + # 3. Up blocks + current_resolution = (resolution // patch_size) // 2 ** (len(block_out_channels) - 2) + up_blocks = [] + for i in range(len(block_out_channels) - 1): + in_channel = reversed_block_out_channels[i] + out_channel = reversed_block_out_channels[i + 1] + + use_attention = current_resolution in attention_resolutions + spatial_upsample = temporal_upsample = False + if i < len(block_out_channels) - 2: + use_upsample = True + temporal_upsample = 0 < i < num_temporal_layers + 1 + spatial_upsample = temporal_upsample or ( + i < num_spatial_layers and num_spatial_layers > num_temporal_layers + ) + current_resolution = current_resolution * 2 + else: + use_upsample = False + + up_blocks.append( + CosmosUpBlock3d( + in_channel, + out_channel, + num_resnet_blocks + 1, + dropout, + use_attention, + use_upsample, + spatial_upsample, + temporal_upsample, + ) + ) + self.up_blocks = nn.ModuleList(up_blocks) + + # 4. Output norm & projection & unpatching + self.norm_out = CosmosCausalGroupNorm(reversed_block_out_channels[-1], num_groups=1) + self.conv_out = CosmosConvProjection3d(reversed_block_out_channels[-1], inner_dim) + + self.unpatch_embed = CosmosUnpatcher3d(patch_size, patch_type) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.conv_in(hidden_states) + hidden_states = self.mid_block(hidden_states) + + for block in self.up_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(block, hidden_states) + else: + hidden_states = block(hidden_states) + + hidden_states = self.norm_out(hidden_states) + hidden_states = F.silu(hidden_states) + hidden_states = self.conv_out(hidden_states) + hidden_states = self.unpatch_embed(hidden_states) + return hidden_states + + +class AutoencoderKLCosmos(ModelMixin, AutoencoderMixin, ConfigMixin): + r""" + Autoencoder used in [Cosmos](https://huggingface.co/papers/2501.03575). + + Args: + in_channels (`int`, defaults to `3`): + Number of input channels. + out_channels (`int`, defaults to `3`): + Number of output channels. + latent_channels (`int`, defaults to `16`): + Number of latent channels. + encoder_block_out_channels (`tuple[int, ...]`, defaults to `(128, 256, 512, 512)`): + Number of output channels for each encoder down block. + decode_block_out_channels (`tuple[int, ...]`, defaults to `(256, 512, 512, 512)`): + Number of output channels for each decoder up block. + attention_resolutions (`tuple[int, ...]`, defaults to `(32,)`): + list of image/video resolutions at which to apply attention. + resolution (`int`, defaults to `1024`): + Base image/video resolution used for computing whether a block should have attention layers. + num_layers (`int`, defaults to `2`): + Number of resnet blocks in each encoder/decoder block. + patch_size (`int`, defaults to `4`): + Patch size used for patching the input image/video. + patch_type (`str`, defaults to `haar`): + Patch type used for patching the input image/video. Can be either `haar` or `rearrange`. + scaling_factor (`float`, defaults to `1.0`): + The component-wise standard deviation of the trained latent space computed using the first batch of the + training set. This is used to scale the latent space to have unit variance when training the diffusion + model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the + diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1 + / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image + Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper. Not applicable in + Cosmos, but we default to 1.0 for consistency. + spatial_compression_ratio (`int`, defaults to `8`): + The spatial compression ratio to apply in the VAE. The number of downsample blocks is determined using + this. + temporal_compression_ratio (`int`, defaults to `8`): + The temporal compression ratio to apply in the VAE. The number of downsample blocks is determined using + this. + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 3, + out_channels: int = 3, + latent_channels: int = 16, + encoder_block_out_channels: tuple[int, ...] = (128, 256, 512, 512), + decode_block_out_channels: tuple[int, ...] = (256, 512, 512, 512), + attention_resolutions: tuple[int, ...] = (32,), + resolution: int = 1024, + num_layers: int = 2, + patch_size: int = 4, + patch_type: str = "haar", + scaling_factor: float = 1.0, + spatial_compression_ratio: int = 8, + temporal_compression_ratio: int = 8, + latents_mean: list[float] | None = LATENTS_MEAN, + latents_std: list[float] | None = LATENTS_STD, + ) -> None: + super().__init__() + + self.encoder = CosmosEncoder3d( + in_channels=in_channels, + out_channels=latent_channels, + block_out_channels=encoder_block_out_channels, + num_resnet_blocks=num_layers, + attention_resolutions=attention_resolutions, + resolution=resolution, + patch_size=patch_size, + patch_type=patch_type, + spatial_compression_ratio=spatial_compression_ratio, + temporal_compression_ratio=temporal_compression_ratio, + ) + self.decoder = CosmosDecoder3d( + in_channels=latent_channels, + out_channels=out_channels, + block_out_channels=decode_block_out_channels, + num_resnet_blocks=num_layers, + attention_resolutions=attention_resolutions, + resolution=resolution, + patch_size=patch_size, + patch_type=patch_type, + spatial_compression_ratio=spatial_compression_ratio, + temporal_compression_ratio=temporal_compression_ratio, + ) + + self.quant_conv = CosmosCausalConv3d(latent_channels, latent_channels, kernel_size=1, padding=0) + self.post_quant_conv = CosmosCausalConv3d(latent_channels, latent_channels, kernel_size=1, padding=0) + + # When decoding a batch of video latents at a time, one can save memory by slicing across the batch dimension + # to perform decoding of a single video latent at a time. + self.use_slicing = False + + # When decoding spatially large video latents, the memory requirement is very high. By breaking the video latent + # frames spatially into smaller tiles and performing multiple forward passes for decoding, and then blending the + # intermediate tiles together, the memory requirement can be lowered. + self.use_tiling = False + + # When decoding temporally long video latents, the memory requirement is very high. By decoding latent frames + # at a fixed frame batch size (based on `self.num_latent_frames_batch_sizes`), the memory requirement can be lowered. + self.use_framewise_encoding = False + self.use_framewise_decoding = False + + # This can be configured based on the amount of GPU memory available. + # `16` for sample frames and `2` for latent frames are sensible defaults for consumer GPUs. + # Setting it to higher values results in higher memory usage. + self.num_sample_frames_batch_size = 16 + self.num_latent_frames_batch_size = 2 + + # The minimal tile height and width for spatial tiling to be used + self.tile_sample_min_height = 512 + self.tile_sample_min_width = 512 + self.tile_sample_min_num_frames = 16 + + # The minimal distance between two spatial tiles + self.tile_sample_stride_height = 448 + self.tile_sample_stride_width = 448 + self.tile_sample_stride_num_frames = 8 + + def enable_tiling( + self, + tile_sample_min_height: int | None = None, + tile_sample_min_width: int | None = None, + tile_sample_min_num_frames: int | None = None, + tile_sample_stride_height: float | None = None, + tile_sample_stride_width: float | None = None, + tile_sample_stride_num_frames: float | None = None, + ) -> None: + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + + Args: + tile_sample_min_height (`int`, *optional*): + The minimum height required for a sample to be separated into tiles across the height dimension. + tile_sample_min_width (`int`, *optional*): + The minimum width required for a sample to be separated into tiles across the width dimension. + tile_sample_stride_height (`int`, *optional*): + The minimum amount of overlap between two consecutive vertical tiles. This is to ensure that there are + no tiling artifacts produced across the height dimension. + tile_sample_stride_width (`int`, *optional*): + The stride between two consecutive horizontal tiles. This is to ensure that there are no tiling + artifacts produced across the width dimension. + """ + self.use_tiling = True + self.tile_sample_min_height = tile_sample_min_height or self.tile_sample_min_height + self.tile_sample_min_width = tile_sample_min_width or self.tile_sample_min_width + self.tile_sample_min_num_frames = tile_sample_min_num_frames or self.tile_sample_min_num_frames + self.tile_sample_stride_height = tile_sample_stride_height or self.tile_sample_stride_height + self.tile_sample_stride_width = tile_sample_stride_width or self.tile_sample_stride_width + self.tile_sample_stride_num_frames = tile_sample_stride_num_frames or self.tile_sample_stride_num_frames + + def _encode(self, x: torch.Tensor) -> torch.Tensor: + x = self.encoder(x) + enc = self.quant_conv(x) + return enc + + @apply_forward_hook + def encode(self, x: torch.Tensor, return_dict: bool = True) -> torch.Tensor: + if self.use_slicing and x.shape[0] > 1: + encoded_slices = [self._encode(x_slice) for x_slice in x.split(1)] + h = torch.cat(encoded_slices) + else: + h = self._encode(x) + + posterior = IdentityDistribution(h) + + if not return_dict: + return (posterior,) + return AutoencoderKLOutput(latent_dist=posterior) + + def _decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | tuple[torch.Tensor]: + z = self.post_quant_conv(z) + dec = self.decoder(z) + + if not return_dict: + return (dec,) + return DecoderOutput(sample=dec) + + @apply_forward_hook + def decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | tuple[torch.Tensor]: + if self.use_slicing and z.shape[0] > 1: + decoded_slices = [self._decode(z_slice).sample for z_slice in z.split(1)] + decoded = torch.cat(decoded_slices) + else: + decoded = self._decode(z).sample + + if not return_dict: + return (decoded,) + return DecoderOutput(sample=decoded) + + def forward( + self, + sample: torch.Tensor, + sample_posterior: bool = False, + return_dict: bool = True, + generator: torch.Generator | None = None, + ) -> tuple[torch.Tensor] | DecoderOutput: + x = sample + posterior = self.encode(x).latent_dist + if sample_posterior: + z = posterior.sample(generator=generator) + else: + z = posterior.mode() + dec = self.decode(z).sample + if not return_dict: + return (dec,) + return DecoderOutput(sample=dec) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py new file mode 100644 index 0000000000000000000000000000000000000000..a19c267b6d36304975a9a25ae90045a43c852345 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py @@ -0,0 +1,1072 @@ +# Copyright 2025 The Hunyuan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ...utils.accelerate_utils import apply_forward_hook +from ..activations import get_activation +from ..attention_processor import Attention +from ..modeling_outputs import AutoencoderKLOutput +from ..modeling_utils import ModelMixin +from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def prepare_causal_attention_mask( + num_frames: int, height_width: int, dtype: torch.dtype, device: torch.device, batch_size: int = None +) -> torch.Tensor: + indices = torch.arange(1, num_frames + 1, dtype=torch.int32, device=device) + indices_blocks = indices.repeat_interleave(height_width) + x, y = torch.meshgrid(indices_blocks, indices_blocks, indexing="xy") + mask = torch.where(x <= y, 0, -float("inf")).to(dtype=dtype) + + if batch_size is not None: + mask = mask.unsqueeze(0).expand(batch_size, -1, -1) + return mask + + +class HunyuanVideoCausalConv3d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int | tuple[int, int, int] = 3, + stride: int | tuple[int, int, int] = 1, + padding: int | tuple[int, int, int] = 0, + dilation: int | tuple[int, int, int] = 1, + bias: bool = True, + pad_mode: str = "replicate", + ) -> None: + super().__init__() + + kernel_size = (kernel_size, kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size + + self.pad_mode = pad_mode + self.time_causal_padding = ( + kernel_size[0] // 2, + kernel_size[0] // 2, + kernel_size[1] // 2, + kernel_size[1] // 2, + kernel_size[2] - 1, + 0, + ) + + self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = F.pad(hidden_states, self.time_causal_padding, mode=self.pad_mode) + return self.conv(hidden_states) + + +class HunyuanVideoUpsampleCausal3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int | None = None, + kernel_size: int = 3, + stride: int = 1, + bias: bool = True, + upsample_factor: tuple[float, float, float] = (2, 2, 2), + ) -> None: + super().__init__() + + out_channels = out_channels or in_channels + self.upsample_factor = upsample_factor + + self.conv = HunyuanVideoCausalConv3d(in_channels, out_channels, kernel_size, stride, bias=bias) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + num_frames = hidden_states.size(2) + + first_frame, other_frames = hidden_states.split((1, num_frames - 1), dim=2) + first_frame = F.interpolate( + first_frame.squeeze(2), scale_factor=self.upsample_factor[1:], mode="nearest" + ).unsqueeze(2) + + if num_frames > 1: + # See: https://github.com/pytorch/pytorch/issues/81665 + # Unless you have a version of pytorch where non-contiguous implementation of F.interpolate + # is fixed, this will raise either a runtime error, or fail silently with bad outputs. + # If you are encountering an error here, make sure to try running encoding/decoding with + # `vae.enable_tiling()` first. If that doesn't work, open an issue at: + # https://github.com/huggingface/diffusers/issues + other_frames = other_frames.contiguous() + other_frames = F.interpolate(other_frames, scale_factor=self.upsample_factor, mode="nearest") + hidden_states = torch.cat((first_frame, other_frames), dim=2) + else: + hidden_states = first_frame + + hidden_states = self.conv(hidden_states) + return hidden_states + + +class HunyuanVideoDownsampleCausal3D(nn.Module): + def __init__( + self, + channels: int, + out_channels: int | None = None, + padding: int = 1, + kernel_size: int = 3, + bias: bool = True, + stride=2, + ) -> None: + super().__init__() + out_channels = out_channels or channels + + self.conv = HunyuanVideoCausalConv3d(channels, out_channels, kernel_size, stride, padding, bias=bias) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.conv(hidden_states) + return hidden_states + + +class HunyuanVideoResnetBlockCausal3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int | None = None, + dropout: float = 0.0, + groups: int = 32, + eps: float = 1e-6, + non_linearity: str = "swish", + ) -> None: + super().__init__() + out_channels = out_channels or in_channels + + self.nonlinearity = get_activation(non_linearity) + + self.norm1 = nn.GroupNorm(groups, in_channels, eps=eps, affine=True) + self.conv1 = HunyuanVideoCausalConv3d(in_channels, out_channels, 3, 1, 0) + + self.norm2 = nn.GroupNorm(groups, out_channels, eps=eps, affine=True) + self.dropout = nn.Dropout(dropout) + self.conv2 = HunyuanVideoCausalConv3d(out_channels, out_channels, 3, 1, 0) + + self.conv_shortcut = None + if in_channels != out_channels: + self.conv_shortcut = HunyuanVideoCausalConv3d(in_channels, out_channels, 1, 1, 0) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = hidden_states.contiguous() + residual = hidden_states + + hidden_states = self.norm1(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + hidden_states = self.conv1(hidden_states) + + hidden_states = self.norm2(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + residual = self.conv_shortcut(residual) + + hidden_states = hidden_states + residual + return hidden_states + + +class HunyuanVideoMidBlock3D(nn.Module): + def __init__( + self, + in_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + add_attention: bool = True, + attention_head_dim: int = 1, + ) -> None: + super().__init__() + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + self.add_attention = add_attention + + # There is always at least one resnet + resnets = [ + HunyuanVideoResnetBlockCausal3D( + in_channels=in_channels, + out_channels=in_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + non_linearity=resnet_act_fn, + ) + ] + attentions = [] + + for _ in range(num_layers): + if self.add_attention: + attentions.append( + Attention( + in_channels, + heads=in_channels // attention_head_dim, + dim_head=attention_head_dim, + eps=resnet_eps, + norm_num_groups=resnet_groups, + residual_connection=True, + bias=True, + upcast_softmax=True, + _from_deprecated_attn_block=True, + ) + ) + else: + attentions.append(None) + + resnets.append( + HunyuanVideoResnetBlockCausal3D( + in_channels=in_channels, + out_channels=in_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + non_linearity=resnet_act_fn, + ) + ) + + self.attentions = nn.ModuleList(attentions) + self.resnets = nn.ModuleList(resnets) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(self.resnets[0], hidden_states) + + for attn, resnet in zip(self.attentions, self.resnets[1:]): + if attn is not None: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + hidden_states = hidden_states.permute(0, 2, 3, 4, 1).flatten(1, 3) + attention_mask = prepare_causal_attention_mask( + num_frames, height * width, hidden_states.dtype, hidden_states.device, batch_size=batch_size + ) + hidden_states = attn(hidden_states, attention_mask=attention_mask) + hidden_states = hidden_states.unflatten(1, (num_frames, height, width)).permute(0, 4, 1, 2, 3) + + hidden_states = self._gradient_checkpointing_func(resnet, hidden_states) + + else: + hidden_states = self.resnets[0](hidden_states) + + for attn, resnet in zip(self.attentions, self.resnets[1:]): + if attn is not None: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + hidden_states = hidden_states.permute(0, 2, 3, 4, 1).flatten(1, 3) + attention_mask = prepare_causal_attention_mask( + num_frames, height * width, hidden_states.dtype, hidden_states.device, batch_size=batch_size + ) + hidden_states = attn(hidden_states, attention_mask=attention_mask) + hidden_states = hidden_states.unflatten(1, (num_frames, height, width)).permute(0, 4, 1, 2, 3) + + hidden_states = resnet(hidden_states) + + return hidden_states + + +class HunyuanVideoDownBlock3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + add_downsample: bool = True, + downsample_stride: int = 2, + downsample_padding: int = 1, + ) -> None: + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + HunyuanVideoResnetBlockCausal3D( + in_channels=in_channels, + out_channels=out_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + non_linearity=resnet_act_fn, + ) + ) + + self.resnets = nn.ModuleList(resnets) + + if add_downsample: + self.downsamplers = nn.ModuleList( + [ + HunyuanVideoDownsampleCausal3D( + out_channels, + out_channels=out_channels, + padding=downsample_padding, + stride=downsample_stride, + ) + ] + ) + else: + self.downsamplers = None + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if torch.is_grad_enabled() and self.gradient_checkpointing: + for resnet in self.resnets: + hidden_states = self._gradient_checkpointing_func(resnet, hidden_states) + else: + for resnet in self.resnets: + hidden_states = resnet(hidden_states) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class HunyuanVideoUpBlock3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + add_upsample: bool = True, + upsample_scale_factor: tuple[int, int, int] = (2, 2, 2), + ) -> None: + super().__init__() + resnets = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + HunyuanVideoResnetBlockCausal3D( + in_channels=input_channels, + out_channels=out_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + non_linearity=resnet_act_fn, + ) + ) + + self.resnets = nn.ModuleList(resnets) + + if add_upsample: + self.upsamplers = nn.ModuleList( + [ + HunyuanVideoUpsampleCausal3D( + out_channels, + out_channels=out_channels, + upsample_factor=upsample_scale_factor, + ) + ] + ) + else: + self.upsamplers = None + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if torch.is_grad_enabled() and self.gradient_checkpointing: + for resnet in self.resnets: + hidden_states = self._gradient_checkpointing_func(resnet, hidden_states) + + else: + for resnet in self.resnets: + hidden_states = resnet(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class HunyuanVideoEncoder3D(nn.Module): + r""" + Causal encoder for 3D video-like data introduced in [Hunyuan Video](https://huggingface.co/papers/2412.03603). + """ + + def __init__( + self, + in_channels: int = 3, + out_channels: int = 3, + down_block_types: tuple[str, ...] = ( + "HunyuanVideoDownBlock3D", + "HunyuanVideoDownBlock3D", + "HunyuanVideoDownBlock3D", + "HunyuanVideoDownBlock3D", + ), + block_out_channels: tuple[int, ...] = (128, 256, 512, 512), + layers_per_block: int = 2, + norm_num_groups: int = 32, + act_fn: str = "silu", + double_z: bool = True, + mid_block_add_attention=True, + temporal_compression_ratio: int = 4, + spatial_compression_ratio: int = 8, + ) -> None: + super().__init__() + + self.conv_in = HunyuanVideoCausalConv3d(in_channels, block_out_channels[0], kernel_size=3, stride=1) + self.mid_block = None + self.down_blocks = nn.ModuleList([]) + + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + if down_block_type != "HunyuanVideoDownBlock3D": + raise ValueError(f"Unsupported down_block_type: {down_block_type}") + + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + num_spatial_downsample_layers = int(np.log2(spatial_compression_ratio)) + num_time_downsample_layers = int(np.log2(temporal_compression_ratio)) + + if temporal_compression_ratio == 4: + add_spatial_downsample = bool(i < num_spatial_downsample_layers) + add_time_downsample = bool( + i >= (len(block_out_channels) - 1 - num_time_downsample_layers) and not is_final_block + ) + elif temporal_compression_ratio == 8: + add_spatial_downsample = bool(i < num_spatial_downsample_layers) + add_time_downsample = bool(i < num_time_downsample_layers) + else: + raise ValueError(f"Unsupported time_compression_ratio: {temporal_compression_ratio}") + + downsample_stride_HW = (2, 2) if add_spatial_downsample else (1, 1) + downsample_stride_T = (2,) if add_time_downsample else (1,) + downsample_stride = tuple(downsample_stride_T + downsample_stride_HW) + + down_block = HunyuanVideoDownBlock3D( + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + add_downsample=bool(add_spatial_downsample or add_time_downsample), + resnet_eps=1e-6, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + downsample_stride=downsample_stride, + downsample_padding=0, + ) + + self.down_blocks.append(down_block) + + self.mid_block = HunyuanVideoMidBlock3D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + attention_head_dim=block_out_channels[-1], + resnet_groups=norm_num_groups, + add_attention=mid_block_add_attention, + ) + + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[-1], num_groups=norm_num_groups, eps=1e-6) + self.conv_act = nn.SiLU() + + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = HunyuanVideoCausalConv3d(block_out_channels[-1], conv_out_channels, kernel_size=3) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.conv_in(hidden_states) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + for down_block in self.down_blocks: + hidden_states = self._gradient_checkpointing_func(down_block, hidden_states) + + hidden_states = self._gradient_checkpointing_func(self.mid_block, hidden_states) + else: + for down_block in self.down_blocks: + hidden_states = down_block(hidden_states) + + hidden_states = self.mid_block(hidden_states) + + hidden_states = self.conv_norm_out(hidden_states) + hidden_states = self.conv_act(hidden_states) + hidden_states = self.conv_out(hidden_states) + + return hidden_states + + +class HunyuanVideoDecoder3D(nn.Module): + r""" + Causal decoder for 3D video-like data introduced in [Hunyuan Video](https://huggingface.co/papers/2412.03603). + """ + + def __init__( + self, + in_channels: int = 3, + out_channels: int = 3, + up_block_types: tuple[str, ...] = ( + "HunyuanVideoUpBlock3D", + "HunyuanVideoUpBlock3D", + "HunyuanVideoUpBlock3D", + "HunyuanVideoUpBlock3D", + ), + block_out_channels: tuple[int, ...] = (128, 256, 512, 512), + layers_per_block: int = 2, + norm_num_groups: int = 32, + act_fn: str = "silu", + mid_block_add_attention=True, + time_compression_ratio: int = 4, + spatial_compression_ratio: int = 8, + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = HunyuanVideoCausalConv3d(in_channels, block_out_channels[-1], kernel_size=3, stride=1) + self.up_blocks = nn.ModuleList([]) + + # mid + self.mid_block = HunyuanVideoMidBlock3D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + attention_head_dim=block_out_channels[-1], + resnet_groups=norm_num_groups, + add_attention=mid_block_add_attention, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + if up_block_type != "HunyuanVideoUpBlock3D": + raise ValueError(f"Unsupported up_block_type: {up_block_type}") + + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + num_spatial_upsample_layers = int(np.log2(spatial_compression_ratio)) + num_time_upsample_layers = int(np.log2(time_compression_ratio)) + + if time_compression_ratio == 4: + add_spatial_upsample = bool(i < num_spatial_upsample_layers) + add_time_upsample = bool( + i >= len(block_out_channels) - 1 - num_time_upsample_layers and not is_final_block + ) + else: + raise ValueError(f"Unsupported time_compression_ratio: {time_compression_ratio}") + + upsample_scale_factor_HW = (2, 2) if add_spatial_upsample else (1, 1) + upsample_scale_factor_T = (2,) if add_time_upsample else (1,) + upsample_scale_factor = tuple(upsample_scale_factor_T + upsample_scale_factor_HW) + + up_block = HunyuanVideoUpBlock3D( + num_layers=self.layers_per_block + 1, + in_channels=prev_output_channel, + out_channels=output_channel, + add_upsample=bool(add_spatial_upsample or add_time_upsample), + upsample_scale_factor=upsample_scale_factor, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + ) + + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=norm_num_groups, eps=1e-6) + self.conv_act = nn.SiLU() + self.conv_out = HunyuanVideoCausalConv3d(block_out_channels[0], out_channels, kernel_size=3) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.conv_in(hidden_states) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(self.mid_block, hidden_states) + + for up_block in self.up_blocks: + hidden_states = self._gradient_checkpointing_func(up_block, hidden_states) + else: + hidden_states = self.mid_block(hidden_states) + + for up_block in self.up_blocks: + hidden_states = up_block(hidden_states) + + # post-process + hidden_states = self.conv_norm_out(hidden_states) + hidden_states = self.conv_act(hidden_states) + hidden_states = self.conv_out(hidden_states) + + return hidden_states + + +class AutoencoderKLHunyuanVideo(ModelMixin, AutoencoderMixin, ConfigMixin): + r""" + A VAE model with KL loss for encoding videos into latents and decoding latent representations into videos. + Introduced in [HunyuanVideo](https://huggingface.co/papers/2412.03603). + + This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented + for all models (such as downloading or saving). + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 3, + out_channels: int = 3, + latent_channels: int = 16, + down_block_types: tuple[str, ...] = ( + "HunyuanVideoDownBlock3D", + "HunyuanVideoDownBlock3D", + "HunyuanVideoDownBlock3D", + "HunyuanVideoDownBlock3D", + ), + up_block_types: tuple[str, ...] = ( + "HunyuanVideoUpBlock3D", + "HunyuanVideoUpBlock3D", + "HunyuanVideoUpBlock3D", + "HunyuanVideoUpBlock3D", + ), + block_out_channels: tuple[int] = (128, 256, 512, 512), + layers_per_block: int = 2, + act_fn: str = "silu", + norm_num_groups: int = 32, + scaling_factor: float = 0.476986, + spatial_compression_ratio: int = 8, + temporal_compression_ratio: int = 4, + mid_block_add_attention: bool = True, + ) -> None: + super().__init__() + + self.time_compression_ratio = temporal_compression_ratio + + self.encoder = HunyuanVideoEncoder3D( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + norm_num_groups=norm_num_groups, + act_fn=act_fn, + double_z=True, + mid_block_add_attention=mid_block_add_attention, + temporal_compression_ratio=temporal_compression_ratio, + spatial_compression_ratio=spatial_compression_ratio, + ) + + self.decoder = HunyuanVideoDecoder3D( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + norm_num_groups=norm_num_groups, + act_fn=act_fn, + time_compression_ratio=temporal_compression_ratio, + spatial_compression_ratio=spatial_compression_ratio, + mid_block_add_attention=mid_block_add_attention, + ) + + self.quant_conv = nn.Conv3d(2 * latent_channels, 2 * latent_channels, kernel_size=1) + self.post_quant_conv = nn.Conv3d(latent_channels, latent_channels, kernel_size=1) + + self.spatial_compression_ratio = spatial_compression_ratio + self.temporal_compression_ratio = temporal_compression_ratio + + # When decoding a batch of video latents at a time, one can save memory by slicing across the batch dimension + # to perform decoding of a single video latent at a time. + self.use_slicing = False + + # When decoding spatially large video latents, the memory requirement is very high. By breaking the video latent + # frames spatially into smaller tiles and performing multiple forward passes for decoding, and then blending the + # intermediate tiles together, the memory requirement can be lowered. + self.use_tiling = False + + # When decoding temporally long video latents, the memory requirement is very high. By decoding latent frames + # at a fixed frame batch size (based on `self.tile_sample_min_num_frames`), the memory requirement can be lowered. + self.use_framewise_encoding = True + self.use_framewise_decoding = True + + # The minimal tile height and width for spatial tiling to be used + self.tile_sample_min_height = 256 + self.tile_sample_min_width = 256 + self.tile_sample_min_num_frames = 16 + + # The minimal distance between two spatial tiles + self.tile_sample_stride_height = 192 + self.tile_sample_stride_width = 192 + self.tile_sample_stride_num_frames = 12 + + def enable_tiling( + self, + tile_sample_min_height: int | None = None, + tile_sample_min_width: int | None = None, + tile_sample_min_num_frames: int | None = None, + tile_sample_stride_height: float | None = None, + tile_sample_stride_width: float | None = None, + tile_sample_stride_num_frames: float | None = None, + ) -> None: + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + + Args: + tile_sample_min_height (`int`, *optional*): + The minimum height required for a sample to be separated into tiles across the height dimension. + tile_sample_min_width (`int`, *optional*): + The minimum width required for a sample to be separated into tiles across the width dimension. + tile_sample_min_num_frames (`int`, *optional*): + The minimum number of frames required for a sample to be separated into tiles across the frame + dimension. + tile_sample_stride_height (`int`, *optional*): + The minimum amount of overlap between two consecutive vertical tiles. This is to ensure that there are + no tiling artifacts produced across the height dimension. + tile_sample_stride_width (`int`, *optional*): + The stride between two consecutive horizontal tiles. This is to ensure that there are no tiling + artifacts produced across the width dimension. + tile_sample_stride_num_frames (`int`, *optional*): + The stride between two consecutive frame tiles. This is to ensure that there are no tiling artifacts + produced across the frame dimension. + """ + self.use_tiling = True + self.tile_sample_min_height = tile_sample_min_height or self.tile_sample_min_height + self.tile_sample_min_width = tile_sample_min_width or self.tile_sample_min_width + self.tile_sample_min_num_frames = tile_sample_min_num_frames or self.tile_sample_min_num_frames + self.tile_sample_stride_height = tile_sample_stride_height or self.tile_sample_stride_height + self.tile_sample_stride_width = tile_sample_stride_width or self.tile_sample_stride_width + self.tile_sample_stride_num_frames = tile_sample_stride_num_frames or self.tile_sample_stride_num_frames + + def _encode(self, x: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = x.shape + + if self.use_framewise_encoding and num_frames > self.tile_sample_min_num_frames: + return self._temporal_tiled_encode(x) + + if self.use_tiling and (width > self.tile_sample_min_width or height > self.tile_sample_min_height): + return self.tiled_encode(x) + + x = self.encoder(x) + enc = self.quant_conv(x) + return enc + + @apply_forward_hook + def encode( + self, x: torch.Tensor, return_dict: bool = True + ) -> AutoencoderKLOutput | tuple[DiagonalGaussianDistribution]: + r""" + Encode a batch of images into latents. + + Args: + x (`torch.Tensor`): Input batch of images. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple. + + Returns: + The latent representations of the encoded videos. If `return_dict` is True, a + [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned. + """ + if self.use_slicing and x.shape[0] > 1: + encoded_slices = [self._encode(x_slice) for x_slice in x.split(1)] + h = torch.cat(encoded_slices) + else: + h = self._encode(x) + + posterior = DiagonalGaussianDistribution(h) + + if not return_dict: + return (posterior,) + return AutoencoderKLOutput(latent_dist=posterior) + + def _decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + batch_size, num_channels, num_frames, height, width = z.shape + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + tile_latent_min_num_frames = self.tile_sample_min_num_frames // self.temporal_compression_ratio + + if self.use_framewise_decoding and num_frames > tile_latent_min_num_frames: + return self._temporal_tiled_decode(z, return_dict=return_dict) + + if self.use_tiling and (width > tile_latent_min_width or height > tile_latent_min_height): + return self.tiled_decode(z, return_dict=return_dict) + + z = self.post_quant_conv(z) + dec = self.decoder(z) + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + + @apply_forward_hook + def decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + r""" + Decode a batch of images. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + if self.use_slicing and z.shape[0] > 1: + decoded_slices = [self._decode(z_slice).sample for z_slice in z.split(1)] + decoded = torch.cat(decoded_slices) + else: + decoded = self._decode(z).sample + + if not return_dict: + return (decoded,) + + return DecoderOutput(sample=decoded) + + def blend_v(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: + blend_extent = min(a.shape[-2], b.shape[-2], blend_extent) + for y in range(blend_extent): + b[:, :, :, y, :] = a[:, :, :, -blend_extent + y, :] * (1 - y / blend_extent) + b[:, :, :, y, :] * ( + y / blend_extent + ) + return b + + def blend_h(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: + blend_extent = min(a.shape[-1], b.shape[-1], blend_extent) + for x in range(blend_extent): + b[:, :, :, :, x] = a[:, :, :, :, -blend_extent + x] * (1 - x / blend_extent) + b[:, :, :, :, x] * ( + x / blend_extent + ) + return b + + def blend_t(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: + blend_extent = min(a.shape[-3], b.shape[-3], blend_extent) + for x in range(blend_extent): + b[:, :, x, :, :] = a[:, :, -blend_extent + x, :, :] * (1 - x / blend_extent) + b[:, :, x, :, :] * ( + x / blend_extent + ) + return b + + def tiled_encode(self, x: torch.Tensor) -> AutoencoderKLOutput: + r"""Encode a batch of images using a tiled encoder. + + Args: + x (`torch.Tensor`): Input batch of videos. + + Returns: + `torch.Tensor`: + The latent representation of the encoded videos. + """ + batch_size, num_channels, num_frames, height, width = x.shape + latent_height = height // self.spatial_compression_ratio + latent_width = width // self.spatial_compression_ratio + + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + tile_latent_stride_height = self.tile_sample_stride_height // self.spatial_compression_ratio + tile_latent_stride_width = self.tile_sample_stride_width // self.spatial_compression_ratio + + blend_height = tile_latent_min_height - tile_latent_stride_height + blend_width = tile_latent_min_width - tile_latent_stride_width + + # Split x into overlapping tiles and encode them separately. + # The tiles have an overlap to avoid seams between tiles. + rows = [] + for i in range(0, height, self.tile_sample_stride_height): + row = [] + for j in range(0, width, self.tile_sample_stride_width): + tile = x[:, :, :, i : i + self.tile_sample_min_height, j : j + self.tile_sample_min_width] + tile = self.encoder(tile) + tile = self.quant_conv(tile) + row.append(tile) + rows.append(row) + + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_width) + result_row.append(tile[:, :, :, :tile_latent_stride_height, :tile_latent_stride_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + enc = torch.cat(result_rows, dim=3)[:, :, :, :latent_height, :latent_width] + return enc + + def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + r""" + Decode a batch of images using a tiled decoder. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + + batch_size, num_channels, num_frames, height, width = z.shape + sample_height = height * self.spatial_compression_ratio + sample_width = width * self.spatial_compression_ratio + + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + tile_latent_stride_height = self.tile_sample_stride_height // self.spatial_compression_ratio + tile_latent_stride_width = self.tile_sample_stride_width // self.spatial_compression_ratio + + blend_height = self.tile_sample_min_height - self.tile_sample_stride_height + blend_width = self.tile_sample_min_width - self.tile_sample_stride_width + + # Split z into overlapping tiles and decode them separately. + # The tiles have an overlap to avoid seams between tiles. + rows = [] + for i in range(0, height, tile_latent_stride_height): + row = [] + for j in range(0, width, tile_latent_stride_width): + tile = z[:, :, :, i : i + tile_latent_min_height, j : j + tile_latent_min_width] + tile = self.post_quant_conv(tile) + decoded = self.decoder(tile) + row.append(decoded) + rows.append(row) + + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_width) + result_row.append(tile[:, :, :, : self.tile_sample_stride_height, : self.tile_sample_stride_width]) + result_rows.append(torch.cat(result_row, dim=-1)) + + dec = torch.cat(result_rows, dim=3)[:, :, :, :sample_height, :sample_width] + + if not return_dict: + return (dec,) + return DecoderOutput(sample=dec) + + def _temporal_tiled_encode(self, x: torch.Tensor) -> AutoencoderKLOutput: + batch_size, num_channels, num_frames, height, width = x.shape + latent_num_frames = (num_frames - 1) // self.temporal_compression_ratio + 1 + + tile_latent_min_num_frames = self.tile_sample_min_num_frames // self.temporal_compression_ratio + tile_latent_stride_num_frames = self.tile_sample_stride_num_frames // self.temporal_compression_ratio + blend_num_frames = tile_latent_min_num_frames - tile_latent_stride_num_frames + + row = [] + for i in range(0, num_frames, self.tile_sample_stride_num_frames): + tile = x[:, :, i : i + self.tile_sample_min_num_frames + 1, :, :] + if self.use_tiling and (height > self.tile_sample_min_height or width > self.tile_sample_min_width): + tile = self.tiled_encode(tile) + else: + tile = self.encoder(tile) + tile = self.quant_conv(tile) + if i > 0: + tile = tile[:, :, 1:, :, :] + row.append(tile) + + result_row = [] + for i, tile in enumerate(row): + if i > 0: + tile = self.blend_t(row[i - 1], tile, blend_num_frames) + result_row.append(tile[:, :, :tile_latent_stride_num_frames, :, :]) + else: + result_row.append(tile[:, :, : tile_latent_stride_num_frames + 1, :, :]) + + enc = torch.cat(result_row, dim=2)[:, :, :latent_num_frames] + return enc + + def _temporal_tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + batch_size, num_channels, num_frames, height, width = z.shape + num_sample_frames = (num_frames - 1) * self.temporal_compression_ratio + 1 + + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + tile_latent_min_num_frames = self.tile_sample_min_num_frames // self.temporal_compression_ratio + tile_latent_stride_num_frames = self.tile_sample_stride_num_frames // self.temporal_compression_ratio + blend_num_frames = self.tile_sample_min_num_frames - self.tile_sample_stride_num_frames + + row = [] + for i in range(0, num_frames, tile_latent_stride_num_frames): + tile = z[:, :, i : i + tile_latent_min_num_frames + 1, :, :] + if self.use_tiling and (tile.shape[-1] > tile_latent_min_width or tile.shape[-2] > tile_latent_min_height): + decoded = self.tiled_decode(tile, return_dict=True).sample + else: + tile = self.post_quant_conv(tile) + decoded = self.decoder(tile) + if i > 0: + decoded = decoded[:, :, 1:, :, :] + row.append(decoded) + + result_row = [] + for i, tile in enumerate(row): + if i > 0: + tile = self.blend_t(row[i - 1], tile, blend_num_frames) + result_row.append(tile[:, :, : self.tile_sample_stride_num_frames, :, :]) + else: + result_row.append(tile[:, :, : self.tile_sample_stride_num_frames + 1, :, :]) + + dec = torch.cat(result_row, dim=2)[:, :, :num_sample_frames] + + if not return_dict: + return (dec,) + return DecoderOutput(sample=dec) + + def forward( + self, + sample: torch.Tensor, + sample_posterior: bool = False, + return_dict: bool = True, + generator: torch.Generator | None = None, + ) -> DecoderOutput | torch.Tensor: + r""" + Args: + sample (`torch.Tensor`): Input sample. + sample_posterior (`bool`, *optional*, defaults to `False`): + Whether to sample from the posterior. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`DecoderOutput`] instead of a plain tuple. + """ + x = sample + posterior = self.encode(x).latent_dist + if sample_posterior: + z = posterior.sample(generator=generator) + else: + z = posterior.mode() + dec = self.decode(z, return_dict=return_dict) + return dec diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_magvit.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_magvit.py new file mode 100644 index 0000000000000000000000000000000000000000..ea0e2cd00d52ff2c4d6e2153cf00c60606a07e52 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_kl_magvit.py @@ -0,0 +1,1072 @@ +# Copyright 2025 The EasyAnimate team and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ...utils.accelerate_utils import apply_forward_hook +from ..activations import get_activation +from ..modeling_outputs import AutoencoderKLOutput +from ..modeling_utils import ModelMixin +from .vae import AutoencoderMixin, DecoderOutput, DiagonalGaussianDistribution + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class EasyAnimateCausalConv3d(nn.Conv3d): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int | tuple[int, ...] = 3, + stride: int | tuple[int, ...] = 1, + padding: int | tuple[int, ...] = 1, + dilation: int | tuple[int, ...] = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = "zeros", + ): + # Ensure kernel_size, stride, and dilation are tuples of length 3 + kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size,) * 3 + assert len(kernel_size) == 3, f"Kernel size must be a 3-tuple, got {kernel_size} instead." + + stride = stride if isinstance(stride, tuple) else (stride,) * 3 + assert len(stride) == 3, f"Stride must be a 3-tuple, got {stride} instead." + + dilation = dilation if isinstance(dilation, tuple) else (dilation,) * 3 + assert len(dilation) == 3, f"Dilation must be a 3-tuple, got {dilation} instead." + + # Unpack kernel size, stride, and dilation for temporal, height, and width dimensions + t_ks, h_ks, w_ks = kernel_size + self.t_stride, h_stride, w_stride = stride + t_dilation, h_dilation, w_dilation = dilation + + # Calculate padding for temporal dimension to maintain causality + t_pad = (t_ks - 1) * t_dilation + + # Calculate padding for height and width dimensions based on the padding parameter + if padding is None: + h_pad = math.ceil(((h_ks - 1) * h_dilation + (1 - h_stride)) / 2) + w_pad = math.ceil(((w_ks - 1) * w_dilation + (1 - w_stride)) / 2) + elif isinstance(padding, int): + h_pad = w_pad = padding + else: + assert NotImplementedError + + # Store temporal padding and initialize flags and previous features cache + self.temporal_padding = t_pad + self.temporal_padding_origin = math.ceil(((t_ks - 1) * w_dilation + (1 - w_stride)) / 2) + + self.prev_features = None + + # Initialize the parent class with modified padding + super().__init__( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=(0, h_pad, w_pad), + groups=groups, + bias=bias, + padding_mode=padding_mode, + ) + + def _clear_conv_cache(self): + del self.prev_features + self.prev_features = None + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + # Ensure input tensor is of the correct type + dtype = hidden_states.dtype + if self.prev_features is None: + # Pad the input tensor in the temporal dimension to maintain causality + hidden_states = F.pad( + hidden_states, + pad=(0, 0, 0, 0, self.temporal_padding, 0), + mode="replicate", # TODO: check if this is necessary + ) + hidden_states = hidden_states.to(dtype=dtype) + + # Clear cache before processing and store previous features for causality + self._clear_conv_cache() + self.prev_features = hidden_states[:, :, -self.temporal_padding :].clone() + + # Process the input tensor in chunks along the temporal dimension + num_frames = hidden_states.size(2) + outputs = [] + i = 0 + while i + self.temporal_padding + 1 <= num_frames: + out = super().forward(hidden_states[:, :, i : i + self.temporal_padding + 1]) + i += self.t_stride + outputs.append(out) + return torch.concat(outputs, 2) + else: + # Concatenate previous features with the input tensor for continuous temporal processing + if self.t_stride == 2: + hidden_states = torch.concat( + [self.prev_features[:, :, -(self.temporal_padding - 1) :], hidden_states], dim=2 + ) + else: + hidden_states = torch.concat([self.prev_features, hidden_states], dim=2) + hidden_states = hidden_states.to(dtype=dtype) + + # Clear cache and update previous features + self._clear_conv_cache() + self.prev_features = hidden_states[:, :, -self.temporal_padding :].clone() + + # Process the concatenated tensor in chunks along the temporal dimension + num_frames = hidden_states.size(2) + outputs = [] + i = 0 + while i + self.temporal_padding + 1 <= num_frames: + out = super().forward(hidden_states[:, :, i : i + self.temporal_padding + 1]) + i += self.t_stride + outputs.append(out) + return torch.concat(outputs, 2) + + +class EasyAnimateResidualBlock3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + non_linearity: str = "silu", + norm_num_groups: int = 32, + norm_eps: float = 1e-6, + spatial_group_norm: bool = True, + dropout: float = 0.0, + output_scale_factor: float = 1.0, + ): + super().__init__() + + self.output_scale_factor = output_scale_factor + + # Group normalization for input tensor + self.norm1 = nn.GroupNorm( + num_groups=norm_num_groups, + num_channels=in_channels, + eps=norm_eps, + affine=True, + ) + self.nonlinearity = get_activation(non_linearity) + self.conv1 = EasyAnimateCausalConv3d(in_channels, out_channels, kernel_size=3) + + self.norm2 = nn.GroupNorm(num_groups=norm_num_groups, num_channels=out_channels, eps=norm_eps, affine=True) + self.dropout = nn.Dropout(dropout) + self.conv2 = EasyAnimateCausalConv3d(out_channels, out_channels, kernel_size=3) + + if in_channels != out_channels: + self.shortcut = nn.Conv3d(in_channels, out_channels, kernel_size=1) + else: + self.shortcut = nn.Identity() + + self.spatial_group_norm = spatial_group_norm + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + shortcut = self.shortcut(hidden_states) + + if self.spatial_group_norm: + batch_size = hidden_states.size(0) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) # [B, C, T, H, W] -> [B * T, C, H, W] + hidden_states = self.norm1(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).permute( + 0, 2, 1, 3, 4 + ) # [B * T, C, H, W] -> [B, C, T, H, W] + else: + hidden_states = self.norm1(hidden_states) + + hidden_states = self.nonlinearity(hidden_states) + hidden_states = self.conv1(hidden_states) + + if self.spatial_group_norm: + batch_size = hidden_states.size(0) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) # [B, C, T, H, W] -> [B * T, C, H, W] + hidden_states = self.norm2(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).permute( + 0, 2, 1, 3, 4 + ) # [B * T, C, H, W] -> [B, C, T, H, W] + else: + hidden_states = self.norm2(hidden_states) + + hidden_states = self.nonlinearity(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + return (hidden_states + shortcut) / self.output_scale_factor + + +class EasyAnimateDownsampler3D(nn.Module): + def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: tuple = (2, 2, 2)): + super().__init__() + + self.conv = EasyAnimateCausalConv3d( + in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=0 + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = F.pad(hidden_states, (0, 1, 0, 1)) + hidden_states = self.conv(hidden_states) + return hidden_states + + +class EasyAnimateUpsampler3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + temporal_upsample: bool = False, + spatial_group_norm: bool = True, + ): + super().__init__() + out_channels = out_channels or in_channels + + self.temporal_upsample = temporal_upsample + self.spatial_group_norm = spatial_group_norm + + self.conv = EasyAnimateCausalConv3d( + in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size + ) + self.prev_features = None + + def _clear_conv_cache(self): + del self.prev_features + self.prev_features = None + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = F.interpolate(hidden_states, scale_factor=(1, 2, 2), mode="nearest") + hidden_states = self.conv(hidden_states) + + if self.temporal_upsample: + if self.prev_features is None: + self.prev_features = hidden_states + else: + hidden_states = F.interpolate( + hidden_states, + scale_factor=(2, 1, 1), + mode="trilinear" if not self.spatial_group_norm else "nearest", + ) + return hidden_states + + +class EasyAnimateDownBlock3D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + num_layers: int = 1, + act_fn: str = "silu", + norm_num_groups: int = 32, + norm_eps: float = 1e-6, + spatial_group_norm: bool = True, + dropout: float = 0.0, + output_scale_factor: float = 1.0, + add_downsample: bool = True, + add_temporal_downsample: bool = True, + ): + super().__init__() + + self.convs = nn.ModuleList([]) + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.convs.append( + EasyAnimateResidualBlock3D( + in_channels=in_channels, + out_channels=out_channels, + non_linearity=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=norm_eps, + spatial_group_norm=spatial_group_norm, + dropout=dropout, + output_scale_factor=output_scale_factor, + ) + ) + + if add_downsample and add_temporal_downsample: + self.downsampler = EasyAnimateDownsampler3D(out_channels, out_channels, kernel_size=3, stride=(2, 2, 2)) + self.spatial_downsample_factor = 2 + self.temporal_downsample_factor = 2 + elif add_downsample and not add_temporal_downsample: + self.downsampler = EasyAnimateDownsampler3D(out_channels, out_channels, kernel_size=3, stride=(1, 2, 2)) + self.spatial_downsample_factor = 2 + self.temporal_downsample_factor = 1 + else: + self.downsampler = None + self.spatial_downsample_factor = 1 + self.temporal_downsample_factor = 1 + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + for conv in self.convs: + hidden_states = conv(hidden_states) + if self.downsampler is not None: + hidden_states = self.downsampler(hidden_states) + return hidden_states + + +class EasyAnimateUpBlock3d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + num_layers: int = 1, + act_fn: str = "silu", + norm_num_groups: int = 32, + norm_eps: float = 1e-6, + spatial_group_norm: bool = False, + dropout: float = 0.0, + output_scale_factor: float = 1.0, + add_upsample: bool = True, + add_temporal_upsample: bool = True, + ): + super().__init__() + + self.convs = nn.ModuleList([]) + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.convs.append( + EasyAnimateResidualBlock3D( + in_channels=in_channels, + out_channels=out_channels, + non_linearity=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=norm_eps, + spatial_group_norm=spatial_group_norm, + dropout=dropout, + output_scale_factor=output_scale_factor, + ) + ) + + if add_upsample: + self.upsampler = EasyAnimateUpsampler3D( + in_channels, + in_channels, + temporal_upsample=add_temporal_upsample, + spatial_group_norm=spatial_group_norm, + ) + else: + self.upsampler = None + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + for conv in self.convs: + hidden_states = conv(hidden_states) + if self.upsampler is not None: + hidden_states = self.upsampler(hidden_states) + return hidden_states + + +class EasyAnimateMidBlock3d(nn.Module): + def __init__( + self, + in_channels: int, + num_layers: int = 1, + act_fn: str = "silu", + norm_num_groups: int = 32, + norm_eps: float = 1e-6, + spatial_group_norm: bool = True, + dropout: float = 0.0, + output_scale_factor: float = 1.0, + ): + super().__init__() + + norm_num_groups = norm_num_groups if norm_num_groups is not None else min(in_channels // 4, 32) + + self.convs = nn.ModuleList( + [ + EasyAnimateResidualBlock3D( + in_channels=in_channels, + out_channels=in_channels, + non_linearity=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=norm_eps, + spatial_group_norm=spatial_group_norm, + dropout=dropout, + output_scale_factor=output_scale_factor, + ) + ] + ) + + for _ in range(num_layers - 1): + self.convs.append( + EasyAnimateResidualBlock3D( + in_channels=in_channels, + out_channels=in_channels, + non_linearity=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=norm_eps, + spatial_group_norm=spatial_group_norm, + dropout=dropout, + output_scale_factor=output_scale_factor, + ) + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.convs[0](hidden_states) + for resnet in self.convs[1:]: + hidden_states = resnet(hidden_states) + return hidden_states + + +class EasyAnimateEncoder(nn.Module): + r""" + Causal encoder for 3D video-like data used in [EasyAnimate](https://huggingface.co/papers/2405.18991). + """ + + _supports_gradient_checkpointing = True + + def __init__( + self, + in_channels: int = 3, + out_channels: int = 8, + down_block_types: tuple[str, ...] = ( + "SpatialDownBlock3D", + "SpatialTemporalDownBlock3D", + "SpatialTemporalDownBlock3D", + "SpatialTemporalDownBlock3D", + ), + block_out_channels: tuple[int, ...] = [128, 256, 512, 512], + layers_per_block: int = 2, + norm_num_groups: int = 32, + act_fn: str = "silu", + double_z: bool = True, + spatial_group_norm: bool = False, + ): + super().__init__() + + # 1. Input convolution + self.conv_in = EasyAnimateCausalConv3d(in_channels, block_out_channels[0], kernel_size=3) + + # 2. Down blocks + self.down_blocks = nn.ModuleList([]) + output_channels = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channels = output_channels + output_channels = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + if down_block_type == "SpatialDownBlock3D": + down_block = EasyAnimateDownBlock3D( + in_channels=input_channels, + out_channels=output_channels, + num_layers=layers_per_block, + act_fn=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=1e-6, + spatial_group_norm=spatial_group_norm, + add_downsample=not is_final_block, + add_temporal_downsample=False, + ) + elif down_block_type == "SpatialTemporalDownBlock3D": + down_block = EasyAnimateDownBlock3D( + in_channels=input_channels, + out_channels=output_channels, + num_layers=layers_per_block, + act_fn=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=1e-6, + spatial_group_norm=spatial_group_norm, + add_downsample=not is_final_block, + add_temporal_downsample=True, + ) + else: + raise ValueError(f"Unknown up block type: {down_block_type}") + self.down_blocks.append(down_block) + + # 3. Middle block + self.mid_block = EasyAnimateMidBlock3d( + in_channels=block_out_channels[-1], + num_layers=layers_per_block, + act_fn=act_fn, + spatial_group_norm=spatial_group_norm, + norm_num_groups=norm_num_groups, + norm_eps=1e-6, + dropout=0, + output_scale_factor=1, + ) + + # 4. Output normalization & convolution + self.spatial_group_norm = spatial_group_norm + self.conv_norm_out = nn.GroupNorm( + num_channels=block_out_channels[-1], + num_groups=norm_num_groups, + eps=1e-6, + ) + self.conv_act = get_activation(act_fn) + + # Initialize the output convolution layer + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = EasyAnimateCausalConv3d(block_out_channels[-1], conv_out_channels, kernel_size=3) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + # hidden_states: (B, C, T, H, W) + hidden_states = self.conv_in(hidden_states) + + for down_block in self.down_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(down_block, hidden_states) + else: + hidden_states = down_block(hidden_states) + + hidden_states = self.mid_block(hidden_states) + + if self.spatial_group_norm: + batch_size = hidden_states.size(0) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) + hidden_states = self.conv_norm_out(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).permute(0, 2, 1, 3, 4) + else: + hidden_states = self.conv_norm_out(hidden_states) + + hidden_states = self.conv_act(hidden_states) + hidden_states = self.conv_out(hidden_states) + return hidden_states + + +class EasyAnimateDecoder(nn.Module): + r""" + Causal decoder for 3D video-like data used in [EasyAnimate](https://huggingface.co/papers/2405.18991). + """ + + _supports_gradient_checkpointing = True + + def __init__( + self, + in_channels: int = 8, + out_channels: int = 3, + up_block_types: tuple[str, ...] = ( + "SpatialUpBlock3D", + "SpatialTemporalUpBlock3D", + "SpatialTemporalUpBlock3D", + "SpatialTemporalUpBlock3D", + ), + block_out_channels: tuple[int, ...] = [128, 256, 512, 512], + layers_per_block: int = 2, + norm_num_groups: int = 32, + act_fn: str = "silu", + spatial_group_norm: bool = False, + ): + super().__init__() + + # 1. Input convolution + self.conv_in = EasyAnimateCausalConv3d(in_channels, block_out_channels[-1], kernel_size=3) + + # 2. Middle block + self.mid_block = EasyAnimateMidBlock3d( + in_channels=block_out_channels[-1], + num_layers=layers_per_block, + act_fn=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=1e-6, + dropout=0, + output_scale_factor=1, + ) + + # 3. Up blocks + self.up_blocks = nn.ModuleList([]) + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channels = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + input_channels = output_channels + output_channels = reversed_block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + # Create and append up block to up_blocks + if up_block_type == "SpatialUpBlock3D": + up_block = EasyAnimateUpBlock3d( + in_channels=input_channels, + out_channels=output_channels, + num_layers=layers_per_block + 1, + act_fn=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=1e-6, + spatial_group_norm=spatial_group_norm, + add_upsample=not is_final_block, + add_temporal_upsample=False, + ) + elif up_block_type == "SpatialTemporalUpBlock3D": + up_block = EasyAnimateUpBlock3d( + in_channels=input_channels, + out_channels=output_channels, + num_layers=layers_per_block + 1, + act_fn=act_fn, + norm_num_groups=norm_num_groups, + norm_eps=1e-6, + spatial_group_norm=spatial_group_norm, + add_upsample=not is_final_block, + add_temporal_upsample=True, + ) + else: + raise ValueError(f"Unknown up block type: {up_block_type}") + self.up_blocks.append(up_block) + + # Output normalization and activation + self.spatial_group_norm = spatial_group_norm + self.conv_norm_out = nn.GroupNorm( + num_channels=block_out_channels[0], + num_groups=norm_num_groups, + eps=1e-6, + ) + self.conv_act = get_activation(act_fn) + + # Output convolution layer + self.conv_out = EasyAnimateCausalConv3d(block_out_channels[0], out_channels, kernel_size=3) + + self.gradient_checkpointing = False + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + # hidden_states: (B, C, T, H, W) + hidden_states = self.conv_in(hidden_states) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(self.mid_block, hidden_states) + else: + hidden_states = self.mid_block(hidden_states) + + for up_block in self.up_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(up_block, hidden_states) + else: + hidden_states = up_block(hidden_states) + + if self.spatial_group_norm: + batch_size = hidden_states.size(0) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) # [B, C, T, H, W] -> [B * T, C, H, W] + hidden_states = self.conv_norm_out(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).permute( + 0, 2, 1, 3, 4 + ) # [B * T, C, H, W] -> [B, C, T, H, W] + else: + hidden_states = self.conv_norm_out(hidden_states) + + hidden_states = self.conv_act(hidden_states) + hidden_states = self.conv_out(hidden_states) + return hidden_states + + +class AutoencoderKLMagvit(ModelMixin, AutoencoderMixin, ConfigMixin): + r""" + A VAE model with KL loss for encoding images into latents and decoding latent representations into images. This + model is used in [EasyAnimate](https://huggingface.co/papers/2405.18991). + + This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented + for all models (such as downloading or saving). + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 3, + latent_channels: int = 16, + out_channels: int = 3, + block_out_channels: tuple[int, ...] = [128, 256, 512, 512], + down_block_types: tuple[str, ...] = [ + "SpatialDownBlock3D", + "SpatialTemporalDownBlock3D", + "SpatialTemporalDownBlock3D", + "SpatialTemporalDownBlock3D", + ], + up_block_types: tuple[str, ...] = [ + "SpatialUpBlock3D", + "SpatialTemporalUpBlock3D", + "SpatialTemporalUpBlock3D", + "SpatialTemporalUpBlock3D", + ], + layers_per_block: int = 2, + act_fn: str = "silu", + norm_num_groups: int = 32, + scaling_factor: float = 0.7125, + spatial_group_norm: bool = True, + ): + super().__init__() + + # Initialize the encoder + self.encoder = EasyAnimateEncoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + norm_num_groups=norm_num_groups, + act_fn=act_fn, + double_z=True, + spatial_group_norm=spatial_group_norm, + ) + + # Initialize the decoder + self.decoder = EasyAnimateDecoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + norm_num_groups=norm_num_groups, + act_fn=act_fn, + spatial_group_norm=spatial_group_norm, + ) + + # Initialize convolution layers for quantization and post-quantization + self.quant_conv = nn.Conv3d(2 * latent_channels, 2 * latent_channels, kernel_size=1) + self.post_quant_conv = nn.Conv3d(latent_channels, latent_channels, kernel_size=1) + + self.spatial_compression_ratio = 2 ** (len(block_out_channels) - 1) + self.temporal_compression_ratio = 2 ** (len(block_out_channels) - 2) + + # When decoding a batch of video latents at a time, one can save memory by slicing across the batch dimension + # to perform decoding of a single video latent at a time. + self.use_slicing = False + + # When decoding spatially large video latents, the memory requirement is very high. By breaking the video latent + # frames spatially into smaller tiles and performing multiple forward passes for decoding, and then blending the + # intermediate tiles together, the memory requirement can be lowered. + self.use_tiling = False + + # When decoding temporally long video latents, the memory requirement is very high. By decoding latent frames + # at a fixed frame batch size (based on `self.num_latent_frames_batch_size`), the memory requirement can be lowered. + self.use_framewise_encoding = False + self.use_framewise_decoding = False + + # Assign mini-batch sizes for encoder and decoder + self.num_sample_frames_batch_size = 4 + self.num_latent_frames_batch_size = 1 + + # The minimal tile height and width for spatial tiling to be used + self.tile_sample_min_height = 512 + self.tile_sample_min_width = 512 + self.tile_sample_min_num_frames = 4 + + # The minimal distance between two spatial tiles + self.tile_sample_stride_height = 448 + self.tile_sample_stride_width = 448 + self.tile_sample_stride_num_frames = 8 + + def _clear_conv_cache(self): + # Clear cache for convolutional layers if needed + for name, module in self.named_modules(): + if isinstance(module, EasyAnimateCausalConv3d): + module._clear_conv_cache() + if isinstance(module, EasyAnimateUpsampler3D): + module._clear_conv_cache() + + def enable_tiling( + self, + tile_sample_min_height: int | None = None, + tile_sample_min_width: int | None = None, + tile_sample_min_num_frames: int | None = None, + tile_sample_stride_height: float | None = None, + tile_sample_stride_width: float | None = None, + tile_sample_stride_num_frames: float | None = None, + ) -> None: + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + + Args: + tile_sample_min_height (`int`, *optional*): + The minimum height required for a sample to be separated into tiles across the height dimension. + tile_sample_min_width (`int`, *optional*): + The minimum width required for a sample to be separated into tiles across the width dimension. + tile_sample_stride_height (`int`, *optional*): + The minimum amount of overlap between two consecutive vertical tiles. This is to ensure that there are + no tiling artifacts produced across the height dimension. + tile_sample_stride_width (`int`, *optional*): + The stride between two consecutive horizontal tiles. This is to ensure that there are no tiling + artifacts produced across the width dimension. + """ + self.use_tiling = True + self.use_framewise_decoding = True + self.use_framewise_encoding = True + self.tile_sample_min_height = tile_sample_min_height or self.tile_sample_min_height + self.tile_sample_min_width = tile_sample_min_width or self.tile_sample_min_width + self.tile_sample_min_num_frames = tile_sample_min_num_frames or self.tile_sample_min_num_frames + self.tile_sample_stride_height = tile_sample_stride_height or self.tile_sample_stride_height + self.tile_sample_stride_width = tile_sample_stride_width or self.tile_sample_stride_width + self.tile_sample_stride_num_frames = tile_sample_stride_num_frames or self.tile_sample_stride_num_frames + + @apply_forward_hook + def _encode( + self, x: torch.Tensor, return_dict: bool = True + ) -> AutoencoderKLOutput | tuple[DiagonalGaussianDistribution]: + """ + Encode a batch of images into latents. + + Args: + x (`torch.Tensor`): Input batch of images. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple. + + Returns: + The latent representations of the encoded images. If `return_dict` is True, a + [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned. + """ + if self.use_tiling and (x.shape[-1] > self.tile_sample_min_height or x.shape[-2] > self.tile_sample_min_width): + return self.tiled_encode(x, return_dict=return_dict) + + first_frames = self.encoder(x[:, :, :1, :, :]) + h = [first_frames] + for i in range(1, x.shape[2], self.num_sample_frames_batch_size): + next_frames = self.encoder(x[:, :, i : i + self.num_sample_frames_batch_size, :, :]) + h.append(next_frames) + h = torch.cat(h, dim=2) + moments = self.quant_conv(h) + + self._clear_conv_cache() + return moments + + @apply_forward_hook + def encode( + self, x: torch.Tensor, return_dict: bool = True + ) -> AutoencoderKLOutput | tuple[DiagonalGaussianDistribution]: + """ + Encode a batch of images into latents. + + Args: + x (`torch.Tensor`): Input batch of images. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple. + + Returns: + The latent representations of the encoded videos. If `return_dict` is True, a + [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned. + """ + if self.use_slicing and x.shape[0] > 1: + encoded_slices = [self._encode(x_slice) for x_slice in x.split(1)] + h = torch.cat(encoded_slices) + else: + h = self._encode(x) + + posterior = DiagonalGaussianDistribution(h) + + if not return_dict: + return (posterior,) + return AutoencoderKLOutput(latent_dist=posterior) + + def _decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + batch_size, num_channels, num_frames, height, width = z.shape + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + + if self.use_tiling and (z.shape[-1] > tile_latent_min_height or z.shape[-2] > tile_latent_min_width): + return self.tiled_decode(z, return_dict=return_dict) + + z = self.post_quant_conv(z) + + # Process the first frame and save the result + first_frames = self.decoder(z[:, :, :1, :, :]) + # Initialize the list to store the processed frames, starting with the first frame + dec = [first_frames] + # Process the remaining frames, with the number of frames processed at a time determined by mini_batch_decoder + for i in range(1, z.shape[2], self.num_latent_frames_batch_size): + next_frames = self.decoder(z[:, :, i : i + self.num_latent_frames_batch_size, :, :]) + dec.append(next_frames) + # Concatenate all processed frames along the channel dimension + dec = torch.cat(dec, dim=2) + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + + @apply_forward_hook + def decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + """ + Decode a batch of images. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + if self.use_slicing and z.shape[0] > 1: + decoded_slices = [self._decode(z_slice).sample for z_slice in z.split(1)] + decoded = torch.cat(decoded_slices) + else: + decoded = self._decode(z).sample + + self._clear_conv_cache() + if not return_dict: + return (decoded,) + return DecoderOutput(sample=decoded) + + def blend_v(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: + blend_extent = min(a.shape[3], b.shape[3], blend_extent) + for y in range(blend_extent): + b[:, :, :, y, :] = a[:, :, :, -blend_extent + y, :] * (1 - y / blend_extent) + b[:, :, :, y, :] * ( + y / blend_extent + ) + return b + + def blend_h(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int) -> torch.Tensor: + blend_extent = min(a.shape[4], b.shape[4], blend_extent) + for x in range(blend_extent): + b[:, :, :, :, x] = a[:, :, :, :, -blend_extent + x] * (1 - x / blend_extent) + b[:, :, :, :, x] * ( + x / blend_extent + ) + return b + + def tiled_encode(self, x: torch.Tensor, return_dict: bool = True) -> AutoencoderKLOutput: + batch_size, num_channels, num_frames, height, width = x.shape + latent_height = height // self.spatial_compression_ratio + latent_width = width // self.spatial_compression_ratio + + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + tile_latent_stride_height = self.tile_sample_stride_height // self.spatial_compression_ratio + tile_latent_stride_width = self.tile_sample_stride_width // self.spatial_compression_ratio + + blend_height = tile_latent_min_height - tile_latent_stride_height + blend_width = tile_latent_min_width - tile_latent_stride_width + + # Split the image into 512x512 tiles and encode them separately. + rows = [] + for i in range(0, height, self.tile_sample_stride_height): + row = [] + for j in range(0, width, self.tile_sample_stride_width): + tile = x[ + :, + :, + :, + i : i + self.tile_sample_min_height, + j : j + self.tile_sample_min_width, + ] + + first_frames = self.encoder(tile[:, :, 0:1, :, :]) + tile_h = [first_frames] + for k in range(1, num_frames, self.num_sample_frames_batch_size): + next_frames = self.encoder(tile[:, :, k : k + self.num_sample_frames_batch_size, :, :]) + tile_h.append(next_frames) + tile = torch.cat(tile_h, dim=2) + tile = self.quant_conv(tile) + self._clear_conv_cache() + row.append(tile) + rows.append(row) + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_width) + result_row.append(tile[:, :, :, :latent_height, :latent_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + moments = torch.cat(result_rows, dim=3)[:, :, :, :latent_height, :latent_width] + return moments + + def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> DecoderOutput | torch.Tensor: + batch_size, num_channels, num_frames, height, width = z.shape + sample_height = height * self.spatial_compression_ratio + sample_width = width * self.spatial_compression_ratio + + tile_latent_min_height = self.tile_sample_min_height // self.spatial_compression_ratio + tile_latent_min_width = self.tile_sample_min_width // self.spatial_compression_ratio + tile_latent_stride_height = self.tile_sample_stride_height // self.spatial_compression_ratio + tile_latent_stride_width = self.tile_sample_stride_width // self.spatial_compression_ratio + + blend_height = self.tile_sample_min_height - self.tile_sample_stride_height + blend_width = self.tile_sample_min_width - self.tile_sample_stride_width + + # Split z into overlapping 64x64 tiles and decode them separately. + # The tiles have an overlap to avoid seams between tiles. + rows = [] + for i in range(0, height, tile_latent_stride_height): + row = [] + for j in range(0, width, tile_latent_stride_width): + tile = z[ + :, + :, + :, + i : i + tile_latent_min_height, + j : j + tile_latent_min_width, + ] + tile = self.post_quant_conv(tile) + + # Process the first frame and save the result + first_frames = self.decoder(tile[:, :, :1, :, :]) + # Initialize the list to store the processed frames, starting with the first frame + tile_dec = [first_frames] + # Process the remaining frames, with the number of frames processed at a time determined by mini_batch_decoder + for k in range(1, num_frames, self.num_latent_frames_batch_size): + next_frames = self.decoder(tile[:, :, k : k + self.num_latent_frames_batch_size, :, :]) + tile_dec.append(next_frames) + # Concatenate all processed frames along the channel dimension + decoded = torch.cat(tile_dec, dim=2) + self._clear_conv_cache() + row.append(decoded) + rows.append(row) + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_width) + result_row.append(tile[:, :, :, : self.tile_sample_stride_height, : self.tile_sample_stride_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + dec = torch.cat(result_rows, dim=3)[:, :, :, :sample_height, :sample_width] + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + + def forward( + self, + sample: torch.Tensor, + sample_posterior: bool = False, + return_dict: bool = True, + generator: torch.Generator | None = None, + ) -> DecoderOutput | torch.Tensor: + r""" + Args: + sample (`torch.Tensor`): Input sample. + sample_posterior (`bool`, *optional*, defaults to `False`): + Whether to sample from the posterior. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`DecoderOutput`] instead of a plain tuple. + """ + x = sample + posterior = self.encode(x).latent_dist + if sample_posterior: + z = posterior.sample(generator=generator) + else: + z = posterior.mode() + dec = self.decode(z).sample + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_oobleck.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_oobleck.py new file mode 100644 index 0000000000000000000000000000000000000000..239317cffd713e2db6d03e3bbeddf37f319d833b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/autoencoders/autoencoder_oobleck.py @@ -0,0 +1,451 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from dataclasses import dataclass + +import numpy as np +import torch +import torch.nn as nn +from torch.nn.utils import weight_norm + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import BaseOutput +from ...utils.accelerate_utils import apply_forward_hook +from ...utils.torch_utils import randn_tensor +from ..modeling_utils import ModelMixin +from .vae import AutoencoderMixin + + +class Snake1d(nn.Module): + """ + A 1-dimensional Snake activation function module. + """ + + def __init__(self, hidden_dim, logscale=True): + super().__init__() + self.alpha = nn.Parameter(torch.zeros(1, hidden_dim, 1)) + self.beta = nn.Parameter(torch.zeros(1, hidden_dim, 1)) + + self.alpha.requires_grad = True + self.beta.requires_grad = True + self.logscale = logscale + + def forward(self, hidden_states): + shape = hidden_states.shape + + alpha = self.alpha if not self.logscale else torch.exp(self.alpha) + beta = self.beta if not self.logscale else torch.exp(self.beta) + + hidden_states = hidden_states.reshape(shape[0], shape[1], -1) + hidden_states = hidden_states + (beta + 1e-9).reciprocal() * torch.sin(alpha * hidden_states).pow(2) + hidden_states = hidden_states.reshape(shape) + return hidden_states + + +class OobleckResidualUnit(nn.Module): + """ + A residual unit composed of Snake1d and weight-normalized Conv1d layers with dilations. + """ + + def __init__(self, dimension: int = 16, dilation: int = 1): + super().__init__() + pad = ((7 - 1) * dilation) // 2 + + self.snake1 = Snake1d(dimension) + self.conv1 = weight_norm(nn.Conv1d(dimension, dimension, kernel_size=7, dilation=dilation, padding=pad)) + self.snake2 = Snake1d(dimension) + self.conv2 = weight_norm(nn.Conv1d(dimension, dimension, kernel_size=1)) + + def forward(self, hidden_state): + """ + Forward pass through the residual unit. + + Args: + hidden_state (`torch.Tensor` of shape `(batch_size, channels, time_steps)`): + Input tensor . + + Returns: + output_tensor (`torch.Tensor` of shape `(batch_size, channels, time_steps)`) + Input tensor after passing through the residual unit. + """ + output_tensor = hidden_state + output_tensor = self.conv1(self.snake1(output_tensor)) + output_tensor = self.conv2(self.snake2(output_tensor)) + + padding = (hidden_state.shape[-1] - output_tensor.shape[-1]) // 2 + if padding > 0: + hidden_state = hidden_state[..., padding:-padding] + output_tensor = hidden_state + output_tensor + return output_tensor + + +class OobleckEncoderBlock(nn.Module): + """Encoder block used in Oobleck encoder.""" + + def __init__(self, input_dim, output_dim, stride: int = 1): + super().__init__() + + self.res_unit1 = OobleckResidualUnit(input_dim, dilation=1) + self.res_unit2 = OobleckResidualUnit(input_dim, dilation=3) + self.res_unit3 = OobleckResidualUnit(input_dim, dilation=9) + self.snake1 = Snake1d(input_dim) + self.conv1 = weight_norm( + nn.Conv1d(input_dim, output_dim, kernel_size=2 * stride, stride=stride, padding=math.ceil(stride / 2)) + ) + + def forward(self, hidden_state): + hidden_state = self.res_unit1(hidden_state) + hidden_state = self.res_unit2(hidden_state) + hidden_state = self.snake1(self.res_unit3(hidden_state)) + hidden_state = self.conv1(hidden_state) + + return hidden_state + + +class OobleckDecoderBlock(nn.Module): + """Decoder block used in Oobleck decoder.""" + + def __init__(self, input_dim, output_dim, stride: int = 1): + super().__init__() + + self.snake1 = Snake1d(input_dim) + self.conv_t1 = weight_norm( + nn.ConvTranspose1d( + input_dim, + output_dim, + kernel_size=2 * stride, + stride=stride, + padding=math.ceil(stride / 2), + ) + ) + self.res_unit1 = OobleckResidualUnit(output_dim, dilation=1) + self.res_unit2 = OobleckResidualUnit(output_dim, dilation=3) + self.res_unit3 = OobleckResidualUnit(output_dim, dilation=9) + + def forward(self, hidden_state): + hidden_state = self.snake1(hidden_state) + hidden_state = self.conv_t1(hidden_state) + hidden_state = self.res_unit1(hidden_state) + hidden_state = self.res_unit2(hidden_state) + hidden_state = self.res_unit3(hidden_state) + + return hidden_state + + +class OobleckDiagonalGaussianDistribution(object): + def __init__(self, parameters: torch.Tensor, deterministic: bool = False): + self.parameters = parameters + self.mean, self.scale = parameters.chunk(2, dim=1) + self.std = nn.functional.softplus(self.scale) + 1e-4 + self.var = self.std * self.std + self.logvar = torch.log(self.var) + self.deterministic = deterministic + + def sample(self, generator: torch.Generator | None = None) -> torch.Tensor: + # make sure sample is on the same device as the parameters and has same dtype + sample = randn_tensor( + self.mean.shape, + generator=generator, + device=self.parameters.device, + dtype=self.parameters.dtype, + ) + x = self.mean + self.std * sample + return x + + def kl(self, other: "OobleckDiagonalGaussianDistribution" = None) -> torch.Tensor: + if self.deterministic: + return torch.Tensor([0.0]) + else: + if other is None: + return (self.mean * self.mean + self.var - self.logvar - 1.0).sum(1).mean() + else: + normalized_diff = torch.pow(self.mean - other.mean, 2) / other.var + var_ratio = self.var / other.var + logvar_diff = self.logvar - other.logvar + + kl = normalized_diff + var_ratio + logvar_diff - 1 + + kl = kl.sum(1).mean() + return kl + + def mode(self) -> torch.Tensor: + return self.mean + + +@dataclass +class AutoencoderOobleckOutput(BaseOutput): + """ + Output of AutoencoderOobleck encoding method. + + Args: + latent_dist (`OobleckDiagonalGaussianDistribution`): + Encoded outputs of `Encoder` represented as the mean and standard deviation of + `OobleckDiagonalGaussianDistribution`. `OobleckDiagonalGaussianDistribution` allows for sampling latents + from the distribution. + """ + + latent_dist: "OobleckDiagonalGaussianDistribution" # noqa: F821 + + +@dataclass +class OobleckDecoderOutput(BaseOutput): + r""" + Output of decoding method. + + Args: + sample (`torch.Tensor` of shape `(batch_size, audio_channels, sequence_length)`): + The decoded output sample from the last layer of the model. + """ + + sample: torch.Tensor + + +class OobleckEncoder(nn.Module): + """Oobleck Encoder""" + + def __init__(self, encoder_hidden_size, audio_channels, downsampling_ratios, channel_multiples): + super().__init__() + + strides = downsampling_ratios + channel_multiples = [1] + channel_multiples + + # Create first convolution + self.conv1 = weight_norm(nn.Conv1d(audio_channels, encoder_hidden_size, kernel_size=7, padding=3)) + + self.block = [] + # Create EncoderBlocks that double channels as they downsample by `stride` + for stride_index, stride in enumerate(strides): + self.block += [ + OobleckEncoderBlock( + input_dim=encoder_hidden_size * channel_multiples[stride_index], + output_dim=encoder_hidden_size * channel_multiples[stride_index + 1], + stride=stride, + ) + ] + + self.block = nn.ModuleList(self.block) + d_model = encoder_hidden_size * channel_multiples[-1] + self.snake1 = Snake1d(d_model) + self.conv2 = weight_norm(nn.Conv1d(d_model, encoder_hidden_size, kernel_size=3, padding=1)) + + def forward(self, hidden_state): + hidden_state = self.conv1(hidden_state) + + for module in self.block: + hidden_state = module(hidden_state) + + hidden_state = self.snake1(hidden_state) + hidden_state = self.conv2(hidden_state) + + return hidden_state + + +class OobleckDecoder(nn.Module): + """Oobleck Decoder""" + + def __init__(self, channels, input_channels, audio_channels, upsampling_ratios, channel_multiples): + super().__init__() + + strides = upsampling_ratios + channel_multiples = [1] + channel_multiples + + # Add first conv layer + self.conv1 = weight_norm(nn.Conv1d(input_channels, channels * channel_multiples[-1], kernel_size=7, padding=3)) + + # Add upsampling + MRF blocks + block = [] + for stride_index, stride in enumerate(strides): + block += [ + OobleckDecoderBlock( + input_dim=channels * channel_multiples[len(strides) - stride_index], + output_dim=channels * channel_multiples[len(strides) - stride_index - 1], + stride=stride, + ) + ] + + self.block = nn.ModuleList(block) + output_dim = channels + self.snake1 = Snake1d(output_dim) + self.conv2 = weight_norm(nn.Conv1d(channels, audio_channels, kernel_size=7, padding=3, bias=False)) + + def forward(self, hidden_state): + hidden_state = self.conv1(hidden_state) + + for layer in self.block: + hidden_state = layer(hidden_state) + + hidden_state = self.snake1(hidden_state) + hidden_state = self.conv2(hidden_state) + + return hidden_state + + +class AutoencoderOobleck(ModelMixin, AutoencoderMixin, ConfigMixin): + r""" + An autoencoder for encoding waveforms into latents and decoding latent representations into waveforms. First + introduced in Stable Audio. + + This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented + for all models (such as downloading or saving). + + Parameters: + encoder_hidden_size (`int`, *optional*, defaults to 128): + Intermediate representation dimension for the encoder. + downsampling_ratios (`list[int]`, *optional*, defaults to `[2, 4, 4, 8, 8]`): + Ratios for downsampling in the encoder. These are used in reverse order for upsampling in the decoder. + channel_multiples (`list[int]`, *optional*, defaults to `[1, 2, 4, 8, 16]`): + Multiples used to determine the hidden sizes of the hidden layers. + decoder_channels (`int`, *optional*, defaults to 128): + Intermediate representation dimension for the decoder. + decoder_input_channels (`int`, *optional*, defaults to 64): + Input dimension for the decoder. Corresponds to the latent dimension. + audio_channels (`int`, *optional*, defaults to 2): + Number of channels in the audio data. Either 1 for mono or 2 for stereo. + sampling_rate (`int`, *optional*, defaults to 44100): + The sampling rate at which the audio waveform should be digitalized expressed in hertz (Hz). + """ + + _supports_gradient_checkpointing = False + _supports_group_offloading = False + + @register_to_config + def __init__( + self, + encoder_hidden_size=128, + downsampling_ratios=[2, 4, 4, 8, 8], + channel_multiples=[1, 2, 4, 8, 16], + decoder_channels=128, + decoder_input_channels=64, + audio_channels=2, + sampling_rate=44100, + ): + super().__init__() + + self.encoder_hidden_size = encoder_hidden_size + self.downsampling_ratios = downsampling_ratios + self.decoder_channels = decoder_channels + self.upsampling_ratios = downsampling_ratios[::-1] + self.hop_length = int(np.prod(downsampling_ratios)) + self.sampling_rate = sampling_rate + + self.encoder = OobleckEncoder( + encoder_hidden_size=encoder_hidden_size, + audio_channels=audio_channels, + downsampling_ratios=downsampling_ratios, + channel_multiples=channel_multiples, + ) + + self.decoder = OobleckDecoder( + channels=decoder_channels, + input_channels=decoder_input_channels, + audio_channels=audio_channels, + upsampling_ratios=self.upsampling_ratios, + channel_multiples=channel_multiples, + ) + + self.use_slicing = False + + @apply_forward_hook + def encode( + self, x: torch.Tensor, return_dict: bool = True + ) -> AutoencoderOobleckOutput | tuple[OobleckDiagonalGaussianDistribution]: + """ + Encode a batch of images into latents. + + Args: + x (`torch.Tensor`): Input batch of images. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.autoencoder_kl.AutoencoderKLOutput`] instead of a plain tuple. + + Returns: + The latent representations of the encoded images. If `return_dict` is True, a + [`~models.autoencoder_kl.AutoencoderKLOutput`] is returned, otherwise a plain `tuple` is returned. + """ + if self.use_slicing and x.shape[0] > 1: + encoded_slices = [self.encoder(x_slice) for x_slice in x.split(1)] + h = torch.cat(encoded_slices) + else: + h = self.encoder(x) + + posterior = OobleckDiagonalGaussianDistribution(h) + + if not return_dict: + return (posterior,) + + return AutoencoderOobleckOutput(latent_dist=posterior) + + def _decode(self, z: torch.Tensor, return_dict: bool = True) -> OobleckDecoderOutput | torch.Tensor: + dec = self.decoder(z) + + if not return_dict: + return (dec,) + + return OobleckDecoderOutput(sample=dec) + + @apply_forward_hook + def decode( + self, z: torch.FloatTensor, return_dict: bool = True, generator=None + ) -> OobleckDecoderOutput | torch.FloatTensor: + """ + Decode a batch of images. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a [`~models.vae.OobleckDecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.OobleckDecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.OobleckDecoderOutput`] is returned, otherwise a plain `tuple` + is returned. + + """ + if self.use_slicing and z.shape[0] > 1: + decoded_slices = [self._decode(z_slice).sample for z_slice in z.split(1)] + decoded = torch.cat(decoded_slices) + else: + decoded = self._decode(z).sample + + if not return_dict: + return (decoded,) + + return OobleckDecoderOutput(sample=decoded) + + def forward( + self, + sample: torch.Tensor, + sample_posterior: bool = False, + return_dict: bool = True, + generator: torch.Generator | None = None, + ) -> OobleckDecoderOutput | torch.Tensor: + r""" + Args: + sample (`torch.Tensor`): Input sample. + sample_posterior (`bool`, *optional*, defaults to `False`): + Whether to sample from the posterior. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`OobleckDecoderOutput`] instead of a plain tuple. + """ + x = sample + posterior = self.encode(x).latent_dist + if sample_posterior: + z = posterior.sample(generator=generator) + else: + z = posterior.mode() + dec = self.decode(z).sample + + if not return_dict: + return (dec,) + + return OobleckDecoderOutput(sample=dec) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/cache_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/cache_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5f9587a1b4de99021417ae6a71170cccee2eb6dd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/cache_utils.py @@ -0,0 +1,154 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from contextlib import contextmanager + +from ..utils.logging import get_logger + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +class CacheMixin: + r""" + A class for enable/disabling caching techniques on diffusion models. + + Supported caching techniques: + - [Pyramid Attention Broadcast](https://huggingface.co/papers/2408.12588) + - [FasterCache](https://huggingface.co/papers/2410.19355) + - [FirstBlockCache](https://github.com/chengzeyi/ParaAttention/blob/7a266123671b55e7e5a2fe9af3121f07a36afc78/README.md#first-block-cache-our-dynamic-caching) + """ + + _cache_config = None + + @property + def is_cache_enabled(self) -> bool: + return self._cache_config is not None + + def enable_cache(self, config) -> None: + r""" + Enable caching techniques on the model. + + Args: + config (`PyramidAttentionBroadcastConfig | FasterCacheConfig | FirstBlockCacheConfig`): + The configuration for applying the caching technique. Currently supported caching techniques are: + - [`~hooks.PyramidAttentionBroadcastConfig`] + - [`~hooks.FasterCacheConfig`] + - [`~hooks.FirstBlockCacheConfig`] + + Example: + + ```python + >>> import torch + >>> from diffusers import CogVideoXPipeline, PyramidAttentionBroadcastConfig + + >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16) + >>> pipe.to("cuda") + + >>> config = PyramidAttentionBroadcastConfig( + ... spatial_attention_block_skip_range=2, + ... spatial_attention_timestep_skip_range=(100, 800), + ... current_timestep_callback=lambda: pipe.current_timestep, + ... ) + >>> pipe.transformer.enable_cache(config) + ``` + """ + + from ..hooks import ( + FasterCacheConfig, + FirstBlockCacheConfig, + MagCacheConfig, + PyramidAttentionBroadcastConfig, + TaylorSeerCacheConfig, + apply_faster_cache, + apply_first_block_cache, + apply_mag_cache, + apply_pyramid_attention_broadcast, + apply_taylorseer_cache, + ) + + if self.is_cache_enabled: + raise ValueError( + f"Caching has already been enabled with {type(self._cache_config)}. To apply a new caching technique, please disable the existing one first." + ) + + if isinstance(config, FasterCacheConfig): + apply_faster_cache(self, config) + elif isinstance(config, FirstBlockCacheConfig): + apply_first_block_cache(self, config) + elif isinstance(config, MagCacheConfig): + apply_mag_cache(self, config) + elif isinstance(config, PyramidAttentionBroadcastConfig): + apply_pyramid_attention_broadcast(self, config) + elif isinstance(config, TaylorSeerCacheConfig): + apply_taylorseer_cache(self, config) + else: + raise ValueError(f"Cache config {type(config)} is not supported.") + + self._cache_config = config + + def disable_cache(self) -> None: + from ..hooks import ( + FasterCacheConfig, + FirstBlockCacheConfig, + HookRegistry, + MagCacheConfig, + PyramidAttentionBroadcastConfig, + TaylorSeerCacheConfig, + ) + from ..hooks.faster_cache import _FASTER_CACHE_BLOCK_HOOK, _FASTER_CACHE_DENOISER_HOOK + from ..hooks.first_block_cache import _FBC_BLOCK_HOOK, _FBC_LEADER_BLOCK_HOOK + from ..hooks.mag_cache import _MAG_CACHE_BLOCK_HOOK, _MAG_CACHE_LEADER_BLOCK_HOOK + from ..hooks.pyramid_attention_broadcast import _PYRAMID_ATTENTION_BROADCAST_HOOK + from ..hooks.taylorseer_cache import _TAYLORSEER_CACHE_HOOK + + if self._cache_config is None: + logger.warning("Caching techniques have not been enabled, so there's nothing to disable.") + return + + registry = HookRegistry.check_if_exists_or_initialize(self) + if isinstance(self._cache_config, FasterCacheConfig): + registry.remove_hook(_FASTER_CACHE_DENOISER_HOOK, recurse=True) + registry.remove_hook(_FASTER_CACHE_BLOCK_HOOK, recurse=True) + elif isinstance(self._cache_config, FirstBlockCacheConfig): + registry.remove_hook(_FBC_LEADER_BLOCK_HOOK, recurse=True) + registry.remove_hook(_FBC_BLOCK_HOOK, recurse=True) + elif isinstance(self._cache_config, MagCacheConfig): + registry.remove_hook(_MAG_CACHE_LEADER_BLOCK_HOOK, recurse=True) + registry.remove_hook(_MAG_CACHE_BLOCK_HOOK, recurse=True) + elif isinstance(self._cache_config, PyramidAttentionBroadcastConfig): + registry.remove_hook(_PYRAMID_ATTENTION_BROADCAST_HOOK, recurse=True) + elif isinstance(self._cache_config, TaylorSeerCacheConfig): + registry.remove_hook(_TAYLORSEER_CACHE_HOOK, recurse=True) + else: + raise ValueError(f"Cache config {type(self._cache_config)} is not supported.") + + self._cache_config = None + + def _reset_stateful_cache(self, recurse: bool = True) -> None: + from ..hooks import HookRegistry + + HookRegistry.check_if_exists_or_initialize(self).reset_stateful_hooks(recurse=recurse) + + @contextmanager + def cache_context(self, name: str): + r"""Context manager that provides additional methods for cache management.""" + from ..hooks import HookRegistry + + registry = HookRegistry.check_if_exists_or_initialize(self) + registry._set_context(name) + + yield + + registry._set_context(None) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..853a2207f90340e82302994fdcca7dc70d824c59 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/__init__.py @@ -0,0 +1,28 @@ +from ...utils import is_flax_available, is_torch_available + + +if is_torch_available(): + from .controlnet import ControlNetModel, ControlNetOutput + from .controlnet_cosmos import CosmosControlNetModel + from .controlnet_flux import FluxControlNetModel, FluxControlNetOutput, FluxMultiControlNetModel + from .controlnet_hunyuan import ( + HunyuanControlNetOutput, + HunyuanDiT2DControlNetModel, + HunyuanDiT2DMultiControlNetModel, + ) + from .controlnet_qwenimage import QwenImageControlNetModel, QwenImageMultiControlNetModel + from .controlnet_sana import SanaControlNetModel + from .controlnet_sd3 import SD3ControlNetModel, SD3ControlNetOutput, SD3MultiControlNetModel + from .controlnet_sparsectrl import ( + SparseControlNetConditioningEmbedding, + SparseControlNetModel, + SparseControlNetOutput, + ) + from .controlnet_union import ControlNetUnionModel + from .controlnet_xs import ControlNetXSAdapter, ControlNetXSOutput, UNetControlNetXSModel + from .controlnet_z_image import ZImageControlNetModel + from .multicontrolnet import MultiControlNetModel + from .multicontrolnet_union import MultiControlNetUnionModel + +if is_flax_available(): + from .controlnet_flax import FlaxControlNetModel diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..8c2ff2fdd1233f0992dcd46d4b89d16e17a0fd60 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet.py @@ -0,0 +1,809 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from typing import Any + +import torch +from torch import nn +from torch.nn import functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...loaders.single_file_model import FromOriginalModelMixin +from ...utils import BaseOutput, apply_lora_scale, logging +from ..attention import AttentionMixin +from ..attention_processor import ( + ADDED_KV_ATTENTION_PROCESSORS, + CROSS_ATTENTION_PROCESSORS, + AttnAddedKVProcessor, + AttnProcessor, +) +from ..embeddings import TextImageProjection, TextImageTimeEmbedding, TextTimeEmbedding, TimestepEmbedding, Timesteps +from ..modeling_utils import ModelMixin +from ..unets.unet_2d_blocks import ( + UNetMidBlock2D, + UNetMidBlock2DCrossAttn, + get_down_block, +) +from ..unets.unet_2d_condition import UNet2DConditionModel + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class ControlNetOutput(BaseOutput): + """ + The output of [`ControlNetModel`]. + + Args: + down_block_res_samples (`tuple[torch.Tensor]`): + A tuple of downsample activations at different resolutions for each downsampling block. Each tensor should + be of shape `(batch_size, channel * resolution, height //resolution, width // resolution)`. Output can be + used to condition the original UNet's downsampling activations. + mid_down_block_re_sample (`torch.Tensor`): + The activation of the middle block (the lowest sample resolution). Each tensor should be of shape + `(batch_size, channel * lowest_resolution, height // lowest_resolution, width // lowest_resolution)`. + Output can be used to condition the original UNet's middle block activation. + """ + + down_block_res_samples: tuple[torch.Tensor] + mid_block_res_sample: torch.Tensor + + +class ControlNetConditioningEmbedding(nn.Module): + """ + Quoting from https://huggingface.co/papers/2302.05543: "Stable Diffusion uses a pre-processing method similar to + VQ-GAN [11] to convert the entire dataset of 512 × 512 images into smaller 64 × 64 “latent images” for stabilized + training. This requires ControlNets to convert image-based conditions to 64 × 64 feature space to match the + convolution size. We use a tiny network E(·) of four convolution layers with 4 × 4 kernels and 2 × 2 strides + (activated by ReLU, channels are 16, 32, 64, 128, initialized with Gaussian weights, trained jointly with the full + model) to encode image-space conditions ... into feature maps ..." + """ + + def __init__( + self, + conditioning_embedding_channels: int, + conditioning_channels: int = 3, + block_out_channels: tuple[int, ...] = (16, 32, 96, 256), + ): + super().__init__() + + self.conv_in = nn.Conv2d(conditioning_channels, block_out_channels[0], kernel_size=3, padding=1) + + self.blocks = nn.ModuleList([]) + + for i in range(len(block_out_channels) - 1): + channel_in = block_out_channels[i] + channel_out = block_out_channels[i + 1] + self.blocks.append(nn.Conv2d(channel_in, channel_in, kernel_size=3, padding=1)) + self.blocks.append(nn.Conv2d(channel_in, channel_out, kernel_size=3, padding=1, stride=2)) + + self.conv_out = zero_module( + nn.Conv2d(block_out_channels[-1], conditioning_embedding_channels, kernel_size=3, padding=1) + ) + + def forward(self, conditioning): + embedding = self.conv_in(conditioning) + embedding = F.silu(embedding) + + for block in self.blocks: + embedding = block(embedding) + embedding = F.silu(embedding) + + embedding = self.conv_out(embedding) + + return embedding + + +class ControlNetModel(ModelMixin, AttentionMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapterMixin): + """ + A ControlNet model. + + Args: + in_channels (`int`, defaults to 4): + The number of channels in the input sample. + flip_sin_to_cos (`bool`, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + freq_shift (`int`, defaults to 0): + The frequency shift to apply to the time embedding. + down_block_types (`tuple[str]`, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`): + The tuple of downsample blocks to use. + only_cross_attention (`bool | tuple[bool]`, defaults to `False`): + block_out_channels (`tuple[int]`, defaults to `(320, 640, 1280, 1280)`): + The tuple of output channels for each block. + layers_per_block (`int`, defaults to 2): + The number of layers per block. + downsample_padding (`int`, defaults to 1): + The padding to use for the downsampling convolution. + mid_block_scale_factor (`float`, defaults to 1): + The scale factor to use for the mid block. + act_fn (`str`, defaults to "silu"): + The activation function to use. + norm_num_groups (`int`, *optional*, defaults to 32): + The number of groups to use for the normalization. If None, normalization and activation layers is skipped + in post-processing. + norm_eps (`float`, defaults to 1e-5): + The epsilon to use for the normalization. + cross_attention_dim (`int`, defaults to 1280): + The dimension of the cross attention features. + transformer_layers_per_block (`int` or `tuple[int]`, *optional*, defaults to 1): + The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for + [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.CrossAttnUpBlock2D`], + [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`]. + encoder_hid_dim (`int`, *optional*, defaults to None): + If `encoder_hid_dim_type` is defined, `encoder_hidden_states` will be projected from `encoder_hid_dim` + dimension to `cross_attention_dim`. + encoder_hid_dim_type (`str`, *optional*, defaults to `None`): + If given, the `encoder_hidden_states` and potentially other embeddings are down-projected to text + embeddings of dimension `cross_attention` according to `encoder_hid_dim_type`. + attention_head_dim (`int | tuple[int]`, defaults to 8): + The dimension of the attention heads. + use_linear_projection (`bool`, defaults to `False`): + class_embed_type (`str`, *optional*, defaults to `None`): + The type of class embedding to use which is ultimately summed with the time embeddings. Choose from None, + `"timestep"`, `"identity"`, `"projection"`, or `"simple_projection"`. + addition_embed_type (`str`, *optional*, defaults to `None`): + Configures an optional embedding which will be summed with the time embeddings. Choose from `None` or + "text". "text" will use the `TextTimeEmbedding` layer. + num_class_embeds (`int`, *optional*, defaults to 0): + Input dimension of the learnable embedding matrix to be projected to `time_embed_dim`, when performing + class conditioning with `class_embed_type` equal to `None`. + upcast_attention (`bool`, defaults to `False`): + resnet_time_scale_shift (`str`, defaults to `"default"`): + Time scale shift config for ResNet blocks (see `ResnetBlock2D`). Choose from `default` or `scale_shift`. + projection_class_embeddings_input_dim (`int`, *optional*, defaults to `None`): + The dimension of the `class_labels` input when `class_embed_type="projection"`. Required when + `class_embed_type="projection"`. + controlnet_conditioning_channel_order (`str`, defaults to `"rgb"`): + The channel order of conditional image. Will convert to `rgb` if it's `bgr`. + conditioning_embedding_out_channels (`tuple[int]`, *optional*, defaults to `(16, 32, 96, 256)`): + The tuple of output channel for each block in the `conditioning_embedding` layer. + global_pool_conditions (`bool`, defaults to `False`): + TODO(Patrick) - unused parameter. + addition_embed_type_num_heads (`int`, defaults to 64): + The number of heads to use for the `TextTimeEmbedding` layer. + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 4, + conditioning_channels: int = 3, + flip_sin_to_cos: bool = True, + freq_shift: int = 0, + down_block_types: tuple[str, ...] = ( + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "DownBlock2D", + ), + mid_block_type: str | None = "UNetMidBlock2DCrossAttn", + only_cross_attention: bool | tuple[bool] = False, + block_out_channels: tuple[int, ...] = (320, 640, 1280, 1280), + layers_per_block: int = 2, + downsample_padding: int = 1, + mid_block_scale_factor: float = 1, + act_fn: str = "silu", + norm_num_groups: int | None = 32, + norm_eps: float = 1e-5, + cross_attention_dim: int = 1280, + transformer_layers_per_block: int | tuple[int, ...] = 1, + encoder_hid_dim: int | None = None, + encoder_hid_dim_type: str | None = None, + attention_head_dim: int | tuple[int, ...] = 8, + num_attention_heads: int | tuple[int, ...] | None = None, + use_linear_projection: bool = False, + class_embed_type: str | None = None, + addition_embed_type: str | None = None, + addition_time_embed_dim: int | None = None, + num_class_embeds: int | None = None, + upcast_attention: bool = False, + resnet_time_scale_shift: str = "default", + projection_class_embeddings_input_dim: int | None = None, + controlnet_conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int, ...] | None = (16, 32, 96, 256), + global_pool_conditions: bool = False, + addition_embed_type_num_heads: int = 64, + ): + super().__init__() + + # If `num_attention_heads` is not defined (which is the case for most models) + # it will default to `attention_head_dim`. This looks weird upon first reading it and it is. + # The reason for this behavior is to correct for incorrectly named variables that were introduced + # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 + # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking + # which is why we correct for the naming here. + num_attention_heads = num_attention_heads or attention_head_dim + + # Check inputs + if len(block_out_channels) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(only_cross_attention, bool) and len(only_cross_attention) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `only_cross_attention` as `down_block_types`. `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(num_attention_heads, int) and len(num_attention_heads) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}." + ) + + if isinstance(transformer_layers_per_block, int): + transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types) + + # input + conv_in_kernel = 3 + conv_in_padding = (conv_in_kernel - 1) // 2 + self.conv_in = nn.Conv2d( + in_channels, block_out_channels[0], kernel_size=conv_in_kernel, padding=conv_in_padding + ) + + # time + time_embed_dim = block_out_channels[0] * 4 + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + self.time_embedding = TimestepEmbedding( + timestep_input_dim, + time_embed_dim, + act_fn=act_fn, + ) + + if encoder_hid_dim_type is None and encoder_hid_dim is not None: + encoder_hid_dim_type = "text_proj" + self.register_to_config(encoder_hid_dim_type=encoder_hid_dim_type) + logger.info("encoder_hid_dim_type defaults to 'text_proj' as `encoder_hid_dim` is defined.") + + if encoder_hid_dim is None and encoder_hid_dim_type is not None: + raise ValueError( + f"`encoder_hid_dim` has to be defined when `encoder_hid_dim_type` is set to {encoder_hid_dim_type}." + ) + + if encoder_hid_dim_type == "text_proj": + self.encoder_hid_proj = nn.Linear(encoder_hid_dim, cross_attention_dim) + elif encoder_hid_dim_type == "text_image_proj": + # image_embed_dim DOESN'T have to be `cross_attention_dim`. To not clutter the __init__ too much + # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use + # case when `addition_embed_type == "text_image_proj"` (Kandinsky 2.1)` + self.encoder_hid_proj = TextImageProjection( + text_embed_dim=encoder_hid_dim, + image_embed_dim=cross_attention_dim, + cross_attention_dim=cross_attention_dim, + ) + + elif encoder_hid_dim_type is not None: + raise ValueError( + f"encoder_hid_dim_type: {encoder_hid_dim_type} must be None, 'text_proj' or 'text_image_proj'." + ) + else: + self.encoder_hid_proj = None + + # class embedding + if class_embed_type is None and num_class_embeds is not None: + self.class_embedding = nn.Embedding(num_class_embeds, time_embed_dim) + elif class_embed_type == "timestep": + self.class_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + elif class_embed_type == "identity": + self.class_embedding = nn.Identity(time_embed_dim, time_embed_dim) + elif class_embed_type == "projection": + if projection_class_embeddings_input_dim is None: + raise ValueError( + "`class_embed_type`: 'projection' requires `projection_class_embeddings_input_dim` be set" + ) + # The projection `class_embed_type` is the same as the timestep `class_embed_type` except + # 1. the `class_labels` inputs are not first converted to sinusoidal embeddings + # 2. it projects from an arbitrary input dimension. + # + # Note that `TimestepEmbedding` is quite general, being mainly linear layers and activations. + # When used for embedding actual timesteps, the timesteps are first converted to sinusoidal embeddings. + # As a result, `TimestepEmbedding` can be passed arbitrary vectors. + self.class_embedding = TimestepEmbedding(projection_class_embeddings_input_dim, time_embed_dim) + else: + self.class_embedding = None + + if addition_embed_type == "text": + if encoder_hid_dim is not None: + text_time_embedding_from_dim = encoder_hid_dim + else: + text_time_embedding_from_dim = cross_attention_dim + + self.add_embedding = TextTimeEmbedding( + text_time_embedding_from_dim, time_embed_dim, num_heads=addition_embed_type_num_heads + ) + elif addition_embed_type == "text_image": + # text_embed_dim and image_embed_dim DON'T have to be `cross_attention_dim`. To not clutter the __init__ too much + # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use + # case when `addition_embed_type == "text_image"` (Kandinsky 2.1)` + self.add_embedding = TextImageTimeEmbedding( + text_embed_dim=cross_attention_dim, image_embed_dim=cross_attention_dim, time_embed_dim=time_embed_dim + ) + elif addition_embed_type == "text_time": + self.add_time_proj = Timesteps(addition_time_embed_dim, flip_sin_to_cos, freq_shift) + self.add_embedding = TimestepEmbedding(projection_class_embeddings_input_dim, time_embed_dim) + + elif addition_embed_type is not None: + raise ValueError(f"addition_embed_type: {addition_embed_type} must be None, 'text' or 'text_image'.") + + # control net conditioning embedding + self.controlnet_cond_embedding = ControlNetConditioningEmbedding( + conditioning_embedding_channels=block_out_channels[0], + block_out_channels=conditioning_embedding_out_channels, + conditioning_channels=conditioning_channels, + ) + + self.down_blocks = nn.ModuleList([]) + self.controlnet_down_blocks = nn.ModuleList([]) + + if isinstance(only_cross_attention, bool): + only_cross_attention = [only_cross_attention] * len(down_block_types) + + if isinstance(attention_head_dim, int): + attention_head_dim = (attention_head_dim,) * len(down_block_types) + + if isinstance(num_attention_heads, int): + num_attention_heads = (num_attention_heads,) * len(down_block_types) + + # down + output_channel = block_out_channels[0] + + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + transformer_layers_per_block=transformer_layers_per_block[i], + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + cross_attention_dim=cross_attention_dim, + num_attention_heads=num_attention_heads[i], + attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel, + downsample_padding=downsample_padding, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention[i], + upcast_attention=upcast_attention, + resnet_time_scale_shift=resnet_time_scale_shift, + ) + self.down_blocks.append(down_block) + + for _ in range(layers_per_block): + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + if not is_final_block: + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + # mid + mid_block_channel = block_out_channels[-1] + + controlnet_block = nn.Conv2d(mid_block_channel, mid_block_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_mid_block = controlnet_block + + if mid_block_type == "UNetMidBlock2DCrossAttn": + self.mid_block = UNetMidBlock2DCrossAttn( + transformer_layers_per_block=transformer_layers_per_block[-1], + in_channels=mid_block_channel, + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift=resnet_time_scale_shift, + cross_attention_dim=cross_attention_dim, + num_attention_heads=num_attention_heads[-1], + resnet_groups=norm_num_groups, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + ) + elif mid_block_type == "UNetMidBlock2D": + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + num_layers=0, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_groups=norm_num_groups, + resnet_time_scale_shift=resnet_time_scale_shift, + add_attention=False, + ) + else: + raise ValueError(f"unknown mid_block_type : {mid_block_type}") + + @classmethod + def from_unet( + cls, + unet: UNet2DConditionModel, + controlnet_conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int, ...] | None = (16, 32, 96, 256), + load_weights_from_unet: bool = True, + conditioning_channels: int = 3, + ): + r""" + Instantiate a [`ControlNetModel`] from [`UNet2DConditionModel`]. + + Parameters: + unet (`UNet2DConditionModel`): + The UNet model weights to copy to the [`ControlNetModel`]. All configuration options are also copied + where applicable. + """ + transformer_layers_per_block = ( + unet.config.transformer_layers_per_block if "transformer_layers_per_block" in unet.config else 1 + ) + encoder_hid_dim = unet.config.encoder_hid_dim if "encoder_hid_dim" in unet.config else None + encoder_hid_dim_type = unet.config.encoder_hid_dim_type if "encoder_hid_dim_type" in unet.config else None + addition_embed_type = unet.config.addition_embed_type if "addition_embed_type" in unet.config else None + addition_time_embed_dim = ( + unet.config.addition_time_embed_dim if "addition_time_embed_dim" in unet.config else None + ) + + controlnet = cls( + encoder_hid_dim=encoder_hid_dim, + encoder_hid_dim_type=encoder_hid_dim_type, + addition_embed_type=addition_embed_type, + addition_time_embed_dim=addition_time_embed_dim, + transformer_layers_per_block=transformer_layers_per_block, + in_channels=unet.config.in_channels, + flip_sin_to_cos=unet.config.flip_sin_to_cos, + freq_shift=unet.config.freq_shift, + down_block_types=unet.config.down_block_types, + only_cross_attention=unet.config.only_cross_attention, + block_out_channels=unet.config.block_out_channels, + layers_per_block=unet.config.layers_per_block, + downsample_padding=unet.config.downsample_padding, + mid_block_scale_factor=unet.config.mid_block_scale_factor, + act_fn=unet.config.act_fn, + norm_num_groups=unet.config.norm_num_groups, + norm_eps=unet.config.norm_eps, + cross_attention_dim=unet.config.cross_attention_dim, + attention_head_dim=unet.config.attention_head_dim, + num_attention_heads=unet.config.num_attention_heads, + use_linear_projection=unet.config.use_linear_projection, + class_embed_type=unet.config.class_embed_type, + num_class_embeds=unet.config.num_class_embeds, + upcast_attention=unet.config.upcast_attention, + resnet_time_scale_shift=unet.config.resnet_time_scale_shift, + projection_class_embeddings_input_dim=unet.config.projection_class_embeddings_input_dim, + mid_block_type=unet.config.mid_block_type, + controlnet_conditioning_channel_order=controlnet_conditioning_channel_order, + conditioning_embedding_out_channels=conditioning_embedding_out_channels, + conditioning_channels=conditioning_channels, + ) + + if load_weights_from_unet: + controlnet.conv_in.load_state_dict(unet.conv_in.state_dict()) + controlnet.time_proj.load_state_dict(unet.time_proj.state_dict()) + controlnet.time_embedding.load_state_dict(unet.time_embedding.state_dict()) + + if controlnet.class_embedding: + controlnet.class_embedding.load_state_dict(unet.class_embedding.state_dict()) + + if hasattr(controlnet, "add_embedding"): + controlnet.add_embedding.load_state_dict(unet.add_embedding.state_dict()) + + controlnet.down_blocks.load_state_dict(unet.down_blocks.state_dict()) + controlnet.mid_block.load_state_dict(unet.mid_block.state_dict()) + + return controlnet + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnAddedKVProcessor() + elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnProcessor() + else: + raise ValueError( + f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}" + ) + + self.set_attn_processor(processor) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attention_slice + def set_attention_slice(self, slice_size: str | int | list[int]) -> None: + r""" + Enable sliced attention computation. + + When this option is enabled, the attention module splits the input tensor in slices to compute attention in + several steps. This is useful for saving some memory in exchange for a small decrease in speed. + + Args: + slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`): + When `"auto"`, input to the attention heads is halved, so attention is computed in two steps. If + `"max"`, maximum amount of memory is saved by running only one slice at a time. If a number is + provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim` + must be a multiple of `slice_size`. + """ + sliceable_head_dims = [] + + def fn_recursive_retrieve_sliceable_dims(module: torch.nn.Module): + if hasattr(module, "set_attention_slice"): + sliceable_head_dims.append(module.sliceable_head_dim) + + for child in module.children(): + fn_recursive_retrieve_sliceable_dims(child) + + # retrieve number of attention layers + for module in self.children(): + fn_recursive_retrieve_sliceable_dims(module) + + num_sliceable_layers = len(sliceable_head_dims) + + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = [dim // 2 for dim in sliceable_head_dims] + elif slice_size == "max": + # make smallest slice possible + slice_size = num_sliceable_layers * [1] + + slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size + + if len(slice_size) != len(sliceable_head_dims): + raise ValueError( + f"You have provided {len(slice_size)}, but {self.config} has {len(sliceable_head_dims)} different" + f" attention layers. Make sure to match `len(slice_size)` to be {len(sliceable_head_dims)}." + ) + + for i in range(len(slice_size)): + size = slice_size[i] + dim = sliceable_head_dims[i] + if size is not None and size > dim: + raise ValueError(f"size {size} has to be smaller or equal to {dim}.") + + # Recursively walk through all the children. + # Any children which exposes the set_attention_slice method + # gets the message + def fn_recursive_set_attention_slice(module: torch.nn.Module, slice_size: list[int]): + if hasattr(module, "set_attention_slice"): + module.set_attention_slice(slice_size.pop()) + + for child in module.children(): + fn_recursive_set_attention_slice(child, slice_size) + + reversed_slice_size = list(reversed(slice_size)) + for module in self.children(): + fn_recursive_set_attention_slice(module, reversed_slice_size) + + @apply_lora_scale("cross_attention_kwargs") + def forward( + self, + sample: torch.Tensor, + timestep: torch.Tensor | float | int, + encoder_hidden_states: torch.Tensor, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + class_labels: torch.Tensor | None = None, + timestep_cond: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + guess_mode: bool = False, + return_dict: bool = True, + ) -> ControlNetOutput | tuple[tuple[torch.Tensor, ...], torch.Tensor]: + """ + The [`ControlNetModel`] forward method. + + Args: + sample (`torch.Tensor`): + The noisy input tensor. + timestep (`torch.Tensor | float | int`): + The number of timesteps to denoise an input. + encoder_hidden_states (`torch.Tensor`): + The encoder hidden states. + controlnet_cond (`torch.Tensor`): + The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`. + conditioning_scale (`float`, defaults to `1.0`): + The scale factor for ControlNet outputs. + class_labels (`torch.Tensor`, *optional*, defaults to `None`): + Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings. + timestep_cond (`torch.Tensor`, *optional*, defaults to `None`): + Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the + timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep + embeddings. + attention_mask (`torch.Tensor`, *optional*, defaults to `None`): + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask + is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large + negative values to the attention scores corresponding to "discard" tokens. + added_cond_kwargs (`dict`): + Additional conditions for the Stable Diffusion XL UNet. + cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`): + A kwargs dictionary that if specified is passed along to the `AttnProcessor`. + guess_mode (`bool`, defaults to `False`): + In this mode, the ControlNet encoder tries its best to recognize the input content of the input even if + you remove all prompts. A `guidance_scale` between 3.0 and 5.0 is recommended. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~models.controlnets.controlnet.ControlNetOutput`] instead of a plain + tuple. + + Returns: + [`~models.controlnets.controlnet.ControlNetOutput`] **or** `tuple`: + If `return_dict` is `True`, a [`~models.controlnets.controlnet.ControlNetOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + """ + # check channel order + channel_order = self.config.controlnet_conditioning_channel_order + + if channel_order == "rgb": + # in rgb order by default + ... + elif channel_order == "bgr": + controlnet_cond = torch.flip(controlnet_cond, dims=[1]) + else: + raise ValueError(f"unknown `controlnet_conditioning_channel_order`: {channel_order}") + + # prepare attention_mask + if attention_mask is not None: + attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + # This would be a good case for the `match` statement (Python 3.10+) + is_mps = sample.device.type == "mps" + is_npu = sample.device.type == "npu" + if isinstance(timestep, float): + dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + else: + dtype = torch.int32 if (is_mps or is_npu) else torch.int64 + timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device) + elif len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps.expand(sample.shape[0]) + + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=sample.dtype) + + emb = self.time_embedding(t_emb, timestep_cond) + aug_emb = None + + if self.class_embedding is not None: + if class_labels is None: + raise ValueError("class_labels should be provided when num_class_embeds > 0") + + if self.config.class_embed_type == "timestep": + class_labels = self.time_proj(class_labels) + + class_emb = self.class_embedding(class_labels).to(dtype=self.dtype) + emb = emb + class_emb + + if self.config.addition_embed_type is not None: + if self.config.addition_embed_type == "text": + aug_emb = self.add_embedding(encoder_hidden_states) + + elif self.config.addition_embed_type == "text_time": + if "text_embeds" not in added_cond_kwargs: + raise ValueError( + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`" + ) + text_embeds = added_cond_kwargs.get("text_embeds") + if "time_ids" not in added_cond_kwargs: + raise ValueError( + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`" + ) + time_ids = added_cond_kwargs.get("time_ids") + time_embeds = self.add_time_proj(time_ids.flatten()) + time_embeds = time_embeds.reshape((text_embeds.shape[0], -1)) + + add_embeds = torch.concat([text_embeds, time_embeds], dim=-1) + add_embeds = add_embeds.to(emb.dtype) + aug_emb = self.add_embedding(add_embeds) + + emb = emb + aug_emb if aug_emb is not None else emb + + # 2. pre-process + sample = self.conv_in(sample) + + controlnet_cond = self.controlnet_cond_embedding(controlnet_cond) + sample = sample + controlnet_cond + + # 3. down + down_block_res_samples = (sample,) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention: + sample, res_samples = downsample_block( + hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + cross_attention_kwargs=cross_attention_kwargs, + ) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + if self.mid_block is not None: + if hasattr(self.mid_block, "has_cross_attention") and self.mid_block.has_cross_attention: + sample = self.mid_block( + sample, + emb, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + cross_attention_kwargs=cross_attention_kwargs, + ) + else: + sample = self.mid_block(sample, emb) + + # 5. Control net blocks + + controlnet_down_block_res_samples = () + + for down_block_res_sample, controlnet_block in zip(down_block_res_samples, self.controlnet_down_blocks): + down_block_res_sample = controlnet_block(down_block_res_sample) + controlnet_down_block_res_samples = controlnet_down_block_res_samples + (down_block_res_sample,) + + down_block_res_samples = controlnet_down_block_res_samples + + mid_block_res_sample = self.controlnet_mid_block(sample) + + # 6. scaling + if guess_mode and not self.config.global_pool_conditions: + scales = torch.logspace(-1, 0, len(down_block_res_samples) + 1, device=sample.device) # 0.1 to 1.0 + scales = scales * conditioning_scale + down_block_res_samples = [sample * scale for sample, scale in zip(down_block_res_samples, scales)] + mid_block_res_sample = mid_block_res_sample * scales[-1] # last one + else: + down_block_res_samples = [sample * conditioning_scale for sample in down_block_res_samples] + mid_block_res_sample = mid_block_res_sample * conditioning_scale + + if self.config.global_pool_conditions: + down_block_res_samples = [ + torch.mean(sample, dim=(2, 3), keepdim=True) for sample in down_block_res_samples + ] + mid_block_res_sample = torch.mean(mid_block_res_sample, dim=(2, 3), keepdim=True) + + if not return_dict: + return (down_block_res_samples, mid_block_res_sample) + + return ControlNetOutput( + down_block_res_samples=down_block_res_samples, mid_block_res_sample=mid_block_res_sample + ) + + +def zero_module(module): + for p in module.parameters(): + nn.init.zeros_(p) + return module diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_cosmos.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_cosmos.py new file mode 100644 index 0000000000000000000000000000000000000000..e39f8dfb568a02a74f076ab9212af25b8a59f816 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_cosmos.py @@ -0,0 +1,317 @@ +from dataclasses import dataclass +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin +from ...utils import BaseOutput, is_torchvision_available, logging +from ..modeling_utils import ModelMixin +from ..transformers.transformer_cosmos import ( + CosmosEmbedding, + CosmosLearnablePositionalEmbed, + CosmosPatchEmbed, + CosmosRotaryPosEmbed, + CosmosTransformerBlock, +) + + +if is_torchvision_available(): + from torchvision import transforms + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class CosmosControlNetOutput(BaseOutput): + """ + Output of [`CosmosControlNetModel`]. + + Args: + control_block_samples (`list[torch.Tensor]`): + List of control block activations to be injected into transformer blocks. + """ + + control_block_samples: List[torch.Tensor] + + +class CosmosControlNetModel(ModelMixin, ConfigMixin, FromOriginalModelMixin): + r""" + ControlNet for Cosmos Transfer2.5. + + This model duplicates the shared embedding modules from the transformer (patch_embed, time_embed, + learnable_pos_embed, img_context_proj) to enable proper CPU offloading. The forward() method computes everything + internally from raw inputs. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embed", "patch_embed_base", "time_embed"] + _no_split_modules = ["CosmosTransformerBlock"] + _keep_in_fp32_modules = ["learnable_pos_embed"] + + @register_to_config + def __init__( + self, + n_controlnet_blocks: int = 4, + in_channels: int = 130, + latent_channels: int = 18, # base latent channels (latents + condition_mask) + padding_mask + model_channels: int = 2048, + num_attention_heads: int = 32, + attention_head_dim: int = 128, + mlp_ratio: float = 4.0, + text_embed_dim: int = 1024, + adaln_lora_dim: int = 256, + patch_size: Tuple[int, int, int] = (1, 2, 2), + max_size: Tuple[int, int, int] = (128, 240, 240), + rope_scale: Tuple[float, float, float] = (2.0, 1.0, 1.0), + extra_pos_embed_type: str | None = None, + img_context_dim_in: int | None = None, + img_context_dim_out: int = 2048, + use_crossattn_projection: bool = False, + crossattn_proj_in_channels: int = 1024, + encoder_hidden_states_channels: int = 1024, + ): + super().__init__() + + self.patch_embed = CosmosPatchEmbed(in_channels, model_channels, patch_size, bias=False) + + self.patch_embed_base = CosmosPatchEmbed(latent_channels, model_channels, patch_size, bias=False) + self.time_embed = CosmosEmbedding(model_channels, model_channels) + + self.learnable_pos_embed = None + if extra_pos_embed_type == "learnable": + self.learnable_pos_embed = CosmosLearnablePositionalEmbed( + hidden_size=model_channels, + max_size=max_size, + patch_size=patch_size, + ) + + self.img_context_proj = None + if img_context_dim_in is not None and img_context_dim_in > 0: + self.img_context_proj = nn.Sequential( + nn.Linear(img_context_dim_in, img_context_dim_out, bias=True), + nn.GELU(), + ) + + # Cross-attention projection for text embeddings (same as transformer) + self.crossattn_proj = None + if use_crossattn_projection: + self.crossattn_proj = nn.Sequential( + nn.Linear(crossattn_proj_in_channels, encoder_hidden_states_channels, bias=True), + nn.GELU(), + ) + + # RoPE for both control and base latents + self.rope = CosmosRotaryPosEmbed( + hidden_size=attention_head_dim, max_size=max_size, patch_size=patch_size, rope_scale=rope_scale + ) + + self.control_blocks = nn.ModuleList( + [ + CosmosTransformerBlock( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + cross_attention_dim=text_embed_dim, + mlp_ratio=mlp_ratio, + adaln_lora_dim=adaln_lora_dim, + qk_norm="rms_norm", + out_bias=False, + img_context=img_context_dim_in is not None and img_context_dim_in > 0, + before_proj=(block_idx == 0), + after_proj=True, + ) + for block_idx in range(n_controlnet_blocks) + ] + ) + + self.gradient_checkpointing = False + + def _expand_conditioning_scale(self, conditioning_scale: float | list[float]) -> List[float]: + if isinstance(conditioning_scale, list): + scales = conditioning_scale + else: + scales = [conditioning_scale] * len(self.control_blocks) + + if len(scales) < len(self.control_blocks): + logger.warning( + "Received %d control scales, but control network defines %d blocks. " + "Scales will be trimmed or repeated to match.", + len(scales), + len(self.control_blocks), + ) + scales = (scales * len(self.control_blocks))[: len(self.control_blocks)] + return scales + + def forward( + self, + controls_latents: torch.Tensor, + latents: torch.Tensor, + timestep: torch.Tensor, + encoder_hidden_states: Union[Optional[torch.Tensor], Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]], + condition_mask: torch.Tensor, + conditioning_scale: float | list[float] = 1.0, + padding_mask: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + fps: int | None = None, + return_dict: bool = True, + ) -> Union[CosmosControlNetOutput, Tuple[List[torch.Tensor]]]: + """ + Forward pass for the ControlNet. + + Args: + controls_latents: Control signal latents [B, C, T, H, W] + latents: Base latents from the noising process [B, C, T, H, W] + timestep: Diffusion timestep tensor + encoder_hidden_states: Tuple of (text_context, img_context) or text_context + condition_mask: Conditioning mask [B, 1, T, H, W] + conditioning_scale: Scale factor(s) for control outputs + padding_mask: Padding mask [B, 1, H, W] or None + attention_mask: Optional attention mask or None + fps: Frames per second for RoPE or None + return_dict: Whether to return a CosmosControlNetOutput or a tuple + + Returns: + CosmosControlNetOutput or tuple of control tensors + """ + B, C, T, H, W = controls_latents.shape + + # 1. Prepare control latents + control_hidden_states = controls_latents + vace_in_channels = self.config.in_channels - 1 + if control_hidden_states.shape[1] < vace_in_channels - 1: + pad_C = vace_in_channels - 1 - control_hidden_states.shape[1] + control_hidden_states = torch.cat( + [ + control_hidden_states, + torch.zeros( + (B, pad_C, T, H, W), dtype=control_hidden_states.dtype, device=control_hidden_states.device + ), + ], + dim=1, + ) + + if condition_mask is not None: + control_hidden_states = torch.cat([control_hidden_states, condition_mask], dim=1) + else: + control_hidden_states = torch.cat( + [control_hidden_states, torch.zeros_like(controls_latents[:, :1])], dim=1 + ) + + padding_mask_resized = transforms.functional.resize( + padding_mask, list(control_hidden_states.shape[-2:]), interpolation=transforms.InterpolationMode.NEAREST + ) + control_hidden_states = torch.cat( + [control_hidden_states, padding_mask_resized.unsqueeze(2).repeat(B, 1, T, 1, 1)], dim=1 + ) + + # 2. Prepare base latents (same processing as transformer.forward) + base_hidden_states = latents + if condition_mask is not None: + base_hidden_states = torch.cat([base_hidden_states, condition_mask], dim=1) + + base_padding_mask = transforms.functional.resize( + padding_mask, list(base_hidden_states.shape[-2:]), interpolation=transforms.InterpolationMode.NEAREST + ) + base_hidden_states = torch.cat( + [base_hidden_states, base_padding_mask.unsqueeze(2).repeat(B, 1, T, 1, 1)], dim=1 + ) + + # 3. Generate positional embeddings (shared for both) + image_rotary_emb = self.rope(control_hidden_states, fps=fps) + extra_pos_emb = self.learnable_pos_embed(control_hidden_states) if self.learnable_pos_embed else None + + # 4. Patchify control latents + control_hidden_states = self.patch_embed(control_hidden_states) + control_hidden_states = control_hidden_states.flatten(1, 3) + + # 5. Patchify base latents + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = T // p_t + post_patch_height = H // p_h + post_patch_width = W // p_w + + base_hidden_states = self.patch_embed_base(base_hidden_states) + base_hidden_states = base_hidden_states.flatten(1, 3) + + # 6. Time embeddings + if timestep.ndim == 1: + temb, embedded_timestep = self.time_embed(base_hidden_states, timestep) + elif timestep.ndim == 5: + batch_size, _, num_frames, _, _ = latents.shape + assert timestep.shape == (batch_size, 1, num_frames, 1, 1), ( + f"Expected timestep to have shape [B, 1, T, 1, 1], but got {timestep.shape}" + ) + timestep_flat = timestep.flatten() + temb, embedded_timestep = self.time_embed(base_hidden_states, timestep_flat) + temb, embedded_timestep = ( + x.view(batch_size, post_patch_num_frames, 1, 1, -1) + .expand(-1, -1, post_patch_height, post_patch_width, -1) + .flatten(1, 3) + for x in (temb, embedded_timestep) + ) + else: + raise ValueError(f"Expected timestep to have shape [B, 1, T, 1, 1] or [T], but got {timestep.shape}") + + # 7. Process encoder hidden states + if isinstance(encoder_hidden_states, tuple): + text_context, img_context = encoder_hidden_states + else: + text_context = encoder_hidden_states + img_context = None + + # Apply cross-attention projection to text context + if self.crossattn_proj is not None: + text_context = self.crossattn_proj(text_context) + + # Apply cross-attention projection to image context (if provided) + if img_context is not None and self.img_context_proj is not None: + img_context = self.img_context_proj(img_context) + + # Combine text and image context into a single tuple + if self.config.img_context_dim_in is not None and self.config.img_context_dim_in > 0: + processed_encoder_hidden_states = (text_context, img_context) + else: + processed_encoder_hidden_states = text_context + + # 8. Prepare attention mask + if attention_mask is not None: + attention_mask = attention_mask.unsqueeze(1).unsqueeze(1) # [B, 1, 1, S] + + # 9. Run control blocks + scales = self._expand_conditioning_scale(conditioning_scale) + result = [] + for block_idx, (block, scale) in enumerate(zip(self.control_blocks, scales)): + if torch.is_grad_enabled() and self.gradient_checkpointing: + control_hidden_states, control_proj = self._gradient_checkpointing_func( + block, + control_hidden_states, + processed_encoder_hidden_states, + embedded_timestep, + temb, + image_rotary_emb, + extra_pos_emb, + attention_mask, + None, # controlnet_residual + base_hidden_states, + block_idx, + ) + else: + control_hidden_states, control_proj = block( + hidden_states=control_hidden_states, + encoder_hidden_states=processed_encoder_hidden_states, + embedded_timestep=embedded_timestep, + temb=temb, + image_rotary_emb=image_rotary_emb, + extra_pos_emb=extra_pos_emb, + attention_mask=attention_mask, + controlnet_residual=None, + latents=base_hidden_states, + block_idx=block_idx, + ) + result.append(control_proj * scale) + + if not return_dict: + return (result,) + + return CosmosControlNetOutput(control_block_samples=result) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..5bc1a446338ecd7492027fb9c3980470f293c33f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flax.py @@ -0,0 +1,406 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import flax +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict + +from ...configuration_utils import ConfigMixin, flax_register_to_config +from ...utils import BaseOutput, logging +from ..embeddings_flax import FlaxTimestepEmbedding, FlaxTimesteps +from ..modeling_flax_utils import FlaxModelMixin +from ..unets.unet_2d_blocks_flax import ( + FlaxCrossAttnDownBlock2D, + FlaxDownBlock2D, + FlaxUNetMidBlock2DCrossAttn, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class FlaxControlNetOutput(BaseOutput): + """ + The output of [`FlaxControlNetModel`]. + + Args: + down_block_res_samples (`jnp.ndarray`): + mid_block_res_sample (`jnp.ndarray`): + """ + + down_block_res_samples: jnp.ndarray + mid_block_res_sample: jnp.ndarray + + +class FlaxControlNetConditioningEmbedding(nn.Module): + conditioning_embedding_channels: int + block_out_channels: tuple[int, ...] = (16, 32, 96, 256) + dtype: jnp.dtype = jnp.float32 + + def setup(self) -> None: + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.conv_in = nn.Conv( + self.block_out_channels[0], + kernel_size=(3, 3), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + blocks = [] + for i in range(len(self.block_out_channels) - 1): + channel_in = self.block_out_channels[i] + channel_out = self.block_out_channels[i + 1] + conv1 = nn.Conv( + channel_in, + kernel_size=(3, 3), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + blocks.append(conv1) + conv2 = nn.Conv( + channel_out, + kernel_size=(3, 3), + strides=(2, 2), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + blocks.append(conv2) + self.blocks = blocks + + self.conv_out = nn.Conv( + self.conditioning_embedding_channels, + kernel_size=(3, 3), + padding=((1, 1), (1, 1)), + kernel_init=nn.initializers.zeros_init(), + bias_init=nn.initializers.zeros_init(), + dtype=self.dtype, + ) + + def __call__(self, conditioning: jnp.ndarray) -> jnp.ndarray: + embedding = self.conv_in(conditioning) + embedding = nn.silu(embedding) + + for block in self.blocks: + embedding = block(embedding) + embedding = nn.silu(embedding) + + embedding = self.conv_out(embedding) + + return embedding + + +@flax_register_to_config +class FlaxControlNetModel(nn.Module, FlaxModelMixin, ConfigMixin): + r""" + A ControlNet model. + + This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it’s generic methods + implemented for all models (such as downloading or saving). + + This model is also a Flax Linen [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/flax.linen.html#module) + subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matters related to its + general usage and behavior. + + Inherent JAX features such as the following are supported: + + - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit) + - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation) + - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap) + - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap) + + Parameters: + sample_size (`int`, *optional*): + The size of the input sample. + in_channels (`int`, *optional*, defaults to 4): + The number of channels in the input sample. + down_block_types (`tuple[str]`, *optional*, defaults to `("FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxCrossAttnDownBlock2D", "FlaxDownBlock2D")`): + The tuple of downsample blocks to use. + block_out_channels (`tuple[int]`, *optional*, defaults to `(320, 640, 1280, 1280)`): + The tuple of output channels for each block. + layers_per_block (`int`, *optional*, defaults to 2): + The number of layers per block. + attention_head_dim (`int` or `tuple[int]`, *optional*, defaults to 8): + The dimension of the attention heads. + num_attention_heads (`int` or `tuple[int]`, *optional*): + The number of attention heads. + cross_attention_dim (`int`, *optional*, defaults to 768): + The dimension of the cross attention features. + dropout (`float`, *optional*, defaults to 0): + Dropout probability for down, up and bottleneck blocks. + flip_sin_to_cos (`bool`, *optional*, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding. + controlnet_conditioning_channel_order (`str`, *optional*, defaults to `rgb`): + The channel order of conditional image. Will convert to `rgb` if it's `bgr`. + conditioning_embedding_out_channels (`tuple`, *optional*, defaults to `(16, 32, 96, 256)`): + The tuple of output channel for each block in the `conditioning_embedding` layer. + """ + + sample_size: int = 32 + in_channels: int = 4 + down_block_types: tuple[str, ...] = ( + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "DownBlock2D", + ) + only_cross_attention: bool | tuple[bool, ...] = False + block_out_channels: tuple[int, ...] = (320, 640, 1280, 1280) + layers_per_block: int = 2 + attention_head_dim: int | tuple[int, ...] = 8 + num_attention_heads: int | tuple[int, ...] | None = None + cross_attention_dim: int = 1280 + dropout: float = 0.0 + use_linear_projection: bool = False + dtype: jnp.dtype = jnp.float32 + flip_sin_to_cos: bool = True + freq_shift: int = 0 + controlnet_conditioning_channel_order: str = "rgb" + conditioning_embedding_out_channels: tuple[int, ...] = (16, 32, 96, 256) + + def init_weights(self, rng: jax.Array) -> FrozenDict: + # init input tensors + sample_shape = (1, self.in_channels, self.sample_size, self.sample_size) + sample = jnp.zeros(sample_shape, dtype=jnp.float32) + timesteps = jnp.ones((1,), dtype=jnp.int32) + encoder_hidden_states = jnp.zeros((1, 1, self.cross_attention_dim), dtype=jnp.float32) + controlnet_cond_shape = (1, 3, self.sample_size * 8, self.sample_size * 8) + controlnet_cond = jnp.zeros(controlnet_cond_shape, dtype=jnp.float32) + + params_rng, dropout_rng = jax.random.split(rng) + rngs = {"params": params_rng, "dropout": dropout_rng} + + return self.init(rngs, sample, timesteps, encoder_hidden_states, controlnet_cond)["params"] + + def setup(self) -> None: + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + block_out_channels = self.block_out_channels + time_embed_dim = block_out_channels[0] * 4 + + # If `num_attention_heads` is not defined (which is the case for most models) + # it will default to `attention_head_dim`. This looks weird upon first reading it and it is. + # The reason for this behavior is to correct for incorrectly named variables that were introduced + # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 + # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking + # which is why we correct for the naming here. + num_attention_heads = self.num_attention_heads or self.attention_head_dim + + # input + self.conv_in = nn.Conv( + block_out_channels[0], + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + # time + self.time_proj = FlaxTimesteps( + block_out_channels[0], flip_sin_to_cos=self.flip_sin_to_cos, freq_shift=self.config.freq_shift + ) + self.time_embedding = FlaxTimestepEmbedding(time_embed_dim, dtype=self.dtype) + + self.controlnet_cond_embedding = FlaxControlNetConditioningEmbedding( + conditioning_embedding_channels=block_out_channels[0], + block_out_channels=self.conditioning_embedding_out_channels, + ) + + only_cross_attention = self.only_cross_attention + if isinstance(only_cross_attention, bool): + only_cross_attention = (only_cross_attention,) * len(self.down_block_types) + + if isinstance(num_attention_heads, int): + num_attention_heads = (num_attention_heads,) * len(self.down_block_types) + + # down + down_blocks = [] + controlnet_down_blocks = [] + + output_channel = block_out_channels[0] + + controlnet_block = nn.Conv( + output_channel, + kernel_size=(1, 1), + padding="VALID", + kernel_init=nn.initializers.zeros_init(), + bias_init=nn.initializers.zeros_init(), + dtype=self.dtype, + ) + controlnet_down_blocks.append(controlnet_block) + + for i, down_block_type in enumerate(self.down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + if down_block_type == "CrossAttnDownBlock2D": + down_block = FlaxCrossAttnDownBlock2D( + in_channels=input_channel, + out_channels=output_channel, + dropout=self.dropout, + num_layers=self.layers_per_block, + num_attention_heads=num_attention_heads[i], + add_downsample=not is_final_block, + use_linear_projection=self.use_linear_projection, + only_cross_attention=only_cross_attention[i], + dtype=self.dtype, + ) + else: + down_block = FlaxDownBlock2D( + in_channels=input_channel, + out_channels=output_channel, + dropout=self.dropout, + num_layers=self.layers_per_block, + add_downsample=not is_final_block, + dtype=self.dtype, + ) + + down_blocks.append(down_block) + + for _ in range(self.layers_per_block): + controlnet_block = nn.Conv( + output_channel, + kernel_size=(1, 1), + padding="VALID", + kernel_init=nn.initializers.zeros_init(), + bias_init=nn.initializers.zeros_init(), + dtype=self.dtype, + ) + controlnet_down_blocks.append(controlnet_block) + + if not is_final_block: + controlnet_block = nn.Conv( + output_channel, + kernel_size=(1, 1), + padding="VALID", + kernel_init=nn.initializers.zeros_init(), + bias_init=nn.initializers.zeros_init(), + dtype=self.dtype, + ) + controlnet_down_blocks.append(controlnet_block) + + self.down_blocks = down_blocks + self.controlnet_down_blocks = controlnet_down_blocks + + # mid + mid_block_channel = block_out_channels[-1] + self.mid_block = FlaxUNetMidBlock2DCrossAttn( + in_channels=mid_block_channel, + dropout=self.dropout, + num_attention_heads=num_attention_heads[-1], + use_linear_projection=self.use_linear_projection, + dtype=self.dtype, + ) + + self.controlnet_mid_block = nn.Conv( + mid_block_channel, + kernel_size=(1, 1), + padding="VALID", + kernel_init=nn.initializers.zeros_init(), + bias_init=nn.initializers.zeros_init(), + dtype=self.dtype, + ) + + def __call__( + self, + sample: jnp.ndarray, + timesteps: jnp.ndarray | float | int, + encoder_hidden_states: jnp.ndarray, + controlnet_cond: jnp.ndarray, + conditioning_scale: float = 1.0, + return_dict: bool = True, + train: bool = False, + ) -> FlaxControlNetOutput | tuple[tuple[jnp.ndarray, ...], jnp.ndarray]: + r""" + Args: + sample (`jnp.ndarray`): (batch, channel, height, width) noisy inputs tensor + timestep (`jnp.ndarray` or `float` or `int`): timesteps + encoder_hidden_states (`jnp.ndarray`): (batch_size, sequence_length, hidden_size) encoder hidden states + controlnet_cond (`jnp.ndarray`): (batch, channel, height, width) the conditional input tensor + conditioning_scale (`float`, *optional*, defaults to `1.0`): the scale factor for controlnet outputs + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] instead of + a plain tuple. + train (`bool`, *optional*, defaults to `False`): + Use deterministic functions and disable dropout when not training. + + Returns: + [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] or `tuple`: + [`~models.unets.unet_2d_condition_flax.FlaxUNet2DConditionOutput`] if `return_dict` is True, otherwise + a `tuple`. When returning a tuple, the first element is the sample tensor. + """ + channel_order = self.controlnet_conditioning_channel_order + if channel_order == "bgr": + controlnet_cond = jnp.flip(controlnet_cond, axis=1) + + # 1. time + if not isinstance(timesteps, jnp.ndarray): + timesteps = jnp.array([timesteps], dtype=jnp.int32) + elif isinstance(timesteps, jnp.ndarray) and len(timesteps.shape) == 0: + timesteps = timesteps.astype(dtype=jnp.float32) + timesteps = jnp.expand_dims(timesteps, 0) + + t_emb = self.time_proj(timesteps) + t_emb = self.time_embedding(t_emb) + + # 2. pre-process + sample = jnp.transpose(sample, (0, 2, 3, 1)) + sample = self.conv_in(sample) + + controlnet_cond = jnp.transpose(controlnet_cond, (0, 2, 3, 1)) + controlnet_cond = self.controlnet_cond_embedding(controlnet_cond) + sample += controlnet_cond + + # 3. down + down_block_res_samples = (sample,) + for down_block in self.down_blocks: + if isinstance(down_block, FlaxCrossAttnDownBlock2D): + sample, res_samples = down_block(sample, t_emb, encoder_hidden_states, deterministic=not train) + else: + sample, res_samples = down_block(sample, t_emb, deterministic=not train) + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, t_emb, encoder_hidden_states, deterministic=not train) + + # 5. contronet blocks + controlnet_down_block_res_samples = () + for down_block_res_sample, controlnet_block in zip(down_block_res_samples, self.controlnet_down_blocks): + down_block_res_sample = controlnet_block(down_block_res_sample) + controlnet_down_block_res_samples += (down_block_res_sample,) + + down_block_res_samples = controlnet_down_block_res_samples + + mid_block_res_sample = self.controlnet_mid_block(sample) + + # 6. scaling + down_block_res_samples = [sample * conditioning_scale for sample in down_block_res_samples] + mid_block_res_sample *= conditioning_scale + + if not return_dict: + return (down_block_res_samples, mid_block_res_sample) + + return FlaxControlNetOutput( + down_block_res_samples=down_block_res_samples, mid_block_res_sample=mid_block_res_sample + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flux.py new file mode 100644 index 0000000000000000000000000000000000000000..56482c299c054ee8067dcf29a4926abcbf54c22c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_flux.py @@ -0,0 +1,436 @@ +# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import ( + BaseOutput, + apply_lora_scale, + logging, +) +from ..attention import AttentionMixin +from ..controlnets.controlnet import ControlNetConditioningEmbedding, zero_module +from ..embeddings import CombinedTimestepGuidanceTextProjEmbeddings, CombinedTimestepTextProjEmbeddings, FluxPosEmbed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..transformers.transformer_flux import FluxSingleTransformerBlock, FluxTransformerBlock + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class FluxControlNetOutput(BaseOutput): + controlnet_block_samples: tuple[torch.Tensor] + controlnet_single_block_samples: tuple[torch.Tensor] + + +class FluxControlNetModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin): + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + num_layers: int = 19, + num_single_layers: int = 38, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 4096, + pooled_projection_dim: int = 768, + guidance_embeds: bool = False, + axes_dims_rope: list[int] = [16, 56, 56], + num_mode: int = None, + conditioning_embedding_channels: int = None, + ): + super().__init__() + self.out_channels = in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.pos_embed = FluxPosEmbed(theta=10000, axes_dim=axes_dims_rope) + text_time_guidance_cls = ( + CombinedTimestepGuidanceTextProjEmbeddings if guidance_embeds else CombinedTimestepTextProjEmbeddings + ) + self.time_text_embed = text_time_guidance_cls( + embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim + ) + + self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim) + self.x_embedder = torch.nn.Linear(in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + FluxTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for i in range(num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + FluxSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for i in range(num_single_layers) + ] + ) + + # controlnet_blocks + self.controlnet_blocks = nn.ModuleList([]) + for _ in range(len(self.transformer_blocks)): + self.controlnet_blocks.append(zero_module(nn.Linear(self.inner_dim, self.inner_dim))) + + self.controlnet_single_blocks = nn.ModuleList([]) + for _ in range(len(self.single_transformer_blocks)): + self.controlnet_single_blocks.append(zero_module(nn.Linear(self.inner_dim, self.inner_dim))) + + self.union = num_mode is not None + if self.union: + self.controlnet_mode_embedder = nn.Embedding(num_mode, self.inner_dim) + + if conditioning_embedding_channels is not None: + self.input_hint_block = ControlNetConditioningEmbedding( + conditioning_embedding_channels=conditioning_embedding_channels, block_out_channels=(16, 16, 16, 16) + ) + self.controlnet_x_embedder = torch.nn.Linear(in_channels, self.inner_dim) + else: + self.input_hint_block = None + self.controlnet_x_embedder = zero_module(torch.nn.Linear(in_channels, self.inner_dim)) + + self.gradient_checkpointing = False + + @classmethod + def from_transformer( + cls, + transformer, + num_layers: int = 4, + num_single_layers: int = 10, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + load_weights_from_transformer=True, + ): + config = dict(transformer.config) + config["num_layers"] = num_layers + config["num_single_layers"] = num_single_layers + config["attention_head_dim"] = attention_head_dim + config["num_attention_heads"] = num_attention_heads + + controlnet = cls.from_config(config) + + if load_weights_from_transformer: + controlnet.pos_embed.load_state_dict(transformer.pos_embed.state_dict()) + controlnet.time_text_embed.load_state_dict(transformer.time_text_embed.state_dict()) + controlnet.context_embedder.load_state_dict(transformer.context_embedder.state_dict()) + controlnet.x_embedder.load_state_dict(transformer.x_embedder.state_dict()) + controlnet.transformer_blocks.load_state_dict(transformer.transformer_blocks.state_dict(), strict=False) + controlnet.single_transformer_blocks.load_state_dict( + transformer.single_transformer_blocks.state_dict(), strict=False + ) + + controlnet.controlnet_x_embedder = zero_module(controlnet.controlnet_x_embedder) + + return controlnet + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + controlnet_cond: torch.Tensor, + controlnet_mode: torch.Tensor = None, + conditioning_scale: float = 1.0, + encoder_hidden_states: torch.Tensor = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.FloatTensor | Transformer2DModelOutput: + """ + The [`FluxTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + controlnet_cond (`torch.Tensor`): + The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`. + controlnet_mode (`torch.Tensor`): + The mode tensor of shape `(batch_size, 1)`. + conditioning_scale (`float`, defaults to `1.0`): + The scale factor for ControlNet outputs. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected + from the embeddings of input conditions. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + hidden_states = self.x_embedder(hidden_states) + + if self.input_hint_block is not None: + controlnet_cond = self.input_hint_block(controlnet_cond) + batch_size, channels, height_pw, width_pw = controlnet_cond.shape + height = height_pw // self.config.patch_size + width = width_pw // self.config.patch_size + controlnet_cond = controlnet_cond.reshape( + batch_size, channels, height, self.config.patch_size, width, self.config.patch_size + ) + controlnet_cond = controlnet_cond.permute(0, 2, 4, 1, 3, 5) + controlnet_cond = controlnet_cond.reshape(batch_size, height * width, -1) + # add + hidden_states = hidden_states + self.controlnet_x_embedder(controlnet_cond) + + timestep = timestep.to(hidden_states.dtype) * 1000 + if guidance is not None: + guidance = guidance.to(hidden_states.dtype) * 1000 + else: + guidance = None + temb = ( + self.time_text_embed(timestep, pooled_projections) + if guidance is None + else self.time_text_embed(timestep, guidance, pooled_projections) + ) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if txt_ids.ndim == 3: + logger.warning( + "Passing `txt_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + txt_ids = txt_ids[0] + if img_ids.ndim == 3: + logger.warning( + "Passing `img_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + img_ids = img_ids[0] + + if self.union: + # union mode + if controlnet_mode is None: + raise ValueError("`controlnet_mode` cannot be `None` when applying ControlNet-Union") + # union mode emb + controlnet_mode_emb = self.controlnet_mode_embedder(controlnet_mode) + encoder_hidden_states = torch.cat([controlnet_mode_emb, encoder_hidden_states], dim=1) + txt_ids = torch.cat([txt_ids[:1], txt_ids], dim=0) + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + block_samples = () + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + block_samples = block_samples + (hidden_states,) + + single_block_samples = () + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + single_block_samples = single_block_samples + (hidden_states,) + + # controlnet block + controlnet_block_samples = () + for block_sample, controlnet_block in zip(block_samples, self.controlnet_blocks): + block_sample = controlnet_block(block_sample) + controlnet_block_samples = controlnet_block_samples + (block_sample,) + + controlnet_single_block_samples = () + for single_block_sample, controlnet_block in zip(single_block_samples, self.controlnet_single_blocks): + single_block_sample = controlnet_block(single_block_sample) + controlnet_single_block_samples = controlnet_single_block_samples + (single_block_sample,) + + # scaling + controlnet_block_samples = [sample * conditioning_scale for sample in controlnet_block_samples] + controlnet_single_block_samples = [sample * conditioning_scale for sample in controlnet_single_block_samples] + + controlnet_block_samples = None if len(controlnet_block_samples) == 0 else controlnet_block_samples + controlnet_single_block_samples = ( + None if len(controlnet_single_block_samples) == 0 else controlnet_single_block_samples + ) + + if not return_dict: + return (controlnet_block_samples, controlnet_single_block_samples) + + return FluxControlNetOutput( + controlnet_block_samples=controlnet_block_samples, + controlnet_single_block_samples=controlnet_single_block_samples, + ) + + +class FluxMultiControlNetModel(ModelMixin): + r""" + `FluxMultiControlNetModel` wrapper class for Multi-FluxControlNetModel + + This module is a wrapper for multiple instances of the `FluxControlNetModel`. The `forward()` API is designed to be + compatible with `FluxControlNetModel`. + + Args: + controlnets (`list[FluxControlNetModel]`): + Provides additional conditioning to the unet during the denoising process. You must set multiple + `FluxControlNetModel` as a list. + """ + + def __init__(self, controlnets): + super().__init__() + self.nets = nn.ModuleList(controlnets) + + def forward( + self, + hidden_states: torch.FloatTensor, + controlnet_cond: list[torch.tensor], + controlnet_mode: list[torch.tensor], + conditioning_scale: list[float], + encoder_hidden_states: torch.Tensor = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> FluxControlNetOutput | tuple: + # ControlNet-Union with multiple conditions + # only load one ControlNet for saving memories + if len(self.nets) == 1: + controlnet = self.nets[0] + + for i, (image, mode, scale) in enumerate(zip(controlnet_cond, controlnet_mode, conditioning_scale)): + block_samples, single_block_samples = controlnet( + hidden_states=hidden_states, + controlnet_cond=image, + controlnet_mode=mode[:, None], + conditioning_scale=scale, + timestep=timestep, + guidance=guidance, + pooled_projections=pooled_projections, + encoder_hidden_states=encoder_hidden_states, + txt_ids=txt_ids, + img_ids=img_ids, + joint_attention_kwargs=joint_attention_kwargs, + return_dict=return_dict, + ) + + # merge samples + if i == 0: + control_block_samples = block_samples + control_single_block_samples = single_block_samples + else: + if block_samples is not None and control_block_samples is not None: + control_block_samples = [ + control_block_sample + block_sample + for control_block_sample, block_sample in zip(control_block_samples, block_samples) + ] + if single_block_samples is not None and control_single_block_samples is not None: + control_single_block_samples = [ + control_single_block_sample + block_sample + for control_single_block_sample, block_sample in zip( + control_single_block_samples, single_block_samples + ) + ] + + # Regular Multi-ControlNets + # load all ControlNets into memories + else: + for i, (image, mode, scale, controlnet) in enumerate( + zip(controlnet_cond, controlnet_mode, conditioning_scale, self.nets) + ): + block_samples, single_block_samples = controlnet( + hidden_states=hidden_states, + controlnet_cond=image, + controlnet_mode=mode[:, None], + conditioning_scale=scale, + timestep=timestep, + guidance=guidance, + pooled_projections=pooled_projections, + encoder_hidden_states=encoder_hidden_states, + txt_ids=txt_ids, + img_ids=img_ids, + joint_attention_kwargs=joint_attention_kwargs, + return_dict=return_dict, + ) + + # merge samples + if i == 0: + control_block_samples = block_samples + control_single_block_samples = single_block_samples + else: + if block_samples is not None and control_block_samples is not None: + control_block_samples = [ + control_block_sample + block_sample + for control_block_sample, block_sample in zip(control_block_samples, block_samples) + ] + if single_block_samples is not None and control_single_block_samples is not None: + control_single_block_samples = [ + control_single_block_sample + block_sample + for control_single_block_sample, block_sample in zip( + control_single_block_samples, single_block_samples + ) + ] + + return control_block_samples, control_single_block_samples diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_hunyuan.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_hunyuan.py new file mode 100644 index 0000000000000000000000000000000000000000..6ef92d78dd6e30c471c4c6f41b059be051a65a30 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_hunyuan.py @@ -0,0 +1,400 @@ +# Copyright 2025 HunyuanDiT Authors, Qixun Wang and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import BaseOutput, logging +from ..attention_processor import AttentionProcessor +from ..embeddings import ( + HunyuanCombinedTimestepTextSizeStyleEmbedding, + PatchEmbed, + PixArtAlphaTextProjection, +) +from ..modeling_utils import ModelMixin +from ..transformers.hunyuan_transformer_2d import HunyuanDiTBlock +from .controlnet import zero_module + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class HunyuanControlNetOutput(BaseOutput): + controlnet_block_samples: tuple[torch.Tensor] + + +class HunyuanDiT2DControlNetModel(ModelMixin, ConfigMixin): + @register_to_config + def __init__( + self, + conditioning_channels: int = 3, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int | None = None, + patch_size: int | None = None, + activation_fn: str = "gelu-approximate", + sample_size=32, + hidden_size=1152, + transformer_num_layers: int = 40, + mlp_ratio: float = 4.0, + cross_attention_dim: int = 1024, + cross_attention_dim_t5: int = 2048, + pooled_projection_dim: int = 1024, + text_len: int = 77, + text_len_t5: int = 256, + use_style_cond_and_image_meta_size: bool = True, + ): + super().__init__() + self.num_heads = num_attention_heads + self.inner_dim = num_attention_heads * attention_head_dim + + self.text_embedder = PixArtAlphaTextProjection( + in_features=cross_attention_dim_t5, + hidden_size=cross_attention_dim_t5 * 4, + out_features=cross_attention_dim, + act_fn="silu_fp32", + ) + + self.text_embedding_padding = nn.Parameter( + torch.randn(text_len + text_len_t5, cross_attention_dim, dtype=torch.float32) + ) + + self.pos_embed = PatchEmbed( + height=sample_size, + width=sample_size, + in_channels=in_channels, + embed_dim=hidden_size, + patch_size=patch_size, + pos_embed_type=None, + ) + + self.time_extra_emb = HunyuanCombinedTimestepTextSizeStyleEmbedding( + hidden_size, + pooled_projection_dim=pooled_projection_dim, + seq_len=text_len_t5, + cross_attention_dim=cross_attention_dim_t5, + use_style_cond_and_image_meta_size=use_style_cond_and_image_meta_size, + ) + + # controlnet_blocks + self.controlnet_blocks = nn.ModuleList([]) + + # HunyuanDiT Blocks + self.blocks = nn.ModuleList( + [ + HunyuanDiTBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + activation_fn=activation_fn, + ff_inner_dim=int(self.inner_dim * mlp_ratio), + cross_attention_dim=cross_attention_dim, + qk_norm=True, # See https://huggingface.co/papers/2302.05442 for details. + skip=False, # always False as it is the first half of the model + ) + for layer in range(transformer_num_layers // 2 - 1) + ] + ) + self.input_block = zero_module(nn.Linear(hidden_size, hidden_size)) + for _ in range(len(self.blocks)): + controlnet_block = nn.Linear(hidden_size, hidden_size) + controlnet_block = zero_module(controlnet_block) + self.controlnet_blocks.append(controlnet_block) + + @property + def attn_processors(self) -> dict[str, AttentionProcessor]: + r""" + Returns: + `dict` of attention processors: A dictionary containing all attention processors used in the model with + indexed by its weight name. + """ + # set recursively + processors = {} + + def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: dict[str, AttentionProcessor]): + if hasattr(module, "get_processor"): + processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True) + + for sub_name, child in module.named_children(): + fn_recursive_add_processors(f"{name}.{sub_name}", child, processors) + + return processors + + for name, module in self.named_children(): + fn_recursive_add_processors(name, module, processors) + + return processors + + def set_attn_processor(self, processor: AttentionProcessor | dict[str, AttentionProcessor]): + r""" + Sets the attention processor to use to compute attention. + + Parameters: + processor (`dict` of `AttentionProcessor` or only `AttentionProcessor`): + The instantiated processor class or a dictionary of processor classes that will be set as the processor + for **all** `Attention` layers. If `processor` is a dict, the key needs to define the path to the + corresponding cross attention processor. This is strongly recommended when setting trainable attention + processors. + """ + count = len(self.attn_processors.keys()) + + if isinstance(processor, dict) and len(processor) != count: + raise ValueError( + f"A dict of processors was passed, but the number of processors {len(processor)} does not match the" + f" number of attention layers: {count}. Please make sure to pass {count} processor classes." + ) + + def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor): + if hasattr(module, "set_processor"): + if not isinstance(processor, dict): + module.set_processor(processor) + else: + module.set_processor(processor.pop(f"{name}.processor")) + + for sub_name, child in module.named_children(): + fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor) + + for name, module in self.named_children(): + fn_recursive_attn_processor(name, module, processor) + + @classmethod + def from_transformer( + cls, transformer, conditioning_channels=3, transformer_num_layers=None, load_weights_from_transformer=True + ): + config = transformer.config + activation_fn = config.activation_fn + attention_head_dim = config.attention_head_dim + cross_attention_dim = config.cross_attention_dim + cross_attention_dim_t5 = config.cross_attention_dim_t5 + hidden_size = config.hidden_size + in_channels = config.in_channels + mlp_ratio = config.mlp_ratio + num_attention_heads = config.num_attention_heads + patch_size = config.patch_size + sample_size = config.sample_size + text_len = config.text_len + text_len_t5 = config.text_len_t5 + + conditioning_channels = conditioning_channels + transformer_num_layers = transformer_num_layers or config.transformer_num_layers + + controlnet = cls( + conditioning_channels=conditioning_channels, + transformer_num_layers=transformer_num_layers, + activation_fn=activation_fn, + attention_head_dim=attention_head_dim, + cross_attention_dim=cross_attention_dim, + cross_attention_dim_t5=cross_attention_dim_t5, + hidden_size=hidden_size, + in_channels=in_channels, + mlp_ratio=mlp_ratio, + num_attention_heads=num_attention_heads, + patch_size=patch_size, + sample_size=sample_size, + text_len=text_len, + text_len_t5=text_len_t5, + ) + if load_weights_from_transformer: + key = controlnet.load_state_dict(transformer.state_dict(), strict=False) + logger.warning(f"controlnet load from Hunyuan-DiT. missing_keys: {key[0]}") + return controlnet + + def forward( + self, + hidden_states, + timestep, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + encoder_hidden_states=None, + text_embedding_mask=None, + encoder_hidden_states_t5=None, + text_embedding_mask_t5=None, + image_meta_size=None, + style=None, + image_rotary_emb=None, + return_dict=True, + ): + """ + The [`HunyuanDiT2DControlNetModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`): + The input tensor. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. + controlnet_cond ( `torch.Tensor` ): + The conditioning input to ControlNet. + conditioning_scale ( `float` ): + Indicate the conditioning scale. + encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. This is the output of `BertModel`. + text_embedding_mask: torch.Tensor + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output + of `BertModel`. + encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder. + text_embedding_mask_t5: torch.Tensor + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output + of T5 Text Encoder. + image_meta_size (torch.Tensor): + Conditional embedding indicate the image sizes + style: torch.Tensor: + Conditional embedding indicate the style + image_rotary_emb (`torch.Tensor`): + The image rotary embeddings to apply on query and key tensors during attention calculation. + return_dict: bool + Whether to return a dictionary. + """ + + height, width = hidden_states.shape[-2:] + + hidden_states = self.pos_embed(hidden_states) # b,c,H,W -> b, N, C + + # 2. pre-process + hidden_states = hidden_states + self.input_block(self.pos_embed(controlnet_cond)) + + temb = self.time_extra_emb( + timestep, encoder_hidden_states_t5, image_meta_size, style, hidden_dtype=timestep.dtype + ) # [B, D] + + # text projection + batch_size, sequence_length, _ = encoder_hidden_states_t5.shape + encoder_hidden_states_t5 = self.text_embedder( + encoder_hidden_states_t5.view(-1, encoder_hidden_states_t5.shape[-1]) + ) + encoder_hidden_states_t5 = encoder_hidden_states_t5.view(batch_size, sequence_length, -1) + + encoder_hidden_states = torch.cat([encoder_hidden_states, encoder_hidden_states_t5], dim=1) + text_embedding_mask = torch.cat([text_embedding_mask, text_embedding_mask_t5], dim=-1) + text_embedding_mask = text_embedding_mask.unsqueeze(2).bool() + + encoder_hidden_states = torch.where(text_embedding_mask, encoder_hidden_states, self.text_embedding_padding) + + block_res_samples = () + for layer, block in enumerate(self.blocks): + hidden_states = block( + hidden_states, + temb=temb, + encoder_hidden_states=encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) # (N, L, D) + + block_res_samples = block_res_samples + (hidden_states,) + + controlnet_block_res_samples = () + for block_res_sample, controlnet_block in zip(block_res_samples, self.controlnet_blocks): + block_res_sample = controlnet_block(block_res_sample) + controlnet_block_res_samples = controlnet_block_res_samples + (block_res_sample,) + + # 6. scaling + controlnet_block_res_samples = [sample * conditioning_scale for sample in controlnet_block_res_samples] + + if not return_dict: + return (controlnet_block_res_samples,) + + return HunyuanControlNetOutput(controlnet_block_samples=controlnet_block_res_samples) + + +class HunyuanDiT2DMultiControlNetModel(ModelMixin): + r""" + `HunyuanDiT2DMultiControlNetModel` wrapper class for Multi-HunyuanDiT2DControlNetModel + + This module is a wrapper for multiple instances of the `HunyuanDiT2DControlNetModel`. The `forward()` API is + designed to be compatible with `HunyuanDiT2DControlNetModel`. + + Args: + controlnets (`list[HunyuanDiT2DControlNetModel]`): + Provides additional conditioning to the unet during the denoising process. You must set multiple + `HunyuanDiT2DControlNetModel` as a list. + """ + + def __init__(self, controlnets): + super().__init__() + self.nets = nn.ModuleList(controlnets) + + def forward( + self, + hidden_states, + timestep, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + encoder_hidden_states=None, + text_embedding_mask=None, + encoder_hidden_states_t5=None, + text_embedding_mask_t5=None, + image_meta_size=None, + style=None, + image_rotary_emb=None, + return_dict=True, + ): + """ + The [`HunyuanDiT2DControlNetModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`): + The input tensor. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. + controlnet_cond ( `torch.Tensor` ): + The conditioning input to ControlNet. + conditioning_scale ( `float` ): + Indicate the conditioning scale. + encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. This is the output of `BertModel`. + text_embedding_mask: torch.Tensor + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output + of `BertModel`. + encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder. + text_embedding_mask_t5: torch.Tensor + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output + of T5 Text Encoder. + image_meta_size (torch.Tensor): + Conditional embedding indicate the image sizes + style: torch.Tensor: + Conditional embedding indicate the style + image_rotary_emb (`torch.Tensor`): + The image rotary embeddings to apply on query and key tensors during attention calculation. + return_dict: bool + Whether to return a dictionary. + """ + for i, (image, scale, controlnet) in enumerate(zip(controlnet_cond, conditioning_scale, self.nets)): + block_samples = controlnet( + hidden_states=hidden_states, + timestep=timestep, + controlnet_cond=image, + conditioning_scale=scale, + encoder_hidden_states=encoder_hidden_states, + text_embedding_mask=text_embedding_mask, + encoder_hidden_states_t5=encoder_hidden_states_t5, + text_embedding_mask_t5=text_embedding_mask_t5, + image_meta_size=image_meta_size, + style=style, + image_rotary_emb=image_rotary_emb, + return_dict=return_dict, + ) + + # merge samples + if i == 0: + control_block_samples = block_samples + else: + control_block_samples = [ + control_block_sample + block_sample + for control_block_sample, block_sample in zip(control_block_samples[0], block_samples[0]) + ] + control_block_samples = (control_block_samples,) + + return control_block_samples diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_qwenimage.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_qwenimage.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe7c159ad8959ee02adf6e3830448b02179b9b8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_qwenimage.py @@ -0,0 +1,328 @@ +# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import ( + BaseOutput, + apply_lora_scale, + deprecate, + logging, +) +from ..attention import AttentionMixin +from ..cache_utils import CacheMixin +from ..controlnets.controlnet import zero_module +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..transformers.transformer_qwenimage import ( + QwenEmbedRope, + QwenImageTransformerBlock, + QwenTimestepProjEmbeddings, + RMSNorm, + compute_text_seq_len_from_mask, +) + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class QwenImageControlNetOutput(BaseOutput): + controlnet_block_samples: tuple[torch.Tensor] + + +class QwenImageControlNetModel( + ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin +): + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + patch_size: int = 2, + in_channels: int = 64, + out_channels: int | None = 16, + num_layers: int = 60, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 3584, + axes_dims_rope: tuple[int, int, int] = (16, 56, 56), + extra_condition_channels: int = 0, # for controlnet-inpainting + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.pos_embed = QwenEmbedRope(theta=10000, axes_dim=list(axes_dims_rope), scale_rope=True) + + self.time_text_embed = QwenTimestepProjEmbeddings(embedding_dim=self.inner_dim) + + self.txt_norm = RMSNorm(joint_attention_dim, eps=1e-6) + + self.img_in = nn.Linear(in_channels, self.inner_dim) + self.txt_in = nn.Linear(joint_attention_dim, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + QwenImageTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_layers) + ] + ) + + # controlnet_blocks + self.controlnet_blocks = nn.ModuleList([]) + for _ in range(len(self.transformer_blocks)): + self.controlnet_blocks.append(zero_module(nn.Linear(self.inner_dim, self.inner_dim))) + self.controlnet_x_embedder = zero_module( + torch.nn.Linear(in_channels + extra_condition_channels, self.inner_dim) + ) + + self.gradient_checkpointing = False + + @classmethod + def from_transformer( + cls, + transformer, + num_layers: int = 5, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + load_weights_from_transformer=True, + extra_condition_channels: int = 0, + ): + config = dict(transformer.config) + config["num_layers"] = num_layers + config["attention_head_dim"] = attention_head_dim + config["num_attention_heads"] = num_attention_heads + config["extra_condition_channels"] = extra_condition_channels + + controlnet = cls.from_config(config) + + if load_weights_from_transformer: + controlnet.pos_embed.load_state_dict(transformer.pos_embed.state_dict()) + controlnet.time_text_embed.load_state_dict(transformer.time_text_embed.state_dict()) + controlnet.img_in.load_state_dict(transformer.img_in.state_dict()) + controlnet.txt_in.load_state_dict(transformer.txt_in.state_dict()) + controlnet.transformer_blocks.load_state_dict(transformer.transformer_blocks.state_dict(), strict=False) + controlnet.controlnet_x_embedder = zero_module(controlnet.controlnet_x_embedder) + + return controlnet + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + encoder_hidden_states: torch.Tensor = None, + encoder_hidden_states_mask: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_shapes: list[tuple[int, int, int]] | None = None, + txt_seq_lens: list[int] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.FloatTensor | Transformer2DModelOutput: + """ + The [`QwenImageControlNetModel`] forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + controlnet_cond (`torch.Tensor`): + The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`. + conditioning_scale (`float`, defaults to `1.0`): + The scale factor for ControlNet outputs. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + encoder_hidden_states_mask (`torch.Tensor` of shape `(batch_size, text_sequence_length)`, *optional*): + Mask for the encoder hidden states. Expected to have 1.0 for valid tokens and 0.0 for padding tokens. + Used in the attention processor to prevent attending to padding tokens. The mask can have any pattern + (not just contiguous valid tokens followed by padding) since it's applied element-wise in attention. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + img_shapes (`list[tuple[int, int, int]]`, *optional*): + Image shapes for RoPE computation. + txt_seq_lens (`list[int]`, *optional*): + **Deprecated**. Not needed anymore, we use `encoder_hidden_states` instead to infer text sequence + length. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.controlnet.ControlNetOutput`] instead of a plain tuple. + + Returns: + If `return_dict` is True, a [`~models.controlnet.ControlNetOutput`] is returned, otherwise a `tuple` where + the first element is the controlnet block samples. + """ + # Handle deprecated txt_seq_lens parameter + if txt_seq_lens is not None: + deprecate( + "txt_seq_lens", + "0.39.0", + "Passing `txt_seq_lens` to `QwenImageControlNetModel.forward()` is deprecated and will be removed in " + "version 0.39.0. The text sequence length is now automatically inferred from `encoder_hidden_states` " + "and `encoder_hidden_states_mask`.", + standard_warn=False, + ) + + hidden_states = self.img_in(hidden_states) + + # add + hidden_states = hidden_states + self.controlnet_x_embedder(controlnet_cond) + + temb = self.time_text_embed(timestep, hidden_states) + + # Use the encoder_hidden_states sequence length for RoPE computation and normalize mask + text_seq_len, _, encoder_hidden_states_mask = compute_text_seq_len_from_mask( + encoder_hidden_states, encoder_hidden_states_mask + ) + + image_rotary_emb = self.pos_embed(img_shapes, max_txt_seq_len=text_seq_len, device=hidden_states.device) + + timestep = timestep.to(hidden_states.dtype) + encoder_hidden_states = self.txt_norm(encoder_hidden_states) + encoder_hidden_states = self.txt_in(encoder_hidden_states) + + # Construct joint attention mask once to avoid reconstructing in every block + block_attention_kwargs = joint_attention_kwargs.copy() if joint_attention_kwargs is not None else {} + if encoder_hidden_states_mask is not None: + # Build joint mask: [text_mask, all_ones_for_image] + batch_size, image_seq_len = hidden_states.shape[:2] + image_mask = torch.ones((batch_size, image_seq_len), dtype=torch.bool, device=hidden_states.device) + joint_attention_mask = torch.cat([encoder_hidden_states_mask, image_mask], dim=1) + block_attention_kwargs["attention_mask"] = joint_attention_mask + + block_samples = () + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + None, # Don't pass encoder_hidden_states_mask (using attention_mask instead) + temb, + image_rotary_emb, + block_attention_kwargs, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + encoder_hidden_states_mask=None, # Don't pass (using attention_mask instead) + temb=temb, + image_rotary_emb=image_rotary_emb, + joint_attention_kwargs=block_attention_kwargs, + ) + block_samples = block_samples + (hidden_states,) + + # controlnet block + controlnet_block_samples = () + for block_sample, controlnet_block in zip(block_samples, self.controlnet_blocks): + block_sample = controlnet_block(block_sample) + controlnet_block_samples = controlnet_block_samples + (block_sample,) + + # scaling + controlnet_block_samples = [sample * conditioning_scale for sample in controlnet_block_samples] + controlnet_block_samples = None if len(controlnet_block_samples) == 0 else controlnet_block_samples + + if not return_dict: + return controlnet_block_samples + + return QwenImageControlNetOutput( + controlnet_block_samples=controlnet_block_samples, + ) + + +class QwenImageMultiControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin): + r""" + `QwenImageMultiControlNetModel` wrapper class for Multi-QwenImageControlNetModel + + This module is a wrapper for multiple instances of the `QwenImageControlNetModel`. The `forward()` API is designed + to be compatible with `QwenImageControlNetModel`. + + Args: + controlnets (`list[QwenImageControlNetModel]`): + Provides additional conditioning to the unet during the denoising process. You must set multiple + `QwenImageControlNetModel` as a list. + """ + + def __init__(self, controlnets): + super().__init__() + self.nets = nn.ModuleList(controlnets) + + def forward( + self, + hidden_states: torch.FloatTensor, + controlnet_cond: list[torch.tensor], + conditioning_scale: list[float], + encoder_hidden_states: torch.Tensor = None, + encoder_hidden_states_mask: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_shapes: list[tuple[int, int, int]] | None = None, + txt_seq_lens: list[int] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> QwenImageControlNetOutput | tuple: + if txt_seq_lens is not None: + deprecate( + "txt_seq_lens", + "0.39.0", + "Passing `txt_seq_lens` to `QwenImageMultiControlNetModel.forward()` is deprecated and will be " + "removed in version 0.39.0. The text sequence length is now automatically inferred from " + "`encoder_hidden_states` and `encoder_hidden_states_mask`.", + standard_warn=False, + ) + # ControlNet-Union with multiple conditions + # only load one ControlNet for saving memories + if len(self.nets) == 1: + controlnet = self.nets[0] + + for i, (image, scale) in enumerate(zip(controlnet_cond, conditioning_scale)): + block_samples = controlnet( + hidden_states=hidden_states, + controlnet_cond=image, + conditioning_scale=scale, + encoder_hidden_states=encoder_hidden_states, + encoder_hidden_states_mask=encoder_hidden_states_mask, + timestep=timestep, + img_shapes=img_shapes, + joint_attention_kwargs=joint_attention_kwargs, + return_dict=return_dict, + ) + + # merge samples + if i == 0: + control_block_samples = block_samples + else: + if block_samples is not None and control_block_samples is not None: + control_block_samples = [ + control_block_sample + block_sample + for control_block_sample, block_sample in zip(control_block_samples, block_samples) + ] + else: + raise ValueError("QwenImageMultiControlNetModel only supports a single controlnet-union now.") + + return control_block_samples diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sana.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sana.py new file mode 100644 index 0000000000000000000000000000000000000000..29e5591fa2847bb1d1adf3c2d23475b1b1d2b3a2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sana.py @@ -0,0 +1,212 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import BaseOutput, apply_lora_scale, logging +from ..attention import AttentionMixin +from ..embeddings import PatchEmbed, PixArtAlphaTextProjection +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle, RMSNorm +from ..transformers.sana_transformer import SanaTransformerBlock +from .controlnet import zero_module + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class SanaControlNetOutput(BaseOutput): + controlnet_block_samples: tuple[torch.Tensor] + + +class SanaControlNetModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin): + _supports_gradient_checkpointing = True + _no_split_modules = ["SanaTransformerBlock", "PatchEmbed"] + _skip_layerwise_casting_patterns = ["patch_embed", "norm"] + + @register_to_config + def __init__( + self, + in_channels: int = 32, + out_channels: int | None = 32, + num_attention_heads: int = 70, + attention_head_dim: int = 32, + num_layers: int = 7, + num_cross_attention_heads: int | None = 20, + cross_attention_head_dim: int | None = 112, + cross_attention_dim: int | None = 2240, + caption_channels: int = 2304, + mlp_ratio: float = 2.5, + dropout: float = 0.0, + attention_bias: bool = False, + sample_size: int = 32, + patch_size: int = 1, + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + interpolation_scale: int | None = None, + ) -> None: + super().__init__() + + out_channels = out_channels or in_channels + inner_dim = num_attention_heads * attention_head_dim + + # 1. Patch Embedding + self.patch_embed = PatchEmbed( + height=sample_size, + width=sample_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=inner_dim, + interpolation_scale=interpolation_scale, + pos_embed_type="sincos" if interpolation_scale is not None else None, + ) + + # 2. Additional condition embeddings + self.time_embed = AdaLayerNormSingle(inner_dim) + + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=inner_dim) + self.caption_norm = RMSNorm(inner_dim, eps=1e-5, elementwise_affine=True) + + # 3. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + SanaTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + num_cross_attention_heads=num_cross_attention_heads, + cross_attention_head_dim=cross_attention_head_dim, + cross_attention_dim=cross_attention_dim, + attention_bias=attention_bias, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + mlp_ratio=mlp_ratio, + ) + for _ in range(num_layers) + ] + ) + + # controlnet_blocks + self.controlnet_blocks = nn.ModuleList([]) + + self.input_block = zero_module(nn.Linear(inner_dim, inner_dim)) + for _ in range(len(self.transformer_blocks)): + controlnet_block = nn.Linear(inner_dim, inner_dim) + controlnet_block = zero_module(controlnet_block) + self.controlnet_blocks.append(controlnet_block) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + encoder_attention_mask: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor, ...] | Transformer2DModelOutput: + # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. + # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. + # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. + # expects mask of shape: + # [batch, key_tokens] + # adds singleton query_tokens dimension: + # [batch, 1, key_tokens] + # this helps to broadcast it as a bias over attention scores, which will be in one of the following shapes: + # [batch, heads, query_tokens, key_tokens] (e.g. torch sdp attn) + # [batch * heads, query_tokens, key_tokens] (e.g. xformers or classic attn) + if attention_mask is not None and attention_mask.ndim == 2: + # assume that mask is expressed as: + # (1 = keep, 0 = discard) + # convert mask into a bias that can be added to attention scores: + # (keep = +0, discard = -10000.0) + attention_mask = (1 - attention_mask.to(hidden_states.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + # 1. Input + batch_size, num_channels, height, width = hidden_states.shape + p = self.config.patch_size + post_patch_height, post_patch_width = height // p, width // p + + hidden_states = self.patch_embed(hidden_states) + hidden_states = hidden_states + self.input_block(self.patch_embed(controlnet_cond.to(hidden_states.dtype))) + + timestep, embedded_timestep = self.time_embed( + timestep, batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.shape[-1]) + + encoder_hidden_states = self.caption_norm(encoder_hidden_states) + + # 2. Transformer blocks + block_res_samples = () + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.transformer_blocks: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + post_patch_height, + post_patch_width, + ) + block_res_samples = block_res_samples + (hidden_states,) + else: + for block in self.transformer_blocks: + hidden_states = block( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + post_patch_height, + post_patch_width, + ) + block_res_samples = block_res_samples + (hidden_states,) + + # 3. ControlNet blocks + controlnet_block_res_samples = () + for block_res_sample, controlnet_block in zip(block_res_samples, self.controlnet_blocks): + block_res_sample = controlnet_block(block_res_sample) + controlnet_block_res_samples = controlnet_block_res_samples + (block_res_sample,) + + controlnet_block_res_samples = [sample * conditioning_scale for sample in controlnet_block_res_samples] + + if not return_dict: + return (controlnet_block_res_samples,) + + return SanaControlNetOutput(controlnet_block_samples=controlnet_block_res_samples) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sd3.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sd3.py new file mode 100644 index 0000000000000000000000000000000000000000..b8cb97adb41a2d47046e73052de64d788e70f647 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sd3.py @@ -0,0 +1,427 @@ +# Copyright 2025 Stability AI, The HuggingFace Team and The InstantX Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from dataclasses import dataclass +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin, JointTransformerBlock +from ..attention_processor import Attention, FusedJointAttnProcessor2_0 +from ..embeddings import CombinedTimestepTextProjEmbeddings, PatchEmbed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..transformers.transformer_sd3 import SD3SingleTransformerBlock +from .controlnet import BaseOutput, zero_module + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class SD3ControlNetOutput(BaseOutput): + controlnet_block_samples: tuple[torch.Tensor] + + +class SD3ControlNetModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + r""" + ControlNet model for [Stable Diffusion 3](https://huggingface.co/papers/2403.03206). + + Parameters: + sample_size (`int`, defaults to `128`): + The width/height of the latents. This is fixed during training since it is used to learn a number of + position embeddings. + patch_size (`int`, defaults to `2`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `16`): + The number of latent channels in the input. + num_layers (`int`, defaults to `18`): + The number of layers of transformer blocks to use. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + num_attention_heads (`int`, defaults to `18`): + The number of heads to use for multi-head attention. + joint_attention_dim (`int`, defaults to `4096`): + The embedding dimension to use for joint text-image attention. + caption_projection_dim (`int`, defaults to `1152`): + The embedding dimension of caption embeddings. + pooled_projection_dim (`int`, defaults to `2048`): + The embedding dimension of pooled text projections. + out_channels (`int`, defaults to `16`): + The number of latent channels in the output. + pos_embed_max_size (`int`, defaults to `96`): + The maximum latent height/width of positional embeddings. + extra_conditioning_channels (`int`, defaults to `0`): + The number of extra channels to use for conditioning for patch embedding. + dual_attention_layers (`tuple[int, ...]`, defaults to `()`): + The number of dual-stream transformer blocks to use. + qk_norm (`str`, *optional*, defaults to `None`): + The normalization to use for query and key in the attention layer. If `None`, no normalization is used. + pos_embed_type (`str`, defaults to `"sincos"`): + The type of positional embedding to use. Choose between `"sincos"` and `None`. + use_pos_embed (`bool`, defaults to `True`): + Whether to use positional embeddings. + force_zeros_for_pooled_projection (`bool`, defaults to `True`): + Whether to force zeros for pooled projection embeddings. This is handled in the pipelines by reading the + config value of the ControlNet model. + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + sample_size: int = 128, + patch_size: int = 2, + in_channels: int = 16, + num_layers: int = 18, + attention_head_dim: int = 64, + num_attention_heads: int = 18, + joint_attention_dim: int = 4096, + caption_projection_dim: int = 1152, + pooled_projection_dim: int = 2048, + out_channels: int = 16, + pos_embed_max_size: int = 96, + extra_conditioning_channels: int = 0, + dual_attention_layers: tuple[int, ...] = (), + qk_norm: str | None = None, + pos_embed_type: str | None = "sincos", + use_pos_embed: bool = True, + force_zeros_for_pooled_projection: bool = True, + ): + super().__init__() + default_out_channels = in_channels + self.out_channels = out_channels if out_channels is not None else default_out_channels + self.inner_dim = num_attention_heads * attention_head_dim + + if use_pos_embed: + self.pos_embed = PatchEmbed( + height=sample_size, + width=sample_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=self.inner_dim, + pos_embed_max_size=pos_embed_max_size, + pos_embed_type=pos_embed_type, + ) + else: + self.pos_embed = None + self.time_text_embed = CombinedTimestepTextProjEmbeddings( + embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim + ) + if joint_attention_dim is not None: + self.context_embedder = nn.Linear(joint_attention_dim, caption_projection_dim) + + # `attention_head_dim` is doubled to account for the mixing. + # It needs to crafted when we get the actual checkpoints. + self.transformer_blocks = nn.ModuleList( + [ + JointTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + context_pre_only=False, + qk_norm=qk_norm, + use_dual_attention=True if i in dual_attention_layers else False, + ) + for i in range(num_layers) + ] + ) + else: + self.context_embedder = None + self.transformer_blocks = nn.ModuleList( + [ + SD3SingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_layers) + ] + ) + + # controlnet_blocks + self.controlnet_blocks = nn.ModuleList([]) + for _ in range(len(self.transformer_blocks)): + controlnet_block = nn.Linear(self.inner_dim, self.inner_dim) + controlnet_block = zero_module(controlnet_block) + self.controlnet_blocks.append(controlnet_block) + pos_embed_input = PatchEmbed( + height=sample_size, + width=sample_size, + patch_size=patch_size, + in_channels=in_channels + extra_conditioning_channels, + embed_dim=self.inner_dim, + pos_embed_type=None, + ) + self.pos_embed_input = zero_module(pos_embed_input) + + self.gradient_checkpointing = False + + # Copied from diffusers.models.unets.unet_3d_condition.UNet3DConditionModel.enable_forward_chunking + def enable_forward_chunking(self, chunk_size: int | None = None, dim: int = 0) -> None: + """ + Sets the attention processor to use [feed forward + chunking](https://huggingface.co/blog/reformer#2-chunked-feed-forward-layers). + + Parameters: + chunk_size (`int`, *optional*): + The chunk size of the feed-forward layers. If not specified, will run feed-forward layer individually + over each tensor of dim=`dim`. + dim (`int`, *optional*, defaults to `0`): + The dimension over which the feed-forward computation should be chunked. Choose between dim=0 (batch) + or dim=1 (sequence length). + """ + if dim not in [0, 1]: + raise ValueError(f"Make sure to set `dim` to either 0 or 1, not {dim}") + + # By default chunk size is 1 + chunk_size = chunk_size or 1 + + def fn_recursive_feed_forward(module: torch.nn.Module, chunk_size: int, dim: int): + if hasattr(module, "set_chunk_feed_forward"): + module.set_chunk_feed_forward(chunk_size=chunk_size, dim=dim) + + for child in module.children(): + fn_recursive_feed_forward(child, chunk_size, dim) + + for module in self.children(): + fn_recursive_feed_forward(module, chunk_size, dim) + + # Copied from diffusers.models.transformers.transformer_sd3.SD3Transformer2DModel.fuse_qkv_projections + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedJointAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + # Notes: This is for SD3.5 8b controlnet, which shares the pos_embed with the transformer + # we should have handled this in conversion script + def _get_pos_embed_from_transformer(self, transformer): + pos_embed = PatchEmbed( + height=transformer.config.sample_size, + width=transformer.config.sample_size, + patch_size=transformer.config.patch_size, + in_channels=transformer.config.in_channels, + embed_dim=transformer.inner_dim, + pos_embed_max_size=transformer.config.pos_embed_max_size, + ) + pos_embed.load_state_dict(transformer.pos_embed.state_dict(), strict=True) + return pos_embed + + @classmethod + def from_transformer( + cls, transformer, num_layers=12, num_extra_conditioning_channels=1, load_weights_from_transformer=True + ): + config = transformer.config + config["num_layers"] = num_layers or config.num_layers + config["extra_conditioning_channels"] = num_extra_conditioning_channels + controlnet = cls.from_config(config) + + if load_weights_from_transformer: + controlnet.pos_embed.load_state_dict(transformer.pos_embed.state_dict()) + controlnet.time_text_embed.load_state_dict(transformer.time_text_embed.state_dict()) + controlnet.context_embedder.load_state_dict(transformer.context_embedder.state_dict()) + controlnet.transformer_blocks.load_state_dict(transformer.transformer_blocks.state_dict(), strict=False) + + controlnet.pos_embed_input = zero_module(controlnet.pos_embed_input) + + return controlnet + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + encoder_hidden_states: torch.Tensor = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`SD3Transformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + controlnet_cond (`torch.Tensor`): + The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`. + conditioning_scale (`float`, defaults to `1.0`): + The scale factor for ControlNet outputs. + encoder_hidden_states (`torch.Tensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.Tensor` of shape `(batch_size, projection_dim)`): Embeddings projected + from the embeddings of input conditions. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + if self.pos_embed is not None and hidden_states.ndim != 4: + raise ValueError("hidden_states must be 4D when pos_embed is used") + + # SD3.5 8b controlnet does not have a `pos_embed`, + # it use the `pos_embed` from the transformer to process input before passing to controlnet + elif self.pos_embed is None and hidden_states.ndim != 3: + raise ValueError("hidden_states must be 3D when pos_embed is not used") + + if self.context_embedder is not None and encoder_hidden_states is None: + raise ValueError("encoder_hidden_states must be provided when context_embedder is used") + # SD3.5 8b controlnet does not have a `context_embedder`, it does not use `encoder_hidden_states` + elif self.context_embedder is None and encoder_hidden_states is not None: + raise ValueError("encoder_hidden_states should not be provided when context_embedder is not used") + + if self.pos_embed is not None: + hidden_states = self.pos_embed(hidden_states) # takes care of adding positional embeddings too. + + temb = self.time_text_embed(timestep, pooled_projections) + + if self.context_embedder is not None: + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + # add + hidden_states = hidden_states + self.pos_embed_input(controlnet_cond) + + block_res_samples = () + + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + if self.context_embedder is not None: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + ) + else: + # SD3.5 8b controlnet use single transformer block, which does not use `encoder_hidden_states` + hidden_states = self._gradient_checkpointing_func(block, hidden_states, temb) + + else: + if self.context_embedder is not None: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, encoder_hidden_states=encoder_hidden_states, temb=temb + ) + else: + # SD3.5 8b controlnet use single transformer block, which does not use `encoder_hidden_states` + hidden_states = block(hidden_states, temb) + + block_res_samples = block_res_samples + (hidden_states,) + + controlnet_block_res_samples = () + for block_res_sample, controlnet_block in zip(block_res_samples, self.controlnet_blocks): + block_res_sample = controlnet_block(block_res_sample) + controlnet_block_res_samples = controlnet_block_res_samples + (block_res_sample,) + + # 6. scaling + controlnet_block_res_samples = [sample * conditioning_scale for sample in controlnet_block_res_samples] + + if not return_dict: + return (controlnet_block_res_samples,) + + return SD3ControlNetOutput(controlnet_block_samples=controlnet_block_res_samples) + + +class SD3MultiControlNetModel(ModelMixin): + r""" + `SD3ControlNetModel` wrapper class for Multi-SD3ControlNet + + This module is a wrapper for multiple instances of the `SD3ControlNetModel`. The `forward()` API is designed to be + compatible with `SD3ControlNetModel`. + + Args: + controlnets (`list[SD3ControlNetModel]`): + Provides additional conditioning to the unet during the denoising process. You must set multiple + `SD3ControlNetModel` as a list. + """ + + def __init__(self, controlnets): + super().__init__() + self.nets = nn.ModuleList(controlnets) + + def forward( + self, + hidden_states: torch.Tensor, + controlnet_cond: list[torch.tensor], + conditioning_scale: list[float], + pooled_projections: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + timestep: torch.LongTensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> SD3ControlNetOutput | tuple: + for i, (image, scale, controlnet) in enumerate(zip(controlnet_cond, conditioning_scale, self.nets)): + block_samples = controlnet( + hidden_states=hidden_states, + timestep=timestep, + encoder_hidden_states=encoder_hidden_states, + pooled_projections=pooled_projections, + controlnet_cond=image, + conditioning_scale=scale, + joint_attention_kwargs=joint_attention_kwargs, + return_dict=return_dict, + ) + + # merge samples + if i == 0: + control_block_samples = block_samples + else: + control_block_samples = [ + control_block_sample + block_sample + for control_block_sample, block_sample in zip(control_block_samples[0], block_samples[0]) + ] + control_block_samples = (tuple(control_block_samples),) + + return control_block_samples diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sparsectrl.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sparsectrl.py new file mode 100644 index 0000000000000000000000000000000000000000..7da627fe6dd4acb69574f15cba7f362a880e8e6b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_sparsectrl.py @@ -0,0 +1,725 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Any + +import torch +from torch import nn +from torch.nn import functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin +from ...utils import BaseOutput, logging +from ..attention import AttentionMixin +from ..attention_processor import ( + ADDED_KV_ATTENTION_PROCESSORS, + CROSS_ATTENTION_PROCESSORS, + AttnAddedKVProcessor, + AttnProcessor, +) +from ..embeddings import TimestepEmbedding, Timesteps +from ..modeling_utils import ModelMixin +from ..unets.unet_2d_blocks import UNetMidBlock2DCrossAttn +from ..unets.unet_2d_condition import UNet2DConditionModel +from ..unets.unet_motion_model import CrossAttnDownBlockMotion, DownBlockMotion + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class SparseControlNetOutput(BaseOutput): + """ + The output of [`SparseControlNetModel`]. + + Args: + down_block_res_samples (`tuple[torch.Tensor]`): + A tuple of downsample activations at different resolutions for each downsampling block. Each tensor should + be of shape `(batch_size, channel * resolution, height //resolution, width // resolution)`. Output can be + used to condition the original UNet's downsampling activations. + mid_down_block_re_sample (`torch.Tensor`): + The activation of the middle block (the lowest sample resolution). Each tensor should be of shape + `(batch_size, channel * lowest_resolution, height // lowest_resolution, width // lowest_resolution)`. + Output can be used to condition the original UNet's middle block activation. + """ + + down_block_res_samples: tuple[torch.Tensor] + mid_block_res_sample: torch.Tensor + + +class SparseControlNetConditioningEmbedding(nn.Module): + def __init__( + self, + conditioning_embedding_channels: int, + conditioning_channels: int = 3, + block_out_channels: tuple[int, ...] = (16, 32, 96, 256), + ): + super().__init__() + + self.conv_in = nn.Conv2d(conditioning_channels, block_out_channels[0], kernel_size=3, padding=1) + self.blocks = nn.ModuleList([]) + + for i in range(len(block_out_channels) - 1): + channel_in = block_out_channels[i] + channel_out = block_out_channels[i + 1] + self.blocks.append(nn.Conv2d(channel_in, channel_in, kernel_size=3, padding=1)) + self.blocks.append(nn.Conv2d(channel_in, channel_out, kernel_size=3, padding=1, stride=2)) + + self.conv_out = zero_module( + nn.Conv2d(block_out_channels[-1], conditioning_embedding_channels, kernel_size=3, padding=1) + ) + + def forward(self, conditioning: torch.Tensor) -> torch.Tensor: + embedding = self.conv_in(conditioning) + embedding = F.silu(embedding) + + for block in self.blocks: + embedding = block(embedding) + embedding = F.silu(embedding) + + embedding = self.conv_out(embedding) + return embedding + + +class SparseControlNetModel(ModelMixin, AttentionMixin, ConfigMixin, FromOriginalModelMixin): + """ + A SparseControlNet model as described in [SparseCtrl: Adding Sparse Controls to Text-to-Video Diffusion + Models](https://huggingface.co/papers/2311.16933). + + Args: + in_channels (`int`, defaults to 4): + The number of channels in the input sample. + conditioning_channels (`int`, defaults to 4): + The number of input channels in the controlnet conditional embedding module. If + `concat_condition_embedding` is True, the value provided here is incremented by 1. + flip_sin_to_cos (`bool`, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + freq_shift (`int`, defaults to 0): + The frequency shift to apply to the time embedding. + down_block_types (`tuple[str]`, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`): + The tuple of downsample blocks to use. + only_cross_attention (`bool | tuple[bool]`, defaults to `False`): + block_out_channels (`tuple[int]`, defaults to `(320, 640, 1280, 1280)`): + The tuple of output channels for each block. + layers_per_block (`int`, defaults to 2): + The number of layers per block. + downsample_padding (`int`, defaults to 1): + The padding to use for the downsampling convolution. + mid_block_scale_factor (`float`, defaults to 1): + The scale factor to use for the mid block. + act_fn (`str`, defaults to "silu"): + The activation function to use. + norm_num_groups (`int`, *optional*, defaults to 32): + The number of groups to use for the normalization. If None, normalization and activation layers is skipped + in post-processing. + norm_eps (`float`, defaults to 1e-5): + The epsilon to use for the normalization. + cross_attention_dim (`int`, defaults to 1280): + The dimension of the cross attention features. + transformer_layers_per_block (`int` or `tuple[int]`, *optional*, defaults to 1): + The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for + [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.CrossAttnUpBlock2D`], + [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`]. + transformer_layers_per_mid_block (`int` or `tuple[int]`, *optional*, defaults to 1): + The number of transformer layers to use in each layer in the middle block. + attention_head_dim (`int` or `tuple[int]`, defaults to 8): + The dimension of the attention heads. + num_attention_heads (`int` or `tuple[int]`, *optional*): + The number of heads to use for multi-head attention. + use_linear_projection (`bool`, defaults to `False`): + upcast_attention (`bool`, defaults to `False`): + resnet_time_scale_shift (`str`, defaults to `"default"`): + Time scale shift config for ResNet blocks (see `ResnetBlock2D`). Choose from `default` or `scale_shift`. + conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`): + The tuple of output channel for each block in the `conditioning_embedding` layer. + global_pool_conditions (`bool`, defaults to `False`): + TODO(Patrick) - unused parameter + controlnet_conditioning_channel_order (`str`, defaults to `rgb`): + motion_max_seq_length (`int`, defaults to `32`): + The maximum sequence length to use in the motion module. + motion_num_attention_heads (`int` or `tuple[int]`, defaults to `8`): + The number of heads to use in each attention layer of the motion module. + concat_conditioning_mask (`bool`, defaults to `True`): + use_simplified_condition_embedding (`bool`, defaults to `True`): + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 4, + conditioning_channels: int = 4, + flip_sin_to_cos: bool = True, + freq_shift: int = 0, + down_block_types: tuple[str, ...] = ( + "CrossAttnDownBlockMotion", + "CrossAttnDownBlockMotion", + "CrossAttnDownBlockMotion", + "DownBlockMotion", + ), + only_cross_attention: bool | tuple[bool] = False, + block_out_channels: tuple[int, ...] = (320, 640, 1280, 1280), + layers_per_block: int = 2, + downsample_padding: int = 1, + mid_block_scale_factor: float = 1, + act_fn: str = "silu", + norm_num_groups: int | None = 32, + norm_eps: float = 1e-5, + cross_attention_dim: int = 768, + transformer_layers_per_block: int | tuple[int, ...] = 1, + transformer_layers_per_mid_block: int | tuple[int] | None = None, + temporal_transformer_layers_per_block: int | tuple[int, ...] = 1, + attention_head_dim: int | tuple[int, ...] = 8, + num_attention_heads: int | tuple[int, ...] | None = None, + use_linear_projection: bool = False, + upcast_attention: bool = False, + resnet_time_scale_shift: str = "default", + conditioning_embedding_out_channels: tuple[int, ...] | None = (16, 32, 96, 256), + global_pool_conditions: bool = False, + controlnet_conditioning_channel_order: str = "rgb", + motion_max_seq_length: int = 32, + motion_num_attention_heads: int = 8, + concat_conditioning_mask: bool = True, + use_simplified_condition_embedding: bool = True, + ): + super().__init__() + self.use_simplified_condition_embedding = use_simplified_condition_embedding + + # If `num_attention_heads` is not defined (which is the case for most models) + # it will default to `attention_head_dim`. This looks weird upon first reading it and it is. + # The reason for this behavior is to correct for incorrectly named variables that were introduced + # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 + # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking + # which is why we correct for the naming here. + num_attention_heads = num_attention_heads or attention_head_dim + + # Check inputs + if len(block_out_channels) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(only_cross_attention, bool) and len(only_cross_attention) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `only_cross_attention` as `down_block_types`. `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(num_attention_heads, int) and len(num_attention_heads) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}." + ) + + if isinstance(transformer_layers_per_block, int): + transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types) + if isinstance(temporal_transformer_layers_per_block, int): + temporal_transformer_layers_per_block = [temporal_transformer_layers_per_block] * len(down_block_types) + + # input + conv_in_kernel = 3 + conv_in_padding = (conv_in_kernel - 1) // 2 + self.conv_in = nn.Conv2d( + in_channels, block_out_channels[0], kernel_size=conv_in_kernel, padding=conv_in_padding + ) + + if concat_conditioning_mask: + conditioning_channels = conditioning_channels + 1 + + self.concat_conditioning_mask = concat_conditioning_mask + + # control net conditioning embedding + if use_simplified_condition_embedding: + self.controlnet_cond_embedding = zero_module( + nn.Conv2d(conditioning_channels, block_out_channels[0], kernel_size=3, padding=1) + ) + else: + self.controlnet_cond_embedding = SparseControlNetConditioningEmbedding( + conditioning_embedding_channels=block_out_channels[0], + block_out_channels=conditioning_embedding_out_channels, + conditioning_channels=conditioning_channels, + ) + + # time + time_embed_dim = block_out_channels[0] * 4 + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding( + timestep_input_dim, + time_embed_dim, + act_fn=act_fn, + ) + + self.down_blocks = nn.ModuleList([]) + self.controlnet_down_blocks = nn.ModuleList([]) + + if isinstance(cross_attention_dim, int): + cross_attention_dim = (cross_attention_dim,) * len(down_block_types) + + if isinstance(only_cross_attention, bool): + only_cross_attention = [only_cross_attention] * len(down_block_types) + + if isinstance(attention_head_dim, int): + attention_head_dim = (attention_head_dim,) * len(down_block_types) + + if isinstance(num_attention_heads, int): + num_attention_heads = (num_attention_heads,) * len(down_block_types) + + if isinstance(motion_num_attention_heads, int): + motion_num_attention_heads = (motion_num_attention_heads,) * len(down_block_types) + + # down + output_channel = block_out_channels[0] + + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + if down_block_type == "CrossAttnDownBlockMotion": + down_block = CrossAttnDownBlockMotion( + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + dropout=0, + num_layers=layers_per_block, + transformer_layers_per_block=transformer_layers_per_block[i], + resnet_eps=norm_eps, + resnet_time_scale_shift=resnet_time_scale_shift, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + resnet_pre_norm=True, + num_attention_heads=num_attention_heads[i], + cross_attention_dim=cross_attention_dim[i], + add_downsample=not is_final_block, + dual_cross_attention=False, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention[i], + upcast_attention=upcast_attention, + temporal_num_attention_heads=motion_num_attention_heads[i], + temporal_max_seq_length=motion_max_seq_length, + temporal_transformer_layers_per_block=temporal_transformer_layers_per_block[i], + temporal_double_self_attention=False, + ) + elif down_block_type == "DownBlockMotion": + down_block = DownBlockMotion( + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + dropout=0, + num_layers=layers_per_block, + resnet_eps=norm_eps, + resnet_time_scale_shift=resnet_time_scale_shift, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + resnet_pre_norm=True, + add_downsample=not is_final_block, + temporal_num_attention_heads=motion_num_attention_heads[i], + temporal_max_seq_length=motion_max_seq_length, + temporal_transformer_layers_per_block=temporal_transformer_layers_per_block[i], + temporal_double_self_attention=False, + ) + else: + raise ValueError( + "Invalid `block_type` encountered. Must be one of `CrossAttnDownBlockMotion` or `DownBlockMotion`" + ) + + self.down_blocks.append(down_block) + + for _ in range(layers_per_block): + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + if not is_final_block: + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + # mid + mid_block_channels = block_out_channels[-1] + + controlnet_block = nn.Conv2d(mid_block_channels, mid_block_channels, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_mid_block = controlnet_block + + if transformer_layers_per_mid_block is None: + transformer_layers_per_mid_block = ( + transformer_layers_per_block[-1] if isinstance(transformer_layers_per_block[-1], int) else 1 + ) + + self.mid_block = UNetMidBlock2DCrossAttn( + in_channels=mid_block_channels, + temb_channels=time_embed_dim, + dropout=0, + num_layers=1, + transformer_layers_per_block=transformer_layers_per_mid_block, + resnet_eps=norm_eps, + resnet_time_scale_shift=resnet_time_scale_shift, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + resnet_pre_norm=True, + num_attention_heads=num_attention_heads[-1], + output_scale_factor=mid_block_scale_factor, + cross_attention_dim=cross_attention_dim[-1], + dual_cross_attention=False, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + attention_type="default", + ) + + @classmethod + def from_unet( + cls, + unet: UNet2DConditionModel, + controlnet_conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int, ...] | None = (16, 32, 96, 256), + load_weights_from_unet: bool = True, + conditioning_channels: int = 3, + ) -> "SparseControlNetModel": + r""" + Instantiate a [`SparseControlNetModel`] from [`UNet2DConditionModel`]. + + Parameters: + unet (`UNet2DConditionModel`): + The UNet model weights to copy to the [`SparseControlNetModel`]. All configuration options are also + copied where applicable. + """ + transformer_layers_per_block = ( + unet.config.transformer_layers_per_block if "transformer_layers_per_block" in unet.config else 1 + ) + down_block_types = unet.config.down_block_types + + for i in range(len(down_block_types)): + if "CrossAttn" in down_block_types[i]: + down_block_types[i] = "CrossAttnDownBlockMotion" + elif "Down" in down_block_types[i]: + down_block_types[i] = "DownBlockMotion" + else: + raise ValueError("Invalid `block_type` encountered. Must be a cross-attention or down block") + + controlnet = cls( + in_channels=unet.config.in_channels, + conditioning_channels=conditioning_channels, + flip_sin_to_cos=unet.config.flip_sin_to_cos, + freq_shift=unet.config.freq_shift, + down_block_types=unet.config.down_block_types, + only_cross_attention=unet.config.only_cross_attention, + block_out_channels=unet.config.block_out_channels, + layers_per_block=unet.config.layers_per_block, + downsample_padding=unet.config.downsample_padding, + mid_block_scale_factor=unet.config.mid_block_scale_factor, + act_fn=unet.config.act_fn, + norm_num_groups=unet.config.norm_num_groups, + norm_eps=unet.config.norm_eps, + cross_attention_dim=unet.config.cross_attention_dim, + transformer_layers_per_block=transformer_layers_per_block, + attention_head_dim=unet.config.attention_head_dim, + num_attention_heads=unet.config.num_attention_heads, + use_linear_projection=unet.config.use_linear_projection, + upcast_attention=unet.config.upcast_attention, + resnet_time_scale_shift=unet.config.resnet_time_scale_shift, + conditioning_embedding_out_channels=conditioning_embedding_out_channels, + controlnet_conditioning_channel_order=controlnet_conditioning_channel_order, + ) + + if load_weights_from_unet: + controlnet.conv_in.load_state_dict(unet.conv_in.state_dict(), strict=False) + controlnet.time_proj.load_state_dict(unet.time_proj.state_dict(), strict=False) + controlnet.time_embedding.load_state_dict(unet.time_embedding.state_dict(), strict=False) + controlnet.down_blocks.load_state_dict(unet.down_blocks.state_dict(), strict=False) + controlnet.mid_block.load_state_dict(unet.mid_block.state_dict(), strict=False) + + return controlnet + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnAddedKVProcessor() + elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnProcessor() + else: + raise ValueError( + f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}" + ) + + self.set_attn_processor(processor) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attention_slice + def set_attention_slice(self, slice_size: str | int | list[int]) -> None: + r""" + Enable sliced attention computation. + + When this option is enabled, the attention module splits the input tensor in slices to compute attention in + several steps. This is useful for saving some memory in exchange for a small decrease in speed. + + Args: + slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`): + When `"auto"`, input to the attention heads is halved, so attention is computed in two steps. If + `"max"`, maximum amount of memory is saved by running only one slice at a time. If a number is + provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim` + must be a multiple of `slice_size`. + """ + sliceable_head_dims = [] + + def fn_recursive_retrieve_sliceable_dims(module: torch.nn.Module): + if hasattr(module, "set_attention_slice"): + sliceable_head_dims.append(module.sliceable_head_dim) + + for child in module.children(): + fn_recursive_retrieve_sliceable_dims(child) + + # retrieve number of attention layers + for module in self.children(): + fn_recursive_retrieve_sliceable_dims(module) + + num_sliceable_layers = len(sliceable_head_dims) + + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = [dim // 2 for dim in sliceable_head_dims] + elif slice_size == "max": + # make smallest slice possible + slice_size = num_sliceable_layers * [1] + + slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size + + if len(slice_size) != len(sliceable_head_dims): + raise ValueError( + f"You have provided {len(slice_size)}, but {self.config} has {len(sliceable_head_dims)} different" + f" attention layers. Make sure to match `len(slice_size)` to be {len(sliceable_head_dims)}." + ) + + for i in range(len(slice_size)): + size = slice_size[i] + dim = sliceable_head_dims[i] + if size is not None and size > dim: + raise ValueError(f"size {size} has to be smaller or equal to {dim}.") + + # Recursively walk through all the children. + # Any children which exposes the set_attention_slice method + # gets the message + def fn_recursive_set_attention_slice(module: torch.nn.Module, slice_size: list[int]): + if hasattr(module, "set_attention_slice"): + module.set_attention_slice(slice_size.pop()) + + for child in module.children(): + fn_recursive_set_attention_slice(child, slice_size) + + reversed_slice_size = list(reversed(slice_size)) + for module in self.children(): + fn_recursive_set_attention_slice(module, reversed_slice_size) + + def forward( + self, + sample: torch.Tensor, + timestep: torch.Tensor | float | int, + encoder_hidden_states: torch.Tensor, + controlnet_cond: torch.Tensor, + conditioning_scale: float = 1.0, + timestep_cond: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + conditioning_mask: torch.Tensor | None = None, + guess_mode: bool = False, + return_dict: bool = True, + ) -> SparseControlNetOutput | tuple[tuple[torch.Tensor, ...], torch.Tensor]: + """ + The [`SparseControlNetModel`] forward method. + + Args: + sample (`torch.Tensor`): + The noisy input tensor. + timestep (`torch.Tensor | float | int`): + The number of timesteps to denoise an input. + encoder_hidden_states (`torch.Tensor`): + The encoder hidden states. + controlnet_cond (`torch.Tensor`): + The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`. + conditioning_scale (`float`, defaults to `1.0`): + The scale factor for ControlNet outputs. + class_labels (`torch.Tensor`, *optional*, defaults to `None`): + Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings. + timestep_cond (`torch.Tensor`, *optional*, defaults to `None`): + Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the + timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep + embeddings. + attention_mask (`torch.Tensor`, *optional*, defaults to `None`): + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask + is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large + negative values to the attention scores corresponding to "discard" tokens. + added_cond_kwargs (`dict`): + Additional conditions for the Stable Diffusion XL UNet. + cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`): + A kwargs dictionary that if specified is passed along to the `AttnProcessor`. + guess_mode (`bool`, defaults to `False`): + In this mode, the ControlNet encoder tries its best to recognize the input content of the input even if + you remove all prompts. A `guidance_scale` between 3.0 and 5.0 is recommended. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~models.controlnet.ControlNetOutput`] instead of a plain tuple. + Returns: + [`~models.controlnet.ControlNetOutput`] **or** `tuple`: + If `return_dict` is `True`, a [`~models.controlnet.ControlNetOutput`] is returned, otherwise a tuple is + returned where the first element is the sample tensor. + """ + sample_batch_size, sample_channels, sample_num_frames, sample_height, sample_width = sample.shape + sample = torch.zeros_like(sample) + + # check channel order + channel_order = self.config.controlnet_conditioning_channel_order + + if channel_order == "rgb": + # in rgb order by default + ... + elif channel_order == "bgr": + controlnet_cond = torch.flip(controlnet_cond, dims=[1]) + else: + raise ValueError(f"unknown `controlnet_conditioning_channel_order`: {channel_order}") + + # prepare attention_mask + if attention_mask is not None: + attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + # This would be a good case for the `match` statement (Python 3.10+) + is_mps = sample.device.type == "mps" + is_npu = sample.device.type == "npu" + if isinstance(timestep, float): + dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + else: + dtype = torch.int32 if (is_mps or is_npu) else torch.int64 + timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device) + elif len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps.expand(sample.shape[0]) + + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=sample.dtype) + + emb = self.time_embedding(t_emb, timestep_cond) + emb = emb.repeat_interleave(sample_num_frames, dim=0, output_size=emb.shape[0] * sample_num_frames) + + # 2. pre-process + batch_size, channels, num_frames, height, width = sample.shape + + sample = sample.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width) + sample = self.conv_in(sample) + + batch_frames, channels, height, width = sample.shape + sample = sample[:, None].reshape(sample_batch_size, sample_num_frames, channels, height, width) + + if self.concat_conditioning_mask: + controlnet_cond = torch.cat([controlnet_cond, conditioning_mask], dim=1) + + batch_size, channels, num_frames, height, width = controlnet_cond.shape + controlnet_cond = controlnet_cond.permute(0, 2, 1, 3, 4).reshape( + batch_size * num_frames, channels, height, width + ) + controlnet_cond = self.controlnet_cond_embedding(controlnet_cond) + batch_frames, channels, height, width = controlnet_cond.shape + controlnet_cond = controlnet_cond[:, None].reshape(batch_size, num_frames, channels, height, width) + + sample = sample + controlnet_cond + + batch_size, num_frames, channels, height, width = sample.shape + sample = sample.reshape(sample_batch_size * sample_num_frames, channels, height, width) + + # 3. down + down_block_res_samples = (sample,) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention: + sample, res_samples = downsample_block( + hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + num_frames=num_frames, + cross_attention_kwargs=cross_attention_kwargs, + ) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb, num_frames=num_frames) + + down_block_res_samples += res_samples + + # 4. mid + if self.mid_block is not None: + if hasattr(self.mid_block, "has_cross_attention") and self.mid_block.has_cross_attention: + sample = self.mid_block( + sample, + emb, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + cross_attention_kwargs=cross_attention_kwargs, + ) + else: + sample = self.mid_block(sample, emb) + + # 5. Control net blocks + controlnet_down_block_res_samples = () + + for down_block_res_sample, controlnet_block in zip(down_block_res_samples, self.controlnet_down_blocks): + down_block_res_sample = controlnet_block(down_block_res_sample) + controlnet_down_block_res_samples = controlnet_down_block_res_samples + (down_block_res_sample,) + + down_block_res_samples = controlnet_down_block_res_samples + mid_block_res_sample = self.controlnet_mid_block(sample) + + # 6. scaling + if guess_mode and not self.config.global_pool_conditions: + scales = torch.logspace(-1, 0, len(down_block_res_samples) + 1, device=sample.device) # 0.1 to 1.0 + scales = scales * conditioning_scale + down_block_res_samples = [sample * scale for sample, scale in zip(down_block_res_samples, scales)] + mid_block_res_sample = mid_block_res_sample * scales[-1] # last one + else: + down_block_res_samples = [sample * conditioning_scale for sample in down_block_res_samples] + mid_block_res_sample = mid_block_res_sample * conditioning_scale + + if self.config.global_pool_conditions: + down_block_res_samples = [ + torch.mean(sample, dim=(2, 3), keepdim=True) for sample in down_block_res_samples + ] + mid_block_res_sample = torch.mean(mid_block_res_sample, dim=(2, 3), keepdim=True) + + if not return_dict: + return (down_block_res_samples, mid_block_res_sample) + + return SparseControlNetOutput( + down_block_res_samples=down_block_res_samples, mid_block_res_sample=mid_block_res_sample + ) + + +# Copied from diffusers.models.controlnets.controlnet.zero_module +def zero_module(module: nn.Module) -> nn.Module: + for p in module.parameters(): + nn.init.zeros_(p) + return module diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_union.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_union.py new file mode 100644 index 0000000000000000000000000000000000000000..8e434ba1f250b802722650ade7e93abdde3e94fb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_union.py @@ -0,0 +1,781 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders.single_file_model import FromOriginalModelMixin +from ...utils import logging +from ..attention import AttentionMixin +from ..attention_processor import ( + ADDED_KV_ATTENTION_PROCESSORS, + CROSS_ATTENTION_PROCESSORS, + AttnAddedKVProcessor, + AttnProcessor, +) +from ..embeddings import TextImageTimeEmbedding, TextTimeEmbedding, TimestepEmbedding, Timesteps +from ..modeling_utils import ModelMixin +from ..unets.unet_2d_blocks import ( + UNetMidBlock2DCrossAttn, + get_down_block, +) +from ..unets.unet_2d_condition import UNet2DConditionModel +from .controlnet import ControlNetConditioningEmbedding, ControlNetOutput, zero_module + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class QuickGELU(nn.Module): + """ + Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs + """ + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return input * torch.sigmoid(1.702 * input) + + +class ResidualAttentionMlp(nn.Module): + def __init__(self, d_model: int): + super().__init__() + self.c_fc = nn.Linear(d_model, d_model * 4) + self.gelu = QuickGELU() + self.c_proj = nn.Linear(d_model * 4, d_model) + + def forward(self, x: torch.Tensor): + x = self.c_fc(x) + x = self.gelu(x) + x = self.c_proj(x) + return x + + +class ResidualAttentionBlock(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = ResidualAttentionMlp(d_model) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor): + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class ControlNetUnionModel(ModelMixin, AttentionMixin, ConfigMixin, FromOriginalModelMixin): + """ + A ControlNetUnion model. + + Args: + in_channels (`int`, defaults to 4): + The number of channels in the input sample. + flip_sin_to_cos (`bool`, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + freq_shift (`int`, defaults to 0): + The frequency shift to apply to the time embedding. + down_block_types (`tuple[str]`, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`): + The tuple of downsample blocks to use. + only_cross_attention (`bool | tuple[bool]`, defaults to `False`): + block_out_channels (`tuple[int]`, defaults to `(320, 640, 1280, 1280)`): + The tuple of output channels for each block. + layers_per_block (`int`, defaults to 2): + The number of layers per block. + downsample_padding (`int`, defaults to 1): + The padding to use for the downsampling convolution. + mid_block_scale_factor (`float`, defaults to 1): + The scale factor to use for the mid block. + act_fn (`str`, defaults to "silu"): + The activation function to use. + norm_num_groups (`int`, *optional*, defaults to 32): + The number of groups to use for the normalization. If None, normalization and activation layers is skipped + in post-processing. + norm_eps (`float`, defaults to 1e-5): + The epsilon to use for the normalization. + cross_attention_dim (`int`, defaults to 1280): + The dimension of the cross attention features. + transformer_layers_per_block (`int` or `tuple[int]`, *optional*, defaults to 1): + The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for + [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.CrossAttnUpBlock2D`], + [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`]. + encoder_hid_dim (`int`, *optional*, defaults to None): + If `encoder_hid_dim_type` is defined, `encoder_hidden_states` will be projected from `encoder_hid_dim` + dimension to `cross_attention_dim`. + encoder_hid_dim_type (`str`, *optional*, defaults to `None`): + If given, the `encoder_hidden_states` and potentially other embeddings are down-projected to text + embeddings of dimension `cross_attention` according to `encoder_hid_dim_type`. + attention_head_dim (`int | tuple[int]`, defaults to 8): + The dimension of the attention heads. + use_linear_projection (`bool`, defaults to `False`): + class_embed_type (`str`, *optional*, defaults to `None`): + The type of class embedding to use which is ultimately summed with the time embeddings. Choose from None, + `"timestep"`, `"identity"`, `"projection"`, or `"simple_projection"`. + addition_embed_type (`str`, *optional*, defaults to `None`): + Configures an optional embedding which will be summed with the time embeddings. Choose from `None` or + "text". "text" will use the `TextTimeEmbedding` layer. + num_class_embeds (`int`, *optional*, defaults to 0): + Input dimension of the learnable embedding matrix to be projected to `time_embed_dim`, when performing + class conditioning with `class_embed_type` equal to `None`. + upcast_attention (`bool`, defaults to `False`): + resnet_time_scale_shift (`str`, defaults to `"default"`): + Time scale shift config for ResNet blocks (see `ResnetBlock2D`). Choose from `default` or `scale_shift`. + projection_class_embeddings_input_dim (`int`, *optional*, defaults to `None`): + The dimension of the `class_labels` input when `class_embed_type="projection"`. Required when + `class_embed_type="projection"`. + controlnet_conditioning_channel_order (`str`, defaults to `"rgb"`): + The channel order of conditional image. Will convert to `rgb` if it's `bgr`. + conditioning_embedding_out_channels (`tuple[int]`, *optional*, defaults to `(48, 96, 192, 384)`): + The tuple of output channel for each block in the `conditioning_embedding` layer. + global_pool_conditions (`bool`, defaults to `False`): + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 4, + conditioning_channels: int = 3, + flip_sin_to_cos: bool = True, + freq_shift: int = 0, + down_block_types: tuple[str, ...] = ( + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "DownBlock2D", + ), + only_cross_attention: bool | tuple[bool] = False, + block_out_channels: tuple[int, ...] = (320, 640, 1280, 1280), + layers_per_block: int = 2, + downsample_padding: int = 1, + mid_block_scale_factor: float = 1, + act_fn: str = "silu", + norm_num_groups: int | None = 32, + norm_eps: float = 1e-5, + cross_attention_dim: int = 1280, + transformer_layers_per_block: int | tuple[int, ...] = 1, + encoder_hid_dim: int | None = None, + encoder_hid_dim_type: str | None = None, + attention_head_dim: int | tuple[int, ...] = 8, + num_attention_heads: int | tuple[int, ...] | None = None, + use_linear_projection: bool = False, + class_embed_type: str | None = None, + addition_embed_type: str | None = None, + addition_time_embed_dim: int | None = None, + num_class_embeds: int | None = None, + upcast_attention: bool = False, + resnet_time_scale_shift: str = "default", + projection_class_embeddings_input_dim: int | None = None, + controlnet_conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int, ...] | None = (48, 96, 192, 384), + global_pool_conditions: bool = False, + addition_embed_type_num_heads: int = 64, + num_control_type: int = 6, + num_trans_channel: int = 320, + num_trans_head: int = 8, + num_trans_layer: int = 1, + num_proj_channel: int = 320, + ): + super().__init__() + + # If `num_attention_heads` is not defined (which is the case for most models) + # it will default to `attention_head_dim`. This looks weird upon first reading it and it is. + # The reason for this behavior is to correct for incorrectly named variables that were introduced + # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 + # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking + # which is why we correct for the naming here. + num_attention_heads = num_attention_heads or attention_head_dim + + # Check inputs + if len(block_out_channels) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(only_cross_attention, bool) and len(only_cross_attention) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `only_cross_attention` as `down_block_types`. `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(num_attention_heads, int) and len(num_attention_heads) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}." + ) + + if isinstance(transformer_layers_per_block, int): + transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types) + + # input + conv_in_kernel = 3 + conv_in_padding = (conv_in_kernel - 1) // 2 + self.conv_in = nn.Conv2d( + in_channels, block_out_channels[0], kernel_size=conv_in_kernel, padding=conv_in_padding + ) + + # time + time_embed_dim = block_out_channels[0] * 4 + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + self.time_embedding = TimestepEmbedding( + timestep_input_dim, + time_embed_dim, + act_fn=act_fn, + ) + + if encoder_hid_dim_type is not None: + raise ValueError(f"encoder_hid_dim_type: {encoder_hid_dim_type} must be None.") + else: + self.encoder_hid_proj = None + + # class embedding + if class_embed_type is None and num_class_embeds is not None: + self.class_embedding = nn.Embedding(num_class_embeds, time_embed_dim) + elif class_embed_type == "timestep": + self.class_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + elif class_embed_type == "identity": + self.class_embedding = nn.Identity(time_embed_dim, time_embed_dim) + elif class_embed_type == "projection": + if projection_class_embeddings_input_dim is None: + raise ValueError( + "`class_embed_type`: 'projection' requires `projection_class_embeddings_input_dim` be set" + ) + # The projection `class_embed_type` is the same as the timestep `class_embed_type` except + # 1. the `class_labels` inputs are not first converted to sinusoidal embeddings + # 2. it projects from an arbitrary input dimension. + # + # Note that `TimestepEmbedding` is quite general, being mainly linear layers and activations. + # When used for embedding actual timesteps, the timesteps are first converted to sinusoidal embeddings. + # As a result, `TimestepEmbedding` can be passed arbitrary vectors. + self.class_embedding = TimestepEmbedding(projection_class_embeddings_input_dim, time_embed_dim) + else: + self.class_embedding = None + + if addition_embed_type == "text": + if encoder_hid_dim is not None: + text_time_embedding_from_dim = encoder_hid_dim + else: + text_time_embedding_from_dim = cross_attention_dim + + self.add_embedding = TextTimeEmbedding( + text_time_embedding_from_dim, time_embed_dim, num_heads=addition_embed_type_num_heads + ) + elif addition_embed_type == "text_image": + # text_embed_dim and image_embed_dim DON'T have to be `cross_attention_dim`. To not clutter the __init__ too much + # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use + # case when `addition_embed_type == "text_image"` (Kandinsky 2.1)` + self.add_embedding = TextImageTimeEmbedding( + text_embed_dim=cross_attention_dim, image_embed_dim=cross_attention_dim, time_embed_dim=time_embed_dim + ) + elif addition_embed_type == "text_time": + self.add_time_proj = Timesteps(addition_time_embed_dim, flip_sin_to_cos, freq_shift) + self.add_embedding = TimestepEmbedding(projection_class_embeddings_input_dim, time_embed_dim) + + elif addition_embed_type is not None: + raise ValueError(f"addition_embed_type: {addition_embed_type} must be None, 'text' or 'text_image'.") + + # control net conditioning embedding + self.controlnet_cond_embedding = ControlNetConditioningEmbedding( + conditioning_embedding_channels=block_out_channels[0], + block_out_channels=conditioning_embedding_out_channels, + conditioning_channels=conditioning_channels, + ) + + task_scale_factor = num_trans_channel**0.5 + self.task_embedding = nn.Parameter(task_scale_factor * torch.randn(num_control_type, num_trans_channel)) + self.transformer_layes = nn.ModuleList( + [ResidualAttentionBlock(num_trans_channel, num_trans_head) for _ in range(num_trans_layer)] + ) + self.spatial_ch_projs = zero_module(nn.Linear(num_trans_channel, num_proj_channel)) + self.control_type_proj = Timesteps(addition_time_embed_dim, flip_sin_to_cos, freq_shift) + self.control_add_embedding = TimestepEmbedding(addition_time_embed_dim * num_control_type, time_embed_dim) + + self.down_blocks = nn.ModuleList([]) + self.controlnet_down_blocks = nn.ModuleList([]) + + if isinstance(only_cross_attention, bool): + only_cross_attention = [only_cross_attention] * len(down_block_types) + + if isinstance(attention_head_dim, int): + attention_head_dim = (attention_head_dim,) * len(down_block_types) + + if isinstance(num_attention_heads, int): + num_attention_heads = (num_attention_heads,) * len(down_block_types) + + # down + output_channel = block_out_channels[0] + + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + transformer_layers_per_block=transformer_layers_per_block[i], + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + cross_attention_dim=cross_attention_dim, + num_attention_heads=num_attention_heads[i], + attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel, + downsample_padding=downsample_padding, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention[i], + upcast_attention=upcast_attention, + resnet_time_scale_shift=resnet_time_scale_shift, + ) + self.down_blocks.append(down_block) + + for _ in range(layers_per_block): + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + if not is_final_block: + controlnet_block = nn.Conv2d(output_channel, output_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_down_blocks.append(controlnet_block) + + # mid + mid_block_channel = block_out_channels[-1] + + controlnet_block = nn.Conv2d(mid_block_channel, mid_block_channel, kernel_size=1) + controlnet_block = zero_module(controlnet_block) + self.controlnet_mid_block = controlnet_block + + self.mid_block = UNetMidBlock2DCrossAttn( + transformer_layers_per_block=transformer_layers_per_block[-1], + in_channels=mid_block_channel, + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift=resnet_time_scale_shift, + cross_attention_dim=cross_attention_dim, + num_attention_heads=num_attention_heads[-1], + resnet_groups=norm_num_groups, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + ) + + @classmethod + def from_unet( + cls, + unet: UNet2DConditionModel, + controlnet_conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int, ...] | None = (16, 32, 96, 256), + load_weights_from_unet: bool = True, + ): + r""" + Instantiate a [`ControlNetUnionModel`] from [`UNet2DConditionModel`]. + + Parameters: + unet (`UNet2DConditionModel`): + The UNet model weights to copy to the [`ControlNetUnionModel`]. All configuration options are also + copied where applicable. + """ + transformer_layers_per_block = ( + unet.config.transformer_layers_per_block if "transformer_layers_per_block" in unet.config else 1 + ) + encoder_hid_dim = unet.config.encoder_hid_dim if "encoder_hid_dim" in unet.config else None + encoder_hid_dim_type = unet.config.encoder_hid_dim_type if "encoder_hid_dim_type" in unet.config else None + addition_embed_type = unet.config.addition_embed_type if "addition_embed_type" in unet.config else None + addition_time_embed_dim = ( + unet.config.addition_time_embed_dim if "addition_time_embed_dim" in unet.config else None + ) + + controlnet = cls( + encoder_hid_dim=encoder_hid_dim, + encoder_hid_dim_type=encoder_hid_dim_type, + addition_embed_type=addition_embed_type, + addition_time_embed_dim=addition_time_embed_dim, + transformer_layers_per_block=transformer_layers_per_block, + in_channels=unet.config.in_channels, + flip_sin_to_cos=unet.config.flip_sin_to_cos, + freq_shift=unet.config.freq_shift, + down_block_types=unet.config.down_block_types, + only_cross_attention=unet.config.only_cross_attention, + block_out_channels=unet.config.block_out_channels, + layers_per_block=unet.config.layers_per_block, + downsample_padding=unet.config.downsample_padding, + mid_block_scale_factor=unet.config.mid_block_scale_factor, + act_fn=unet.config.act_fn, + norm_num_groups=unet.config.norm_num_groups, + norm_eps=unet.config.norm_eps, + cross_attention_dim=unet.config.cross_attention_dim, + attention_head_dim=unet.config.attention_head_dim, + num_attention_heads=unet.config.num_attention_heads, + use_linear_projection=unet.config.use_linear_projection, + class_embed_type=unet.config.class_embed_type, + num_class_embeds=unet.config.num_class_embeds, + upcast_attention=unet.config.upcast_attention, + resnet_time_scale_shift=unet.config.resnet_time_scale_shift, + projection_class_embeddings_input_dim=unet.config.projection_class_embeddings_input_dim, + controlnet_conditioning_channel_order=controlnet_conditioning_channel_order, + conditioning_embedding_out_channels=conditioning_embedding_out_channels, + ) + + if load_weights_from_unet: + controlnet.conv_in.load_state_dict(unet.conv_in.state_dict()) + controlnet.time_proj.load_state_dict(unet.time_proj.state_dict()) + controlnet.time_embedding.load_state_dict(unet.time_embedding.state_dict()) + + if controlnet.class_embedding: + controlnet.class_embedding.load_state_dict(unet.class_embedding.state_dict()) + + controlnet.down_blocks.load_state_dict(unet.down_blocks.state_dict(), strict=False) + controlnet.mid_block.load_state_dict(unet.mid_block.state_dict(), strict=False) + + return controlnet + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnAddedKVProcessor() + elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnProcessor() + else: + raise ValueError( + f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}" + ) + + self.set_attn_processor(processor) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_attention_slice + def set_attention_slice(self, slice_size: str | int | list[int]) -> None: + r""" + Enable sliced attention computation. + + When this option is enabled, the attention module splits the input tensor in slices to compute attention in + several steps. This is useful for saving some memory in exchange for a small decrease in speed. + + Args: + slice_size (`str` or `int` or `list(int)`, *optional*, defaults to `"auto"`): + When `"auto"`, input to the attention heads is halved, so attention is computed in two steps. If + `"max"`, maximum amount of memory is saved by running only one slice at a time. If a number is + provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim` + must be a multiple of `slice_size`. + """ + sliceable_head_dims = [] + + def fn_recursive_retrieve_sliceable_dims(module: torch.nn.Module): + if hasattr(module, "set_attention_slice"): + sliceable_head_dims.append(module.sliceable_head_dim) + + for child in module.children(): + fn_recursive_retrieve_sliceable_dims(child) + + # retrieve number of attention layers + for module in self.children(): + fn_recursive_retrieve_sliceable_dims(module) + + num_sliceable_layers = len(sliceable_head_dims) + + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = [dim // 2 for dim in sliceable_head_dims] + elif slice_size == "max": + # make smallest slice possible + slice_size = num_sliceable_layers * [1] + + slice_size = num_sliceable_layers * [slice_size] if not isinstance(slice_size, list) else slice_size + + if len(slice_size) != len(sliceable_head_dims): + raise ValueError( + f"You have provided {len(slice_size)}, but {self.config} has {len(sliceable_head_dims)} different" + f" attention layers. Make sure to match `len(slice_size)` to be {len(sliceable_head_dims)}." + ) + + for i in range(len(slice_size)): + size = slice_size[i] + dim = sliceable_head_dims[i] + if size is not None and size > dim: + raise ValueError(f"size {size} has to be smaller or equal to {dim}.") + + # Recursively walk through all the children. + # Any children which exposes the set_attention_slice method + # gets the message + def fn_recursive_set_attention_slice(module: torch.nn.Module, slice_size: list[int]): + if hasattr(module, "set_attention_slice"): + module.set_attention_slice(slice_size.pop()) + + for child in module.children(): + fn_recursive_set_attention_slice(child, slice_size) + + reversed_slice_size = list(reversed(slice_size)) + for module in self.children(): + fn_recursive_set_attention_slice(module, reversed_slice_size) + + def forward( + self, + sample: torch.Tensor, + timestep: torch.Tensor | float | int, + encoder_hidden_states: torch.Tensor, + controlnet_cond: list[torch.Tensor], + control_type: torch.Tensor, + control_type_idx: list[int], + conditioning_scale: float | list[float] = 1.0, + class_labels: torch.Tensor | None = None, + timestep_cond: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + from_multi: bool = False, + guess_mode: bool = False, + return_dict: bool = True, + ) -> ControlNetOutput | tuple[tuple[torch.Tensor, ...], torch.Tensor]: + """ + The [`ControlNetUnionModel`] forward method. + + Args: + sample (`torch.Tensor`): + The noisy input tensor. + timestep (`torch.Tensor | float | int`): + The number of timesteps to denoise an input. + encoder_hidden_states (`torch.Tensor`): + The encoder hidden states. + controlnet_cond (`list[torch.Tensor]`): + The conditional input tensors. + control_type (`torch.Tensor`): + A tensor of shape `(batch, num_control_type)` with values `0` or `1` depending on whether the control + type is used. + control_type_idx (`list[int]`): + The indices of `control_type`. + conditioning_scale (`float`, defaults to `1.0`): + The scale factor for ControlNet outputs. + class_labels (`torch.Tensor`, *optional*, defaults to `None`): + Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings. + timestep_cond (`torch.Tensor`, *optional*, defaults to `None`): + Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the + timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep + embeddings. + attention_mask (`torch.Tensor`, *optional*, defaults to `None`): + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask + is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large + negative values to the attention scores corresponding to "discard" tokens. + added_cond_kwargs (`dict`): + Additional conditions for the Stable Diffusion XL UNet. + cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`): + A kwargs dictionary that if specified is passed along to the `AttnProcessor`. + from_multi (`bool`, defaults to `False`): + Use standard scaling when called from `MultiControlNetUnionModel`. + guess_mode (`bool`, defaults to `False`): + In this mode, the ControlNet encoder tries its best to recognize the input content of the input even if + you remove all prompts. A `guidance_scale` between 3.0 and 5.0 is recommended. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~models.controlnet.ControlNetOutput`] instead of a plain tuple. + + Returns: + [`~models.controlnet.ControlNetOutput`] **or** `tuple`: + If `return_dict` is `True`, a [`~models.controlnet.ControlNetOutput`] is returned, otherwise a tuple is + returned where the first element is the sample tensor. + """ + if isinstance(conditioning_scale, float): + conditioning_scale = [conditioning_scale] * len(controlnet_cond) + + # check channel order + channel_order = self.config.controlnet_conditioning_channel_order + + if channel_order != "rgb": + raise ValueError(f"unknown `controlnet_conditioning_channel_order`: {channel_order}") + + # prepare attention_mask + if attention_mask is not None: + attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + # This would be a good case for the `match` statement (Python 3.10+) + is_mps = sample.device.type == "mps" + is_npu = sample.device.type == "npu" + if isinstance(timestep, float): + dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + else: + dtype = torch.int32 if (is_mps or is_npu) else torch.int64 + timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device) + elif len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps.expand(sample.shape[0]) + + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=sample.dtype) + + emb = self.time_embedding(t_emb, timestep_cond) + aug_emb = None + + if self.class_embedding is not None: + if class_labels is None: + raise ValueError("class_labels should be provided when num_class_embeds > 0") + + if self.config.class_embed_type == "timestep": + class_labels = self.time_proj(class_labels) + + class_emb = self.class_embedding(class_labels).to(dtype=self.dtype) + emb = emb + class_emb + + if self.config.addition_embed_type is not None: + if self.config.addition_embed_type == "text": + aug_emb = self.add_embedding(encoder_hidden_states) + + elif self.config.addition_embed_type == "text_time": + if "text_embeds" not in added_cond_kwargs: + raise ValueError( + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`" + ) + text_embeds = added_cond_kwargs.get("text_embeds") + if "time_ids" not in added_cond_kwargs: + raise ValueError( + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`" + ) + time_ids = added_cond_kwargs.get("time_ids") + time_embeds = self.add_time_proj(time_ids.flatten()) + time_embeds = time_embeds.reshape((text_embeds.shape[0], -1)) + + add_embeds = torch.concat([text_embeds, time_embeds], dim=-1) + add_embeds = add_embeds.to(emb.dtype) + aug_emb = self.add_embedding(add_embeds) + + control_embeds = self.control_type_proj(control_type.flatten()) + control_embeds = control_embeds.reshape((t_emb.shape[0], -1)) + control_embeds = control_embeds.to(emb.dtype) + control_emb = self.control_add_embedding(control_embeds) + emb = emb + control_emb + emb = emb + aug_emb if aug_emb is not None else emb + + # 2. pre-process + sample = self.conv_in(sample) + + inputs = [] + condition_list = [] + + for cond, control_idx, scale in zip(controlnet_cond, control_type_idx, conditioning_scale): + condition = self.controlnet_cond_embedding(cond) + feat_seq = torch.mean(condition, dim=(2, 3)) + feat_seq = feat_seq + self.task_embedding[control_idx] + if from_multi or len(control_type_idx) == 1: + inputs.append(feat_seq.unsqueeze(1)) + condition_list.append(condition) + else: + inputs.append(feat_seq.unsqueeze(1) * scale) + condition_list.append(condition * scale) + + condition = sample + feat_seq = torch.mean(condition, dim=(2, 3)) + inputs.append(feat_seq.unsqueeze(1)) + condition_list.append(condition) + + x = torch.cat(inputs, dim=1) + for layer in self.transformer_layes: + x = layer(x) + + controlnet_cond_fuser = sample * 0.0 + for (idx, condition), scale in zip(enumerate(condition_list[:-1]), conditioning_scale): + alpha = self.spatial_ch_projs(x[:, idx]) + alpha = alpha.unsqueeze(-1).unsqueeze(-1) + if from_multi or len(control_type_idx) == 1: + controlnet_cond_fuser += condition + alpha + else: + controlnet_cond_fuser += condition + alpha * scale + + sample = sample + controlnet_cond_fuser + + # 3. down + down_block_res_samples = (sample,) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention: + sample, res_samples = downsample_block( + hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + cross_attention_kwargs=cross_attention_kwargs, + ) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + if self.mid_block is not None: + sample = self.mid_block( + sample, + emb, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + cross_attention_kwargs=cross_attention_kwargs, + ) + + # 5. Control net blocks + controlnet_down_block_res_samples = () + + for down_block_res_sample, controlnet_block in zip(down_block_res_samples, self.controlnet_down_blocks): + down_block_res_sample = controlnet_block(down_block_res_sample) + controlnet_down_block_res_samples = controlnet_down_block_res_samples + (down_block_res_sample,) + + down_block_res_samples = controlnet_down_block_res_samples + + mid_block_res_sample = self.controlnet_mid_block(sample) + + # 6. scaling + if guess_mode and not self.config.global_pool_conditions: + scales = torch.logspace(-1, 0, len(down_block_res_samples) + 1, device=sample.device) # 0.1 to 1.0 + if from_multi or len(control_type_idx) == 1: + scales = scales * conditioning_scale[0] + down_block_res_samples = [sample * scale for sample, scale in zip(down_block_res_samples, scales)] + mid_block_res_sample = mid_block_res_sample * scales[-1] # last one + elif from_multi or len(control_type_idx) == 1: + down_block_res_samples = [sample * conditioning_scale[0] for sample in down_block_res_samples] + mid_block_res_sample = mid_block_res_sample * conditioning_scale[0] + + if self.config.global_pool_conditions: + down_block_res_samples = [ + torch.mean(sample, dim=(2, 3), keepdim=True) for sample in down_block_res_samples + ] + mid_block_res_sample = torch.mean(mid_block_res_sample, dim=(2, 3), keepdim=True) + + if not return_dict: + return (down_block_res_samples, mid_block_res_sample) + + return ControlNetOutput( + down_block_res_samples=down_block_res_samples, mid_block_res_sample=mid_block_res_sample + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_xs.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_xs.py new file mode 100644 index 0000000000000000000000000000000000000000..6221d4878de9925f1fdcce912de9a6f52a5aa545 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_xs.py @@ -0,0 +1,1838 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from math import gcd +from typing import Any + +import torch +from torch import Tensor, nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import BaseOutput, logging +from ...utils.torch_utils import apply_freeu +from ..attention import AttentionMixin +from ..attention_processor import ( + ADDED_KV_ATTENTION_PROCESSORS, + CROSS_ATTENTION_PROCESSORS, + Attention, + AttnAddedKVProcessor, + AttnProcessor, + FusedAttnProcessor2_0, +) +from ..embeddings import TimestepEmbedding, Timesteps +from ..modeling_utils import ModelMixin +from ..unets.unet_2d_blocks import ( + CrossAttnDownBlock2D, + CrossAttnUpBlock2D, + Downsample2D, + ResnetBlock2D, + Transformer2DModel, + UNetMidBlock2DCrossAttn, + Upsample2D, +) +from ..unets.unet_2d_condition import UNet2DConditionModel +from .controlnet import ControlNetConditioningEmbedding + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class ControlNetXSOutput(BaseOutput): + """ + The output of [`UNetControlNetXSModel`]. + + Args: + sample (`Tensor` of shape `(batch_size, num_channels, height, width)`): + The output of the `UNetControlNetXSModel`. Unlike `ControlNetOutput` this is NOT to be added to the base + model output, but is already the final output. + """ + + sample: Tensor = None + + +class DownBlockControlNetXSAdapter(nn.Module): + """Components that together with corresponding components from the base model will form a + `ControlNetXSCrossAttnDownBlock2D`""" + + def __init__( + self, + resnets: nn.ModuleList, + base_to_ctrl: nn.ModuleList, + ctrl_to_base: nn.ModuleList, + attentions: nn.ModuleList | None = None, + downsampler: nn.Conv2d | None = None, + ): + super().__init__() + self.resnets = resnets + self.base_to_ctrl = base_to_ctrl + self.ctrl_to_base = ctrl_to_base + self.attentions = attentions + self.downsamplers = downsampler + + +class MidBlockControlNetXSAdapter(nn.Module): + """Components that together with corresponding components from the base model will form a + `ControlNetXSCrossAttnMidBlock2D`""" + + def __init__(self, midblock: UNetMidBlock2DCrossAttn, base_to_ctrl: nn.ModuleList, ctrl_to_base: nn.ModuleList): + super().__init__() + self.midblock = midblock + self.base_to_ctrl = base_to_ctrl + self.ctrl_to_base = ctrl_to_base + + +class UpBlockControlNetXSAdapter(nn.Module): + """Components that together with corresponding components from the base model will form a `ControlNetXSCrossAttnUpBlock2D`""" + + def __init__(self, ctrl_to_base: nn.ModuleList): + super().__init__() + self.ctrl_to_base = ctrl_to_base + + +def get_down_block_adapter( + base_in_channels: int, + base_out_channels: int, + ctrl_in_channels: int, + ctrl_out_channels: int, + temb_channels: int, + max_norm_num_groups: int | None = 32, + has_crossattn=True, + transformer_layers_per_block: int | tuple[int] | None = 1, + num_attention_heads: int | None = 1, + cross_attention_dim: int | None = 1024, + add_downsample: bool = True, + upcast_attention: bool | None = False, + use_linear_projection: bool | None = True, +): + num_layers = 2 # only support sd + sdxl + + resnets = [] + attentions = [] + ctrl_to_base = [] + base_to_ctrl = [] + + if isinstance(transformer_layers_per_block, int): + transformer_layers_per_block = [transformer_layers_per_block] * num_layers + + for i in range(num_layers): + base_in_channels = base_in_channels if i == 0 else base_out_channels + ctrl_in_channels = ctrl_in_channels if i == 0 else ctrl_out_channels + + # Before the resnet/attention application, information is concatted from base to control. + # Concat doesn't require change in number of channels + base_to_ctrl.append(make_zero_conv(base_in_channels, base_in_channels)) + + resnets.append( + ResnetBlock2D( + in_channels=ctrl_in_channels + base_in_channels, # information from base is concatted to ctrl + out_channels=ctrl_out_channels, + temb_channels=temb_channels, + groups=find_largest_factor(ctrl_in_channels + base_in_channels, max_factor=max_norm_num_groups), + groups_out=find_largest_factor(ctrl_out_channels, max_factor=max_norm_num_groups), + eps=1e-5, + ) + ) + + if has_crossattn: + attentions.append( + Transformer2DModel( + num_attention_heads, + ctrl_out_channels // num_attention_heads, + in_channels=ctrl_out_channels, + num_layers=transformer_layers_per_block[i], + cross_attention_dim=cross_attention_dim, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + norm_num_groups=find_largest_factor(ctrl_out_channels, max_factor=max_norm_num_groups), + ) + ) + + # After the resnet/attention application, information is added from control to base + # Addition requires change in number of channels + ctrl_to_base.append(make_zero_conv(ctrl_out_channels, base_out_channels)) + + if add_downsample: + # Before the downsampler application, information is concatted from base to control + # Concat doesn't require change in number of channels + base_to_ctrl.append(make_zero_conv(base_out_channels, base_out_channels)) + + downsamplers = Downsample2D( + ctrl_out_channels + base_out_channels, use_conv=True, out_channels=ctrl_out_channels, name="op" + ) + + # After the downsampler application, information is added from control to base + # Addition requires change in number of channels + ctrl_to_base.append(make_zero_conv(ctrl_out_channels, base_out_channels)) + else: + downsamplers = None + + down_block_components = DownBlockControlNetXSAdapter( + resnets=nn.ModuleList(resnets), + base_to_ctrl=nn.ModuleList(base_to_ctrl), + ctrl_to_base=nn.ModuleList(ctrl_to_base), + ) + + if has_crossattn: + down_block_components.attentions = nn.ModuleList(attentions) + if downsamplers is not None: + down_block_components.downsamplers = downsamplers + + return down_block_components + + +def get_mid_block_adapter( + base_channels: int, + ctrl_channels: int, + temb_channels: int | None = None, + max_norm_num_groups: int | None = 32, + transformer_layers_per_block: int = 1, + num_attention_heads: int | None = 1, + cross_attention_dim: int | None = 1024, + upcast_attention: bool = False, + use_linear_projection: bool = True, +): + # Before the midblock application, information is concatted from base to control. + # Concat doesn't require change in number of channels + base_to_ctrl = make_zero_conv(base_channels, base_channels) + + midblock = UNetMidBlock2DCrossAttn( + transformer_layers_per_block=transformer_layers_per_block, + in_channels=ctrl_channels + base_channels, + out_channels=ctrl_channels, + temb_channels=temb_channels, + # number or norm groups must divide both in_channels and out_channels + resnet_groups=find_largest_factor(gcd(ctrl_channels, ctrl_channels + base_channels), max_norm_num_groups), + cross_attention_dim=cross_attention_dim, + num_attention_heads=num_attention_heads, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + ) + + # After the midblock application, information is added from control to base + # Addition requires change in number of channels + ctrl_to_base = make_zero_conv(ctrl_channels, base_channels) + + return MidBlockControlNetXSAdapter(base_to_ctrl=base_to_ctrl, midblock=midblock, ctrl_to_base=ctrl_to_base) + + +def get_up_block_adapter( + out_channels: int, + prev_output_channel: int, + ctrl_skip_channels: list[int], +): + ctrl_to_base = [] + num_layers = 3 # only support sd + sdxl + for i in range(num_layers): + resnet_in_channels = prev_output_channel if i == 0 else out_channels + ctrl_to_base.append(make_zero_conv(ctrl_skip_channels[i], resnet_in_channels)) + + return UpBlockControlNetXSAdapter(ctrl_to_base=nn.ModuleList(ctrl_to_base)) + + +class ControlNetXSAdapter(ModelMixin, AttentionMixin, ConfigMixin): + r""" + A `ControlNetXSAdapter` model. To use it, pass it into a `UNetControlNetXSModel` (together with a + `UNet2DConditionModel` base model). + + This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic + methods implemented for all models (such as downloading or saving). + + Like `UNetControlNetXSModel`, `ControlNetXSAdapter` is compatible with StableDiffusion and StableDiffusion-XL. It's + default parameters are compatible with StableDiffusion. + + Parameters: + conditioning_channels (`int`, defaults to 3): + Number of channels of conditioning input (e.g. an image) + conditioning_channel_order (`str`, defaults to `"rgb"`): + The channel order of conditional image. Will convert to `rgb` if it's `bgr`. + conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`): + The tuple of output channels for each block in the `controlnet_cond_embedding` layer. + time_embedding_mix (`float`, defaults to 1.0): + If 0, then only the control adapters's time embedding is used. If 1, then only the base unet's time + embedding is used. Otherwise, both are combined. + learn_time_embedding (`bool`, defaults to `False`): + Whether a time embedding should be learned. If yes, `UNetControlNetXSModel` will combine the time + embeddings of the base model and the control adapter. If no, `UNetControlNetXSModel` will use the base + model's time embedding. + num_attention_heads (`list[int]`, defaults to `[4]`): + The number of attention heads. + block_out_channels (`list[int]`, defaults to `[4, 8, 16, 16]`): + The tuple of output channels for each block. + base_block_out_channels (`list[int]`, defaults to `[320, 640, 1280, 1280]`): + The tuple of output channels for each block in the base unet. + cross_attention_dim (`int`, defaults to 1024): + The dimension of the cross attention features. + down_block_types (`list[str]`, defaults to `["CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"]`): + The tuple of downsample blocks to use. + sample_size (`int`, defaults to 96): + Height and width of input/output sample. + transformer_layers_per_block (`int | tuple[int]`, defaults to 1): + The number of transformer blocks of type [`~models.attention.BasicTransformerBlock`]. Only relevant for + [`~models.unet_2d_blocks.CrossAttnDownBlock2D`], [`~models.unet_2d_blocks.UNetMidBlock2DCrossAttn`]. + upcast_attention (`bool`, defaults to `True`): + Whether the attention computation should always be upcasted. + max_norm_num_groups (`int`, defaults to 32): + Maximum number of groups in group normal. The actual number will be the largest divisor of the respective + channels, that is <= max_norm_num_groups. + """ + + @register_to_config + def __init__( + self, + conditioning_channels: int = 3, + conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int] = (16, 32, 96, 256), + time_embedding_mix: float = 1.0, + learn_time_embedding: bool = False, + num_attention_heads: int | tuple[int] = 4, + block_out_channels: tuple[int] = (4, 8, 16, 16), + base_block_out_channels: tuple[int] = (320, 640, 1280, 1280), + cross_attention_dim: int = 1024, + down_block_types: tuple[str] = ( + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "DownBlock2D", + ), + sample_size: int | None = 96, + transformer_layers_per_block: int | tuple[int] = 1, + upcast_attention: bool = True, + max_norm_num_groups: int = 32, + use_linear_projection: bool = True, + ): + super().__init__() + + time_embedding_input_dim = base_block_out_channels[0] + time_embedding_dim = base_block_out_channels[0] * 4 + + # Check inputs + if conditioning_channel_order not in ["rgb", "bgr"]: + raise ValueError(f"unknown `conditioning_channel_order`: {conditioning_channel_order}") + + if len(block_out_channels) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}." + ) + + if not isinstance(transformer_layers_per_block, (list, tuple)): + transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types) + if not isinstance(cross_attention_dim, (list, tuple)): + cross_attention_dim = [cross_attention_dim] * len(down_block_types) + # see https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why `ControlNetXSAdapter` takes `num_attention_heads` instead of `attention_head_dim` + if not isinstance(num_attention_heads, (list, tuple)): + num_attention_heads = [num_attention_heads] * len(down_block_types) + + if len(num_attention_heads) != len(down_block_types): + raise ValueError( + f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}." + ) + + # 5 - Create conditioning hint embedding + self.controlnet_cond_embedding = ControlNetConditioningEmbedding( + conditioning_embedding_channels=block_out_channels[0], + block_out_channels=conditioning_embedding_out_channels, + conditioning_channels=conditioning_channels, + ) + + # time + if learn_time_embedding: + self.time_embedding = TimestepEmbedding(time_embedding_input_dim, time_embedding_dim) + else: + self.time_embedding = None + + self.down_blocks = nn.ModuleList([]) + self.up_connections = nn.ModuleList([]) + + # input + self.conv_in = nn.Conv2d(4, block_out_channels[0], kernel_size=3, padding=1) + self.control_to_base_for_conv_in = make_zero_conv(block_out_channels[0], base_block_out_channels[0]) + + # down + base_out_channels = base_block_out_channels[0] + ctrl_out_channels = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + base_in_channels = base_out_channels + base_out_channels = base_block_out_channels[i] + ctrl_in_channels = ctrl_out_channels + ctrl_out_channels = block_out_channels[i] + has_crossattn = "CrossAttn" in down_block_type + is_final_block = i == len(down_block_types) - 1 + + self.down_blocks.append( + get_down_block_adapter( + base_in_channels=base_in_channels, + base_out_channels=base_out_channels, + ctrl_in_channels=ctrl_in_channels, + ctrl_out_channels=ctrl_out_channels, + temb_channels=time_embedding_dim, + max_norm_num_groups=max_norm_num_groups, + has_crossattn=has_crossattn, + transformer_layers_per_block=transformer_layers_per_block[i], + num_attention_heads=num_attention_heads[i], + cross_attention_dim=cross_attention_dim[i], + add_downsample=not is_final_block, + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + ) + + # mid + self.mid_block = get_mid_block_adapter( + base_channels=base_block_out_channels[-1], + ctrl_channels=block_out_channels[-1], + temb_channels=time_embedding_dim, + transformer_layers_per_block=transformer_layers_per_block[-1], + num_attention_heads=num_attention_heads[-1], + cross_attention_dim=cross_attention_dim[-1], + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + + # up + # The skip connection channels are the output of the conv_in and of all the down subblocks + ctrl_skip_channels = [block_out_channels[0]] + for i, out_channels in enumerate(block_out_channels): + number_of_subblocks = ( + 3 if i < len(block_out_channels) - 1 else 2 + ) # every block has 3 subblocks, except last one, which has 2 as it has no downsampler + ctrl_skip_channels.extend([out_channels] * number_of_subblocks) + + reversed_base_block_out_channels = list(reversed(base_block_out_channels)) + + base_out_channels = reversed_base_block_out_channels[0] + for i in range(len(down_block_types)): + prev_base_output_channel = base_out_channels + base_out_channels = reversed_base_block_out_channels[i] + ctrl_skip_channels_ = [ctrl_skip_channels.pop() for _ in range(3)] + + self.up_connections.append( + get_up_block_adapter( + out_channels=base_out_channels, + prev_output_channel=prev_base_output_channel, + ctrl_skip_channels=ctrl_skip_channels_, + ) + ) + + @classmethod + def from_unet( + cls, + unet: UNet2DConditionModel, + size_ratio: float | None = None, + block_out_channels: list[int] | None = None, + num_attention_heads: list[int] | None = None, + learn_time_embedding: bool = False, + time_embedding_mix: int = 1.0, + conditioning_channels: int = 3, + conditioning_channel_order: str = "rgb", + conditioning_embedding_out_channels: tuple[int] = (16, 32, 96, 256), + ): + r""" + Instantiate a [`ControlNetXSAdapter`] from a [`UNet2DConditionModel`]. + + Parameters: + unet (`UNet2DConditionModel`): + The UNet model we want to control. The dimensions of the ControlNetXSAdapter will be adapted to it. + size_ratio (float, *optional*, defaults to `None`): + When given, block_out_channels is set to a fraction of the base model's block_out_channels. Either this + or `block_out_channels` must be given. + block_out_channels (`list[int]`, *optional*, defaults to `None`): + Down blocks output channels in control model. Either this or `size_ratio` must be given. + num_attention_heads (`list[int]`, *optional*, defaults to `None`): + The dimension of the attention heads. The naming seems a bit confusing and it is, see + https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why. + learn_time_embedding (`bool`, defaults to `False`): + Whether the `ControlNetXSAdapter` should learn a time embedding. + time_embedding_mix (`float`, defaults to 1.0): + If 0, then only the control adapter's time embedding is used. If 1, then only the base unet's time + embedding is used. Otherwise, both are combined. + conditioning_channels (`int`, defaults to 3): + Number of channels of conditioning input (e.g. an image) + conditioning_channel_order (`str`, defaults to `"rgb"`): + The channel order of conditional image. Will convert to `rgb` if it's `bgr`. + conditioning_embedding_out_channels (`tuple[int]`, defaults to `(16, 32, 96, 256)`): + The tuple of output channel for each block in the `controlnet_cond_embedding` layer. + """ + + # Check input + fixed_size = block_out_channels is not None + relative_size = size_ratio is not None + if not (fixed_size ^ relative_size): + raise ValueError( + "Pass exactly one of `block_out_channels` (for absolute sizing) or `size_ratio` (for relative sizing)." + ) + + # Create model + block_out_channels = block_out_channels or [int(b * size_ratio) for b in unet.config.block_out_channels] + if num_attention_heads is None: + # The naming seems a bit confusing and it is, see https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why. + num_attention_heads = unet.config.attention_head_dim + + model = cls( + conditioning_channels=conditioning_channels, + conditioning_channel_order=conditioning_channel_order, + conditioning_embedding_out_channels=conditioning_embedding_out_channels, + time_embedding_mix=time_embedding_mix, + learn_time_embedding=learn_time_embedding, + num_attention_heads=num_attention_heads, + block_out_channels=block_out_channels, + base_block_out_channels=unet.config.block_out_channels, + cross_attention_dim=unet.config.cross_attention_dim, + down_block_types=unet.config.down_block_types, + sample_size=unet.config.sample_size, + transformer_layers_per_block=unet.config.transformer_layers_per_block, + upcast_attention=unet.config.upcast_attention, + max_norm_num_groups=unet.config.norm_num_groups, + use_linear_projection=unet.config.use_linear_projection, + ) + + # ensure that the ControlNetXSAdapter is the same dtype as the UNet2DConditionModel + model.to(unet.dtype) + + return model + + def forward(self, *args, **kwargs): + raise ValueError( + "A ControlNetXSAdapter cannot be run by itself. Use it together with a UNet2DConditionModel to instantiate a UNetControlNetXSModel." + ) + + +class UNetControlNetXSModel(ModelMixin, AttentionMixin, ConfigMixin): + r""" + A UNet fused with a ControlNet-XS adapter model + + This model inherits from [`ModelMixin`] and [`ConfigMixin`]. Check the superclass documentation for it's generic + methods implemented for all models (such as downloading or saving). + + `UNetControlNetXSModel` is compatible with StableDiffusion and StableDiffusion-XL. It's default parameters are + compatible with StableDiffusion. + + It's parameters are either passed to the underlying `UNet2DConditionModel` or used exactly like in + `ControlNetXSAdapter` . See their documentation for details. + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + # unet configs + sample_size: int | None = 96, + down_block_types: tuple[str] = ( + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "DownBlock2D", + ), + up_block_types: tuple[str] = ("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels: tuple[int] = (320, 640, 1280, 1280), + norm_num_groups: int | None = 32, + cross_attention_dim: int | tuple[int] = 1024, + transformer_layers_per_block: int | tuple[int] = 1, + num_attention_heads: int | tuple[int] = 8, + addition_embed_type: str | None = None, + addition_time_embed_dim: int | None = None, + upcast_attention: bool = True, + use_linear_projection: bool = True, + time_cond_proj_dim: int | None = None, + projection_class_embeddings_input_dim: int | None = None, + # additional controlnet configs + time_embedding_mix: float = 1.0, + ctrl_conditioning_channels: int = 3, + ctrl_conditioning_embedding_out_channels: tuple[int] = (16, 32, 96, 256), + ctrl_conditioning_channel_order: str = "rgb", + ctrl_learn_time_embedding: bool = False, + ctrl_block_out_channels: tuple[int] = (4, 8, 16, 16), + ctrl_num_attention_heads: int | tuple[int] = 4, + ctrl_max_norm_num_groups: int = 32, + ): + super().__init__() + + if time_embedding_mix < 0 or time_embedding_mix > 1: + raise ValueError("`time_embedding_mix` needs to be between 0 and 1.") + if time_embedding_mix < 1 and not ctrl_learn_time_embedding: + raise ValueError("To use `time_embedding_mix` < 1, `ctrl_learn_time_embedding` must be `True`") + + if addition_embed_type is not None and addition_embed_type != "text_time": + raise ValueError( + "As `UNetControlNetXSModel` currently only supports StableDiffusion and StableDiffusion-XL, `addition_embed_type` must be `None` or `'text_time'`." + ) + + if not isinstance(transformer_layers_per_block, (list, tuple)): + transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types) + if not isinstance(cross_attention_dim, (list, tuple)): + cross_attention_dim = [cross_attention_dim] * len(down_block_types) + if not isinstance(num_attention_heads, (list, tuple)): + num_attention_heads = [num_attention_heads] * len(down_block_types) + if not isinstance(ctrl_num_attention_heads, (list, tuple)): + ctrl_num_attention_heads = [ctrl_num_attention_heads] * len(down_block_types) + + base_num_attention_heads = num_attention_heads + + self.in_channels = 4 + + # # Input + self.base_conv_in = nn.Conv2d(4, block_out_channels[0], kernel_size=3, padding=1) + self.controlnet_cond_embedding = ControlNetConditioningEmbedding( + conditioning_embedding_channels=ctrl_block_out_channels[0], + block_out_channels=ctrl_conditioning_embedding_out_channels, + conditioning_channels=ctrl_conditioning_channels, + ) + self.ctrl_conv_in = nn.Conv2d(4, ctrl_block_out_channels[0], kernel_size=3, padding=1) + self.control_to_base_for_conv_in = make_zero_conv(ctrl_block_out_channels[0], block_out_channels[0]) + + # # Time + time_embed_input_dim = block_out_channels[0] + time_embed_dim = block_out_channels[0] * 4 + + self.base_time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos=True, downscale_freq_shift=0) + self.base_time_embedding = TimestepEmbedding( + time_embed_input_dim, + time_embed_dim, + cond_proj_dim=time_cond_proj_dim, + ) + if ctrl_learn_time_embedding: + self.ctrl_time_embedding = TimestepEmbedding( + in_channels=time_embed_input_dim, time_embed_dim=time_embed_dim + ) + else: + self.ctrl_time_embedding = None + + if addition_embed_type is None: + self.base_add_time_proj = None + self.base_add_embedding = None + else: + self.base_add_time_proj = Timesteps(addition_time_embed_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.base_add_embedding = TimestepEmbedding(projection_class_embeddings_input_dim, time_embed_dim) + + # # Create down blocks + down_blocks = [] + base_out_channels = block_out_channels[0] + ctrl_out_channels = ctrl_block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + base_in_channels = base_out_channels + base_out_channels = block_out_channels[i] + ctrl_in_channels = ctrl_out_channels + ctrl_out_channels = ctrl_block_out_channels[i] + has_crossattn = "CrossAttn" in down_block_type + is_final_block = i == len(down_block_types) - 1 + + down_blocks.append( + ControlNetXSCrossAttnDownBlock2D( + base_in_channels=base_in_channels, + base_out_channels=base_out_channels, + ctrl_in_channels=ctrl_in_channels, + ctrl_out_channels=ctrl_out_channels, + temb_channels=time_embed_dim, + norm_num_groups=norm_num_groups, + ctrl_max_norm_num_groups=ctrl_max_norm_num_groups, + has_crossattn=has_crossattn, + transformer_layers_per_block=transformer_layers_per_block[i], + base_num_attention_heads=base_num_attention_heads[i], + ctrl_num_attention_heads=ctrl_num_attention_heads[i], + cross_attention_dim=cross_attention_dim[i], + add_downsample=not is_final_block, + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + ) + + # # Create mid block + self.mid_block = ControlNetXSCrossAttnMidBlock2D( + base_channels=block_out_channels[-1], + ctrl_channels=ctrl_block_out_channels[-1], + temb_channels=time_embed_dim, + norm_num_groups=norm_num_groups, + ctrl_max_norm_num_groups=ctrl_max_norm_num_groups, + transformer_layers_per_block=transformer_layers_per_block[-1], + base_num_attention_heads=base_num_attention_heads[-1], + ctrl_num_attention_heads=ctrl_num_attention_heads[-1], + cross_attention_dim=cross_attention_dim[-1], + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + + # # Create up blocks + up_blocks = [] + rev_transformer_layers_per_block = list(reversed(transformer_layers_per_block)) + rev_num_attention_heads = list(reversed(base_num_attention_heads)) + rev_cross_attention_dim = list(reversed(cross_attention_dim)) + + # The skip connection channels are the output of the conv_in and of all the down subblocks + ctrl_skip_channels = [ctrl_block_out_channels[0]] + for i, out_channels in enumerate(ctrl_block_out_channels): + number_of_subblocks = ( + 3 if i < len(ctrl_block_out_channels) - 1 else 2 + ) # every block has 3 subblocks, except last one, which has 2 as it has no downsampler + ctrl_skip_channels.extend([out_channels] * number_of_subblocks) + + reversed_block_out_channels = list(reversed(block_out_channels)) + + out_channels = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = out_channels + out_channels = reversed_block_out_channels[i] + in_channels = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + ctrl_skip_channels_ = [ctrl_skip_channels.pop() for _ in range(3)] + + has_crossattn = "CrossAttn" in up_block_type + is_final_block = i == len(block_out_channels) - 1 + + up_blocks.append( + ControlNetXSCrossAttnUpBlock2D( + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + ctrl_skip_channels=ctrl_skip_channels_, + temb_channels=time_embed_dim, + resolution_idx=i, + has_crossattn=has_crossattn, + transformer_layers_per_block=rev_transformer_layers_per_block[i], + num_attention_heads=rev_num_attention_heads[i], + cross_attention_dim=rev_cross_attention_dim[i], + add_upsample=not is_final_block, + upcast_attention=upcast_attention, + norm_num_groups=norm_num_groups, + use_linear_projection=use_linear_projection, + ) + ) + + self.down_blocks = nn.ModuleList(down_blocks) + self.up_blocks = nn.ModuleList(up_blocks) + + self.base_conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=norm_num_groups) + self.base_conv_act = nn.SiLU() + self.base_conv_out = nn.Conv2d(block_out_channels[0], 4, kernel_size=3, padding=1) + + @classmethod + def from_unet( + cls, + unet: UNet2DConditionModel, + controlnet: ControlNetXSAdapter | None = None, + size_ratio: float | None = None, + ctrl_block_out_channels: list[float] | None = None, + time_embedding_mix: float | None = None, + ctrl_optional_kwargs: dict | None = None, + ): + r""" + Instantiate a [`UNetControlNetXSModel`] from a [`UNet2DConditionModel`] and an optional [`ControlNetXSAdapter`] + . + + Parameters: + unet (`UNet2DConditionModel`): + The UNet model we want to control. + controlnet (`ControlNetXSAdapter`): + The ControlNet-XS adapter with which the UNet will be fused. If none is given, a new ControlNet-XS + adapter will be created. + size_ratio (float, *optional*, defaults to `None`): + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. + ctrl_block_out_channels (`list[int]`, *optional*, defaults to `None`): + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details, + where this parameter is called `block_out_channels`. + time_embedding_mix (`float`, *optional*, defaults to None): + Used to construct the controlnet if none is given. See [`ControlNetXSAdapter.from_unet`] for details. + ctrl_optional_kwargs (`Dict`, *optional*, defaults to `None`): + Passed to the `init` of the new controlnet if no controlnet was given. + """ + if controlnet is None: + controlnet = ControlNetXSAdapter.from_unet( + unet, size_ratio, ctrl_block_out_channels, **ctrl_optional_kwargs + ) + else: + if any( + o is not None for o in (size_ratio, ctrl_block_out_channels, time_embedding_mix, ctrl_optional_kwargs) + ): + raise ValueError( + "When a controlnet is passed, none of these parameters should be passed: size_ratio, ctrl_block_out_channels, time_embedding_mix, ctrl_optional_kwargs." + ) + + # # get params + params_for_unet = [ + "sample_size", + "down_block_types", + "up_block_types", + "block_out_channels", + "norm_num_groups", + "cross_attention_dim", + "transformer_layers_per_block", + "addition_embed_type", + "addition_time_embed_dim", + "upcast_attention", + "use_linear_projection", + "time_cond_proj_dim", + "projection_class_embeddings_input_dim", + ] + params_for_unet = {k: v for k, v in unet.config.items() if k in params_for_unet} + # The naming seems a bit confusing and it is, see https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131 for why. + params_for_unet["num_attention_heads"] = unet.config.attention_head_dim + + params_for_controlnet = [ + "conditioning_channels", + "conditioning_embedding_out_channels", + "conditioning_channel_order", + "learn_time_embedding", + "block_out_channels", + "num_attention_heads", + "max_norm_num_groups", + ] + params_for_controlnet = {"ctrl_" + k: v for k, v in controlnet.config.items() if k in params_for_controlnet} + params_for_controlnet["time_embedding_mix"] = controlnet.config.time_embedding_mix + + # # create model + model = cls.from_config({**params_for_unet, **params_for_controlnet}) + + # # load weights + # from unet + modules_from_unet = [ + "time_embedding", + "conv_in", + "conv_norm_out", + "conv_out", + ] + for m in modules_from_unet: + getattr(model, "base_" + m).load_state_dict(getattr(unet, m).state_dict()) + + optional_modules_from_unet = [ + "add_time_proj", + "add_embedding", + ] + for m in optional_modules_from_unet: + if hasattr(unet, m) and getattr(unet, m) is not None: + getattr(model, "base_" + m).load_state_dict(getattr(unet, m).state_dict()) + + # from controlnet + model.controlnet_cond_embedding.load_state_dict(controlnet.controlnet_cond_embedding.state_dict()) + model.ctrl_conv_in.load_state_dict(controlnet.conv_in.state_dict()) + if controlnet.time_embedding is not None: + model.ctrl_time_embedding.load_state_dict(controlnet.time_embedding.state_dict()) + model.control_to_base_for_conv_in.load_state_dict(controlnet.control_to_base_for_conv_in.state_dict()) + + # from both + model.down_blocks = nn.ModuleList( + ControlNetXSCrossAttnDownBlock2D.from_modules(b, c) + for b, c in zip(unet.down_blocks, controlnet.down_blocks) + ) + model.mid_block = ControlNetXSCrossAttnMidBlock2D.from_modules(unet.mid_block, controlnet.mid_block) + model.up_blocks = nn.ModuleList( + ControlNetXSCrossAttnUpBlock2D.from_modules(b, c) + for b, c in zip(unet.up_blocks, controlnet.up_connections) + ) + + # ensure that the UNetControlNetXSModel is the same dtype as the UNet2DConditionModel + model.to(unet.dtype) + + return model + + def freeze_unet_params(self) -> None: + """Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine + tuning.""" + # Freeze everything + for param in self.parameters(): + param.requires_grad = True + + # Unfreeze ControlNetXSAdapter + base_parts = [ + "base_time_proj", + "base_time_embedding", + "base_add_time_proj", + "base_add_embedding", + "base_conv_in", + "base_conv_norm_out", + "base_conv_act", + "base_conv_out", + ] + base_parts = [getattr(self, part) for part in base_parts if getattr(self, part) is not None] + for part in base_parts: + for param in part.parameters(): + param.requires_grad = False + + for d in self.down_blocks: + d.freeze_base_params() + self.mid_block.freeze_base_params() + for u in self.up_blocks: + u.freeze_base_params() + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnAddedKVProcessor() + elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnProcessor() + else: + raise ValueError( + f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}" + ) + + self.set_attn_processor(processor) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.enable_freeu + def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): + r"""Enables the FreeU mechanism from https://huggingface.co/papers/2309.11497. + + The suffixes after the scaling factors represent the stage blocks where they are being applied. + + Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of values that + are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. + + Args: + s1 (`float`): + Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to + mitigate the "oversmoothing effect" in the enhanced denoising process. + s2 (`float`): + Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to + mitigate the "oversmoothing effect" in the enhanced denoising process. + b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. + b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. + """ + for i, upsample_block in enumerate(self.up_blocks): + setattr(upsample_block, "s1", s1) + setattr(upsample_block, "s2", s2) + setattr(upsample_block, "b1", b1) + setattr(upsample_block, "b2", b2) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.disable_freeu + def disable_freeu(self): + """Disables the FreeU mechanism.""" + freeu_keys = {"s1", "s2", "b1", "b2"} + for i, upsample_block in enumerate(self.up_blocks): + for k in freeu_keys: + if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None: + setattr(upsample_block, k, None) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + def forward( + self, + sample: Tensor, + timestep: torch.Tensor | float | int, + encoder_hidden_states: torch.Tensor, + controlnet_cond: torch.Tensor | None = None, + conditioning_scale: float | None = 1.0, + class_labels: torch.Tensor | None = None, + timestep_cond: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + return_dict: bool = True, + apply_control: bool = True, + ) -> ControlNetXSOutput | tuple: + """ + The [`ControlNetXSModel`] forward method. + + Args: + sample (`Tensor`): + The noisy input tensor. + timestep (`torch.Tensor | float | int`): + The number of timesteps to denoise an input. + encoder_hidden_states (`torch.Tensor`): + The encoder hidden states. + controlnet_cond (`Tensor`): + The conditional input tensor of shape `(batch_size, sequence_length, hidden_size)`. + conditioning_scale (`float`, defaults to `1.0`): + How much the control model affects the base model outputs. + class_labels (`torch.Tensor`, *optional*, defaults to `None`): + Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings. + timestep_cond (`torch.Tensor`, *optional*, defaults to `None`): + Additional conditional embeddings for timestep. If provided, the embeddings will be summed with the + timestep_embedding passed through the `self.time_embedding` layer to obtain the final timestep + embeddings. + attention_mask (`torch.Tensor`, *optional*, defaults to `None`): + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask + is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large + negative values to the attention scores corresponding to "discard" tokens. + cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`): + A kwargs dictionary that if specified is passed along to the `AttnProcessor`. + added_cond_kwargs (`dict`): + Additional conditions for the Stable Diffusion XL UNet. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~models.controlnets.controlnet.ControlNetOutput`] instead of a plain + tuple. + apply_control (`bool`, defaults to `True`): + If `False`, the input is run only through the base model. + + Returns: + [`~models.controlnetxs.ControlNetXSOutput`] **or** `tuple`: + If `return_dict` is `True`, a [`~models.controlnetxs.ControlNetXSOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + + # check channel order + if self.config.ctrl_conditioning_channel_order == "bgr": + controlnet_cond = torch.flip(controlnet_cond, dims=[1]) + + # prepare attention_mask + if attention_mask is not None: + attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + # This would be a good case for the `match` statement (Python 3.10+) + is_mps = sample.device.type == "mps" + is_npu = sample.device.type == "npu" + if isinstance(timestep, float): + dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + else: + dtype = torch.int32 if (is_mps or is_npu) else torch.int64 + timesteps = torch.tensor([timesteps], dtype=dtype, device=sample.device) + elif len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps.expand(sample.shape[0]) + + t_emb = self.base_time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=sample.dtype) + + if self.config.ctrl_learn_time_embedding and apply_control: + ctrl_temb = self.ctrl_time_embedding(t_emb, timestep_cond) + base_temb = self.base_time_embedding(t_emb, timestep_cond) + interpolation_param = self.config.time_embedding_mix**0.3 + + temb = ctrl_temb * interpolation_param + base_temb * (1 - interpolation_param) + else: + temb = self.base_time_embedding(t_emb) + + # added time & text embeddings + aug_emb = None + + if self.config.addition_embed_type is None: + pass + elif self.config.addition_embed_type == "text_time": + # SDXL - style + if "text_embeds" not in added_cond_kwargs: + raise ValueError( + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`" + ) + text_embeds = added_cond_kwargs.get("text_embeds") + if "time_ids" not in added_cond_kwargs: + raise ValueError( + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`" + ) + time_ids = added_cond_kwargs.get("time_ids") + time_embeds = self.base_add_time_proj(time_ids.flatten()) + time_embeds = time_embeds.reshape((text_embeds.shape[0], -1)) + add_embeds = torch.concat([text_embeds, time_embeds], dim=-1) + add_embeds = add_embeds.to(temb.dtype) + aug_emb = self.base_add_embedding(add_embeds) + else: + raise ValueError( + f"ControlNet-XS currently only supports StableDiffusion and StableDiffusion-XL, so addition_embed_type = {self.config.addition_embed_type} is currently not supported." + ) + + temb = temb + aug_emb if aug_emb is not None else temb + + # text embeddings + cemb = encoder_hidden_states + + # Preparation + h_ctrl = h_base = sample + hs_base, hs_ctrl = [], [] + + # Cross Control + guided_hint = self.controlnet_cond_embedding(controlnet_cond) + + # 1 - conv in & down + + h_base = self.base_conv_in(h_base) + h_ctrl = self.ctrl_conv_in(h_ctrl) + if guided_hint is not None: + h_ctrl += guided_hint + if apply_control: + h_base = h_base + self.control_to_base_for_conv_in(h_ctrl) * conditioning_scale # add ctrl -> base + + hs_base.append(h_base) + hs_ctrl.append(h_ctrl) + + for down in self.down_blocks: + h_base, h_ctrl, residual_hb, residual_hc = down( + hidden_states_base=h_base, + hidden_states_ctrl=h_ctrl, + temb=temb, + encoder_hidden_states=cemb, + conditioning_scale=conditioning_scale, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + apply_control=apply_control, + ) + hs_base.extend(residual_hb) + hs_ctrl.extend(residual_hc) + + # 2 - mid + h_base, h_ctrl = self.mid_block( + hidden_states_base=h_base, + hidden_states_ctrl=h_ctrl, + temb=temb, + encoder_hidden_states=cemb, + conditioning_scale=conditioning_scale, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + apply_control=apply_control, + ) + + # 3 - up + for up in self.up_blocks: + n_resnets = len(up.resnets) + skips_hb = hs_base[-n_resnets:] + skips_hc = hs_ctrl[-n_resnets:] + hs_base = hs_base[:-n_resnets] + hs_ctrl = hs_ctrl[:-n_resnets] + h_base = up( + hidden_states=h_base, + res_hidden_states_tuple_base=skips_hb, + res_hidden_states_tuple_ctrl=skips_hc, + temb=temb, + encoder_hidden_states=cemb, + conditioning_scale=conditioning_scale, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + apply_control=apply_control, + ) + + # 4 - conv out + h_base = self.base_conv_norm_out(h_base) + h_base = self.base_conv_act(h_base) + h_base = self.base_conv_out(h_base) + + if not return_dict: + return (h_base,) + + return ControlNetXSOutput(sample=h_base) + + +class ControlNetXSCrossAttnDownBlock2D(nn.Module): + def __init__( + self, + base_in_channels: int, + base_out_channels: int, + ctrl_in_channels: int, + ctrl_out_channels: int, + temb_channels: int, + norm_num_groups: int = 32, + ctrl_max_norm_num_groups: int = 32, + has_crossattn=True, + transformer_layers_per_block: int | tuple[int] | None = 1, + base_num_attention_heads: int | None = 1, + ctrl_num_attention_heads: int | None = 1, + cross_attention_dim: int | None = 1024, + add_downsample: bool = True, + upcast_attention: bool | None = False, + use_linear_projection: bool | None = True, + ): + super().__init__() + base_resnets = [] + base_attentions = [] + ctrl_resnets = [] + ctrl_attentions = [] + ctrl_to_base = [] + base_to_ctrl = [] + + num_layers = 2 # only support sd + sdxl + + if isinstance(transformer_layers_per_block, int): + transformer_layers_per_block = [transformer_layers_per_block] * num_layers + + for i in range(num_layers): + base_in_channels = base_in_channels if i == 0 else base_out_channels + ctrl_in_channels = ctrl_in_channels if i == 0 else ctrl_out_channels + + # Before the resnet/attention application, information is concatted from base to control. + # Concat doesn't require change in number of channels + base_to_ctrl.append(make_zero_conv(base_in_channels, base_in_channels)) + + base_resnets.append( + ResnetBlock2D( + in_channels=base_in_channels, + out_channels=base_out_channels, + temb_channels=temb_channels, + groups=norm_num_groups, + ) + ) + ctrl_resnets.append( + ResnetBlock2D( + in_channels=ctrl_in_channels + base_in_channels, # information from base is concatted to ctrl + out_channels=ctrl_out_channels, + temb_channels=temb_channels, + groups=find_largest_factor( + ctrl_in_channels + base_in_channels, max_factor=ctrl_max_norm_num_groups + ), + groups_out=find_largest_factor(ctrl_out_channels, max_factor=ctrl_max_norm_num_groups), + eps=1e-5, + ) + ) + + if has_crossattn: + base_attentions.append( + Transformer2DModel( + base_num_attention_heads, + base_out_channels // base_num_attention_heads, + in_channels=base_out_channels, + num_layers=transformer_layers_per_block[i], + cross_attention_dim=cross_attention_dim, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + norm_num_groups=norm_num_groups, + ) + ) + ctrl_attentions.append( + Transformer2DModel( + ctrl_num_attention_heads, + ctrl_out_channels // ctrl_num_attention_heads, + in_channels=ctrl_out_channels, + num_layers=transformer_layers_per_block[i], + cross_attention_dim=cross_attention_dim, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + norm_num_groups=find_largest_factor(ctrl_out_channels, max_factor=ctrl_max_norm_num_groups), + ) + ) + + # After the resnet/attention application, information is added from control to base + # Addition requires change in number of channels + ctrl_to_base.append(make_zero_conv(ctrl_out_channels, base_out_channels)) + + if add_downsample: + # Before the downsampler application, information is concatted from base to control + # Concat doesn't require change in number of channels + base_to_ctrl.append(make_zero_conv(base_out_channels, base_out_channels)) + + self.base_downsamplers = Downsample2D( + base_out_channels, use_conv=True, out_channels=base_out_channels, name="op" + ) + self.ctrl_downsamplers = Downsample2D( + ctrl_out_channels + base_out_channels, use_conv=True, out_channels=ctrl_out_channels, name="op" + ) + + # After the downsampler application, information is added from control to base + # Addition requires change in number of channels + ctrl_to_base.append(make_zero_conv(ctrl_out_channels, base_out_channels)) + else: + self.base_downsamplers = None + self.ctrl_downsamplers = None + + self.base_resnets = nn.ModuleList(base_resnets) + self.ctrl_resnets = nn.ModuleList(ctrl_resnets) + self.base_attentions = nn.ModuleList(base_attentions) if has_crossattn else [None] * num_layers + self.ctrl_attentions = nn.ModuleList(ctrl_attentions) if has_crossattn else [None] * num_layers + self.base_to_ctrl = nn.ModuleList(base_to_ctrl) + self.ctrl_to_base = nn.ModuleList(ctrl_to_base) + + self.gradient_checkpointing = False + + @classmethod + def from_modules(cls, base_downblock: CrossAttnDownBlock2D, ctrl_downblock: DownBlockControlNetXSAdapter): + # get params + def get_first_cross_attention(block): + return block.attentions[0].transformer_blocks[0].attn2 + + base_in_channels = base_downblock.resnets[0].in_channels + base_out_channels = base_downblock.resnets[0].out_channels + ctrl_in_channels = ( + ctrl_downblock.resnets[0].in_channels - base_in_channels + ) # base channels are concatted to ctrl channels in init + ctrl_out_channels = ctrl_downblock.resnets[0].out_channels + temb_channels = base_downblock.resnets[0].time_emb_proj.in_features + num_groups = base_downblock.resnets[0].norm1.num_groups + ctrl_num_groups = ctrl_downblock.resnets[0].norm1.num_groups + if hasattr(base_downblock, "attentions"): + has_crossattn = True + transformer_layers_per_block = len(base_downblock.attentions[0].transformer_blocks) + base_num_attention_heads = get_first_cross_attention(base_downblock).heads + ctrl_num_attention_heads = get_first_cross_attention(ctrl_downblock).heads + cross_attention_dim = get_first_cross_attention(base_downblock).cross_attention_dim + upcast_attention = get_first_cross_attention(base_downblock).upcast_attention + use_linear_projection = base_downblock.attentions[0].use_linear_projection + else: + has_crossattn = False + transformer_layers_per_block = None + base_num_attention_heads = None + ctrl_num_attention_heads = None + cross_attention_dim = None + upcast_attention = None + use_linear_projection = None + add_downsample = base_downblock.downsamplers is not None + + # create model + model = cls( + base_in_channels=base_in_channels, + base_out_channels=base_out_channels, + ctrl_in_channels=ctrl_in_channels, + ctrl_out_channels=ctrl_out_channels, + temb_channels=temb_channels, + norm_num_groups=num_groups, + ctrl_max_norm_num_groups=ctrl_num_groups, + has_crossattn=has_crossattn, + transformer_layers_per_block=transformer_layers_per_block, + base_num_attention_heads=base_num_attention_heads, + ctrl_num_attention_heads=ctrl_num_attention_heads, + cross_attention_dim=cross_attention_dim, + add_downsample=add_downsample, + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + + # # load weights + model.base_resnets.load_state_dict(base_downblock.resnets.state_dict()) + model.ctrl_resnets.load_state_dict(ctrl_downblock.resnets.state_dict()) + if has_crossattn: + model.base_attentions.load_state_dict(base_downblock.attentions.state_dict()) + model.ctrl_attentions.load_state_dict(ctrl_downblock.attentions.state_dict()) + if add_downsample: + model.base_downsamplers.load_state_dict(base_downblock.downsamplers[0].state_dict()) + model.ctrl_downsamplers.load_state_dict(ctrl_downblock.downsamplers.state_dict()) + model.base_to_ctrl.load_state_dict(ctrl_downblock.base_to_ctrl.state_dict()) + model.ctrl_to_base.load_state_dict(ctrl_downblock.ctrl_to_base.state_dict()) + + return model + + def freeze_base_params(self) -> None: + """Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine + tuning.""" + # Unfreeze everything + for param in self.parameters(): + param.requires_grad = True + + # Freeze base part + base_parts = [self.base_resnets] + if isinstance(self.base_attentions, nn.ModuleList): # attentions can be a list of Nones + base_parts.append(self.base_attentions) + if self.base_downsamplers is not None: + base_parts.append(self.base_downsamplers) + for part in base_parts: + for param in part.parameters(): + param.requires_grad = False + + def forward( + self, + hidden_states_base: Tensor, + temb: Tensor, + encoder_hidden_states: Tensor | None = None, + hidden_states_ctrl: Tensor | None = None, + conditioning_scale: float | None = 1.0, + attention_mask: Tensor | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + encoder_attention_mask: Tensor | None = None, + apply_control: bool = True, + ) -> tuple[Tensor, Tensor, tuple[Tensor, ...], tuple[Tensor, ...]]: + if cross_attention_kwargs is not None: + if cross_attention_kwargs.get("scale", None) is not None: + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") + + h_base = hidden_states_base + h_ctrl = hidden_states_ctrl + + base_output_states = () + ctrl_output_states = () + + base_blocks = list(zip(self.base_resnets, self.base_attentions)) + ctrl_blocks = list(zip(self.ctrl_resnets, self.ctrl_attentions)) + + for (b_res, b_attn), (c_res, c_attn), b2c, c2b in zip( + base_blocks, ctrl_blocks, self.base_to_ctrl, self.ctrl_to_base + ): + # concat base -> ctrl + if apply_control: + h_ctrl = torch.cat([h_ctrl, b2c(h_base)], dim=1) + + # apply base subblock + if torch.is_grad_enabled() and self.gradient_checkpointing: + h_base = self._gradient_checkpointing_func(b_res, h_base, temb) + else: + h_base = b_res(h_base, temb) + + if b_attn is not None: + h_base = b_attn( + h_base, + encoder_hidden_states=encoder_hidden_states, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + encoder_attention_mask=encoder_attention_mask, + return_dict=False, + )[0] + + # apply ctrl subblock + if apply_control: + if torch.is_grad_enabled() and self.gradient_checkpointing: + h_ctrl = self._gradient_checkpointing_func(c_res, h_ctrl, temb) + else: + h_ctrl = c_res(h_ctrl, temb) + if c_attn is not None: + h_ctrl = c_attn( + h_ctrl, + encoder_hidden_states=encoder_hidden_states, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + encoder_attention_mask=encoder_attention_mask, + return_dict=False, + )[0] + + # add ctrl -> base + if apply_control: + h_base = h_base + c2b(h_ctrl) * conditioning_scale + + base_output_states = base_output_states + (h_base,) + ctrl_output_states = ctrl_output_states + (h_ctrl,) + + if self.base_downsamplers is not None: # if we have a base_downsampler, then also a ctrl_downsampler + b2c = self.base_to_ctrl[-1] + c2b = self.ctrl_to_base[-1] + + # concat base -> ctrl + if apply_control: + h_ctrl = torch.cat([h_ctrl, b2c(h_base)], dim=1) + # apply base subblock + h_base = self.base_downsamplers(h_base) + # apply ctrl subblock + if apply_control: + h_ctrl = self.ctrl_downsamplers(h_ctrl) + # add ctrl -> base + if apply_control: + h_base = h_base + c2b(h_ctrl) * conditioning_scale + + base_output_states = base_output_states + (h_base,) + ctrl_output_states = ctrl_output_states + (h_ctrl,) + + return h_base, h_ctrl, base_output_states, ctrl_output_states + + +class ControlNetXSCrossAttnMidBlock2D(nn.Module): + def __init__( + self, + base_channels: int, + ctrl_channels: int, + temb_channels: int | None = None, + norm_num_groups: int = 32, + ctrl_max_norm_num_groups: int = 32, + transformer_layers_per_block: int = 1, + base_num_attention_heads: int | None = 1, + ctrl_num_attention_heads: int | None = 1, + cross_attention_dim: int | None = 1024, + upcast_attention: bool = False, + use_linear_projection: bool | None = True, + ): + super().__init__() + + # Before the midblock application, information is concatted from base to control. + # Concat doesn't require change in number of channels + self.base_to_ctrl = make_zero_conv(base_channels, base_channels) + + self.base_midblock = UNetMidBlock2DCrossAttn( + transformer_layers_per_block=transformer_layers_per_block, + in_channels=base_channels, + temb_channels=temb_channels, + resnet_groups=norm_num_groups, + cross_attention_dim=cross_attention_dim, + num_attention_heads=base_num_attention_heads, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + ) + + self.ctrl_midblock = UNetMidBlock2DCrossAttn( + transformer_layers_per_block=transformer_layers_per_block, + in_channels=ctrl_channels + base_channels, + out_channels=ctrl_channels, + temb_channels=temb_channels, + # number or norm groups must divide both in_channels and out_channels + resnet_groups=find_largest_factor( + gcd(ctrl_channels, ctrl_channels + base_channels), ctrl_max_norm_num_groups + ), + cross_attention_dim=cross_attention_dim, + num_attention_heads=ctrl_num_attention_heads, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + ) + + # After the midblock application, information is added from control to base + # Addition requires change in number of channels + self.ctrl_to_base = make_zero_conv(ctrl_channels, base_channels) + + self.gradient_checkpointing = False + + @classmethod + def from_modules( + cls, + base_midblock: UNetMidBlock2DCrossAttn, + ctrl_midblock: MidBlockControlNetXSAdapter, + ): + base_to_ctrl = ctrl_midblock.base_to_ctrl + ctrl_to_base = ctrl_midblock.ctrl_to_base + ctrl_midblock = ctrl_midblock.midblock + + # get params + def get_first_cross_attention(midblock): + return midblock.attentions[0].transformer_blocks[0].attn2 + + base_channels = ctrl_to_base.out_channels + ctrl_channels = ctrl_to_base.in_channels + transformer_layers_per_block = len(base_midblock.attentions[0].transformer_blocks) + temb_channels = base_midblock.resnets[0].time_emb_proj.in_features + num_groups = base_midblock.resnets[0].norm1.num_groups + ctrl_num_groups = ctrl_midblock.resnets[0].norm1.num_groups + base_num_attention_heads = get_first_cross_attention(base_midblock).heads + ctrl_num_attention_heads = get_first_cross_attention(ctrl_midblock).heads + cross_attention_dim = get_first_cross_attention(base_midblock).cross_attention_dim + upcast_attention = get_first_cross_attention(base_midblock).upcast_attention + use_linear_projection = base_midblock.attentions[0].use_linear_projection + + # create model + model = cls( + base_channels=base_channels, + ctrl_channels=ctrl_channels, + temb_channels=temb_channels, + norm_num_groups=num_groups, + ctrl_max_norm_num_groups=ctrl_num_groups, + transformer_layers_per_block=transformer_layers_per_block, + base_num_attention_heads=base_num_attention_heads, + ctrl_num_attention_heads=ctrl_num_attention_heads, + cross_attention_dim=cross_attention_dim, + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + + # load weights + model.base_to_ctrl.load_state_dict(base_to_ctrl.state_dict()) + model.base_midblock.load_state_dict(base_midblock.state_dict()) + model.ctrl_midblock.load_state_dict(ctrl_midblock.state_dict()) + model.ctrl_to_base.load_state_dict(ctrl_to_base.state_dict()) + + return model + + def freeze_base_params(self) -> None: + """Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine + tuning.""" + # Unfreeze everything + for param in self.parameters(): + param.requires_grad = True + + # Freeze base part + for param in self.base_midblock.parameters(): + param.requires_grad = False + + def forward( + self, + hidden_states_base: Tensor, + temb: Tensor, + encoder_hidden_states: Tensor, + hidden_states_ctrl: Tensor | None = None, + conditioning_scale: float | None = 1.0, + cross_attention_kwargs: dict[str, Any] | None = None, + attention_mask: Tensor | None = None, + encoder_attention_mask: Tensor | None = None, + apply_control: bool = True, + ) -> tuple[Tensor, Tensor]: + if cross_attention_kwargs is not None: + if cross_attention_kwargs.get("scale", None) is not None: + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") + + h_base = hidden_states_base + h_ctrl = hidden_states_ctrl + + joint_args = { + "temb": temb, + "encoder_hidden_states": encoder_hidden_states, + "attention_mask": attention_mask, + "cross_attention_kwargs": cross_attention_kwargs, + "encoder_attention_mask": encoder_attention_mask, + } + + if apply_control: + h_ctrl = torch.cat([h_ctrl, self.base_to_ctrl(h_base)], dim=1) # concat base -> ctrl + h_base = self.base_midblock(h_base, **joint_args) # apply base mid block + if apply_control: + h_ctrl = self.ctrl_midblock(h_ctrl, **joint_args) # apply ctrl mid block + h_base = h_base + self.ctrl_to_base(h_ctrl) * conditioning_scale # add ctrl -> base + + return h_base, h_ctrl + + +class ControlNetXSCrossAttnUpBlock2D(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + prev_output_channel: int, + ctrl_skip_channels: list[int], + temb_channels: int, + norm_num_groups: int = 32, + resolution_idx: int | None = None, + has_crossattn=True, + transformer_layers_per_block: int = 1, + num_attention_heads: int = 1, + cross_attention_dim: int = 1024, + add_upsample: bool = True, + upcast_attention: bool = False, + use_linear_projection: bool | None = True, + ): + super().__init__() + resnets = [] + attentions = [] + ctrl_to_base = [] + + num_layers = 3 # only support sd + sdxl + + self.has_cross_attention = has_crossattn + self.num_attention_heads = num_attention_heads + + if isinstance(transformer_layers_per_block, int): + transformer_layers_per_block = [transformer_layers_per_block] * num_layers + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + ctrl_to_base.append(make_zero_conv(ctrl_skip_channels[i], resnet_in_channels)) + + resnets.append( + ResnetBlock2D( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + groups=norm_num_groups, + ) + ) + + if has_crossattn: + attentions.append( + Transformer2DModel( + num_attention_heads, + out_channels // num_attention_heads, + in_channels=out_channels, + num_layers=transformer_layers_per_block[i], + cross_attention_dim=cross_attention_dim, + use_linear_projection=use_linear_projection, + upcast_attention=upcast_attention, + norm_num_groups=norm_num_groups, + ) + ) + + self.resnets = nn.ModuleList(resnets) + self.attentions = nn.ModuleList(attentions) if has_crossattn else [None] * num_layers + self.ctrl_to_base = nn.ModuleList(ctrl_to_base) + + if add_upsample: + self.upsamplers = Upsample2D(out_channels, use_conv=True, out_channels=out_channels) + else: + self.upsamplers = None + + self.gradient_checkpointing = False + self.resolution_idx = resolution_idx + + @classmethod + def from_modules(cls, base_upblock: CrossAttnUpBlock2D, ctrl_upblock: UpBlockControlNetXSAdapter): + ctrl_to_base_skip_connections = ctrl_upblock.ctrl_to_base + + # get params + def get_first_cross_attention(block): + return block.attentions[0].transformer_blocks[0].attn2 + + out_channels = base_upblock.resnets[0].out_channels + in_channels = base_upblock.resnets[-1].in_channels - out_channels + prev_output_channels = base_upblock.resnets[0].in_channels - out_channels + ctrl_skip_channelss = [c.in_channels for c in ctrl_to_base_skip_connections] + temb_channels = base_upblock.resnets[0].time_emb_proj.in_features + num_groups = base_upblock.resnets[0].norm1.num_groups + resolution_idx = base_upblock.resolution_idx + if hasattr(base_upblock, "attentions"): + has_crossattn = True + transformer_layers_per_block = len(base_upblock.attentions[0].transformer_blocks) + num_attention_heads = get_first_cross_attention(base_upblock).heads + cross_attention_dim = get_first_cross_attention(base_upblock).cross_attention_dim + upcast_attention = get_first_cross_attention(base_upblock).upcast_attention + use_linear_projection = base_upblock.attentions[0].use_linear_projection + else: + has_crossattn = False + transformer_layers_per_block = None + num_attention_heads = None + cross_attention_dim = None + upcast_attention = None + use_linear_projection = None + add_upsample = base_upblock.upsamplers is not None + + # create model + model = cls( + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channels, + ctrl_skip_channels=ctrl_skip_channelss, + temb_channels=temb_channels, + norm_num_groups=num_groups, + resolution_idx=resolution_idx, + has_crossattn=has_crossattn, + transformer_layers_per_block=transformer_layers_per_block, + num_attention_heads=num_attention_heads, + cross_attention_dim=cross_attention_dim, + add_upsample=add_upsample, + upcast_attention=upcast_attention, + use_linear_projection=use_linear_projection, + ) + + # load weights + model.resnets.load_state_dict(base_upblock.resnets.state_dict()) + if has_crossattn: + model.attentions.load_state_dict(base_upblock.attentions.state_dict()) + if add_upsample: + model.upsamplers.load_state_dict(base_upblock.upsamplers[0].state_dict()) + model.ctrl_to_base.load_state_dict(ctrl_to_base_skip_connections.state_dict()) + + return model + + def freeze_base_params(self) -> None: + """Freeze the weights of the parts belonging to the base UNet2DConditionModel, and leave everything else unfrozen for fine + tuning.""" + # Unfreeze everything + for param in self.parameters(): + param.requires_grad = True + + # Freeze base part + base_parts = [self.resnets] + if isinstance(self.attentions, nn.ModuleList): # attentions can be a list of Nones + base_parts.append(self.attentions) + if self.upsamplers is not None: + base_parts.append(self.upsamplers) + for part in base_parts: + for param in part.parameters(): + param.requires_grad = False + + def forward( + self, + hidden_states: Tensor, + res_hidden_states_tuple_base: tuple[Tensor, ...], + res_hidden_states_tuple_ctrl: tuple[Tensor, ...], + temb: Tensor, + encoder_hidden_states: Tensor | None = None, + conditioning_scale: float | None = 1.0, + cross_attention_kwargs: dict[str, Any] | None = None, + attention_mask: Tensor | None = None, + upsample_size: int | None = None, + encoder_attention_mask: Tensor | None = None, + apply_control: bool = True, + ) -> Tensor: + if cross_attention_kwargs is not None: + if cross_attention_kwargs.get("scale", None) is not None: + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") + + is_freeu_enabled = ( + getattr(self, "s1", None) + and getattr(self, "s2", None) + and getattr(self, "b1", None) + and getattr(self, "b2", None) + ) + + def maybe_apply_freeu_to_subblock(hidden_states, res_h_base): + # FreeU: Only operate on the first two stages + if is_freeu_enabled: + return apply_freeu( + self.resolution_idx, + hidden_states, + res_h_base, + s1=self.s1, + s2=self.s2, + b1=self.b1, + b2=self.b2, + ) + else: + return hidden_states, res_h_base + + for resnet, attn, c2b, res_h_base, res_h_ctrl in zip( + self.resnets, + self.attentions, + self.ctrl_to_base, + reversed(res_hidden_states_tuple_base), + reversed(res_hidden_states_tuple_ctrl), + ): + if apply_control: + hidden_states += c2b(res_h_ctrl) * conditioning_scale + + hidden_states, res_h_base = maybe_apply_freeu_to_subblock(hidden_states, res_h_base) + hidden_states = torch.cat([hidden_states, res_h_base], dim=1) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func(resnet, hidden_states, temb) + else: + hidden_states = resnet(hidden_states, temb) + + if attn is not None: + hidden_states = attn( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + cross_attention_kwargs=cross_attention_kwargs, + attention_mask=attention_mask, + encoder_attention_mask=encoder_attention_mask, + return_dict=False, + )[0] + + if self.upsamplers is not None: + hidden_states = self.upsamplers(hidden_states, upsample_size) + + return hidden_states + + +def make_zero_conv(in_channels, out_channels=None): + return zero_module(nn.Conv2d(in_channels, out_channels, 1, padding=0)) + + +def zero_module(module): + for p in module.parameters(): + nn.init.zeros_(p) + return module + + +def find_largest_factor(number, max_factor): + factor = max_factor + if factor >= number: + return number + while factor != 0: + residual = number % factor + if residual == 0: + return factor + factor -= 1 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_z_image.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_z_image.py new file mode 100644 index 0000000000000000000000000000000000000000..85fa0d365547bf0162808d7312ed346eb815d918 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/controlnet_z_image.py @@ -0,0 +1,845 @@ +# Copyright 2025 Alibaba Z-Image Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Literal + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils.rnn import pad_sequence + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...loaders.single_file_model import FromOriginalModelMixin +from ...models.attention_processor import Attention +from ...models.normalization import RMSNorm +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention_dispatch import dispatch_attention_fn +from ..controlnets.controlnet import zero_module +from ..modeling_utils import ModelMixin + + +ADALN_EMBED_DIM = 256 +SEQ_MULTI_OF = 32 + + +# Copied from diffusers.models.transformers.transformer_z_image.TimestepEmbedder +class TimestepEmbedder(nn.Module): + def __init__(self, out_size, mid_size=None, frequency_embedding_size=256): + super().__init__() + if mid_size is None: + mid_size = out_size + self.mlp = nn.Sequential( + nn.Linear(frequency_embedding_size, mid_size, bias=True), + nn.SiLU(), + nn.Linear(mid_size, out_size, bias=True), + ) + + self.frequency_embedding_size = frequency_embedding_size + + @staticmethod + def timestep_embedding(t, dim, max_period=10000): + with torch.amp.autocast("cuda", enabled=False): + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=t.device) / half + ) + args = t[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + def forward(self, t): + t_freq = self.timestep_embedding(t, self.frequency_embedding_size) + weight_dtype = self.mlp[0].weight.dtype + compute_dtype = getattr(self.mlp[0], "compute_dtype", None) + if weight_dtype.is_floating_point: + t_freq = t_freq.to(weight_dtype) + elif compute_dtype is not None: + t_freq = t_freq.to(compute_dtype) + t_emb = self.mlp(t_freq) + return t_emb + + +# Copied from diffusers.models.transformers.transformer_z_image.ZSingleStreamAttnProcessor +class ZSingleStreamAttnProcessor: + """ + Processor for Z-Image single stream attention that adapts the existing Attention class to match the behavior of the + original Z-ImageAttention module. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "ZSingleStreamAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or higher." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + freqs_cis: torch.Tensor | None = None, + ) -> torch.Tensor: + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + # Apply Norms + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE + def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor: + with torch.amp.autocast("cuda", enabled=False): + x = torch.view_as_complex(x_in.float().reshape(*x_in.shape[:-1], -1, 2)) + freqs_cis = freqs_cis.unsqueeze(2) + x_out = torch.view_as_real(x * freqs_cis).flatten(3) + return x_out.type_as(x_in) # todo + + if freqs_cis is not None: + query = apply_rotary_emb(query, freqs_cis) + key = apply_rotary_emb(key, freqs_cis) + + # Cast to correct dtype + dtype = query.dtype + query, key = query.to(dtype), key.to(dtype) + + # From [batch, seq_len] to [batch, 1, 1, seq_len] -> broadcast to [batch, heads, seq_len, seq_len] + if attention_mask is not None and attention_mask.ndim == 2: + attention_mask = attention_mask[:, None, None, :] + + # Compute joint attention + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + # Reshape back + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(dtype) + + output = attn.to_out[0](hidden_states) + if len(attn.to_out) > 1: # dropout + output = attn.to_out[1](output) + + return output + + +# Copied from diffusers.models.transformers.transformer_z_image.FeedForward +class FeedForward(nn.Module): + def __init__(self, dim: int, hidden_dim: int): + super().__init__() + self.w1 = nn.Linear(dim, hidden_dim, bias=False) + self.w2 = nn.Linear(hidden_dim, dim, bias=False) + self.w3 = nn.Linear(dim, hidden_dim, bias=False) + + def _forward_silu_gating(self, x1, x3): + return F.silu(x1) * x3 + + def forward(self, x): + return self.w2(self._forward_silu_gating(self.w1(x), self.w3(x))) + + +# Copied from diffusers.models.transformers.transformer_z_image.select_per_token +def select_per_token( + value_noisy: torch.Tensor, + value_clean: torch.Tensor, + noise_mask: torch.Tensor, + seq_len: int, +) -> torch.Tensor: + noise_mask_expanded = noise_mask.unsqueeze(-1) # (batch, seq_len, 1) + return torch.where( + noise_mask_expanded == 1, + value_noisy.unsqueeze(1).expand(-1, seq_len, -1), + value_clean.unsqueeze(1).expand(-1, seq_len, -1), + ) + + +@maybe_allow_in_graph +# Copied from diffusers.models.transformers.transformer_z_image.ZImageTransformerBlock +class ZImageTransformerBlock(nn.Module): + def __init__( + self, + layer_id: int, + dim: int, + n_heads: int, + n_kv_heads: int, + norm_eps: float, + qk_norm: bool, + modulation=True, + ): + super().__init__() + self.dim = dim + self.head_dim = dim // n_heads + + # Refactored to use diffusers Attention with custom processor + # Original Z-Image params: dim, n_heads, n_kv_heads, qk_norm + self.attention = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=dim // n_heads, + heads=n_heads, + qk_norm="rms_norm" if qk_norm else None, + eps=1e-5, + bias=False, + out_bias=False, + processor=ZSingleStreamAttnProcessor(), + ) + + self.feed_forward = FeedForward(dim=dim, hidden_dim=int(dim / 3 * 8)) + self.layer_id = layer_id + + self.attention_norm1 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm1 = RMSNorm(dim, eps=norm_eps) + + self.attention_norm2 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm2 = RMSNorm(dim, eps=norm_eps) + + self.modulation = modulation + if modulation: + self.adaLN_modulation = nn.Sequential(nn.Linear(min(dim, ADALN_EMBED_DIM), 4 * dim, bias=True)) + + def forward( + self, + x: torch.Tensor, + attn_mask: torch.Tensor, + freqs_cis: torch.Tensor, + adaln_input: torch.Tensor | None = None, + noise_mask: torch.Tensor | None = None, + adaln_noisy: torch.Tensor | None = None, + adaln_clean: torch.Tensor | None = None, + ): + if self.modulation: + seq_len = x.shape[1] + + if noise_mask is not None: + # Per-token modulation: different modulation for noisy/clean tokens + mod_noisy = self.adaLN_modulation(adaln_noisy) + mod_clean = self.adaLN_modulation(adaln_clean) + + scale_msa_noisy, gate_msa_noisy, scale_mlp_noisy, gate_mlp_noisy = mod_noisy.chunk(4, dim=1) + scale_msa_clean, gate_msa_clean, scale_mlp_clean, gate_mlp_clean = mod_clean.chunk(4, dim=1) + + gate_msa_noisy, gate_mlp_noisy = gate_msa_noisy.tanh(), gate_mlp_noisy.tanh() + gate_msa_clean, gate_mlp_clean = gate_msa_clean.tanh(), gate_mlp_clean.tanh() + + scale_msa_noisy, scale_mlp_noisy = 1.0 + scale_msa_noisy, 1.0 + scale_mlp_noisy + scale_msa_clean, scale_mlp_clean = 1.0 + scale_msa_clean, 1.0 + scale_mlp_clean + + scale_msa = select_per_token(scale_msa_noisy, scale_msa_clean, noise_mask, seq_len) + scale_mlp = select_per_token(scale_mlp_noisy, scale_mlp_clean, noise_mask, seq_len) + gate_msa = select_per_token(gate_msa_noisy, gate_msa_clean, noise_mask, seq_len) + gate_mlp = select_per_token(gate_mlp_noisy, gate_mlp_clean, noise_mask, seq_len) + else: + # Global modulation: same modulation for all tokens (avoid double select) + mod = self.adaLN_modulation(adaln_input) + scale_msa, gate_msa, scale_mlp, gate_mlp = mod.unsqueeze(1).chunk(4, dim=2) + gate_msa, gate_mlp = gate_msa.tanh(), gate_mlp.tanh() + scale_msa, scale_mlp = 1.0 + scale_msa, 1.0 + scale_mlp + + # Attention block + attn_out = self.attention( + self.attention_norm1(x) * scale_msa, attention_mask=attn_mask, freqs_cis=freqs_cis + ) + x = x + gate_msa * self.attention_norm2(attn_out) + + # FFN block + x = x + gate_mlp * self.ffn_norm2(self.feed_forward(self.ffn_norm1(x) * scale_mlp)) + else: + # Attention block + attn_out = self.attention(self.attention_norm1(x), attention_mask=attn_mask, freqs_cis=freqs_cis) + x = x + self.attention_norm2(attn_out) + + # FFN block + x = x + self.ffn_norm2(self.feed_forward(self.ffn_norm1(x))) + + return x + + +# Copied from diffusers.models.transformers.transformer_z_image.RopeEmbedder +class RopeEmbedder: + def __init__( + self, + theta: float = 256.0, + axes_dims: list[int] = (16, 56, 56), + axes_lens: list[int] = (64, 128, 128), + ): + self.theta = theta + self.axes_dims = axes_dims + self.axes_lens = axes_lens + assert len(axes_dims) == len(axes_lens), "axes_dims and axes_lens must have the same length" + self.freqs_cis = None + + @staticmethod + def precompute_freqs_cis(dim: list[int], end: list[int], theta: float = 256.0): + with torch.device("cpu"): + freqs_cis = [] + for i, (d, e) in enumerate(zip(dim, end)): + freqs = 1.0 / (theta ** (torch.arange(0, d, 2, dtype=torch.float64, device="cpu") / d)) + timestep = torch.arange(e, device=freqs.device, dtype=torch.float64) + freqs = torch.outer(timestep, freqs).float() + freqs_cis_i = torch.polar(torch.ones_like(freqs), freqs).to(torch.complex64) # complex64 + freqs_cis.append(freqs_cis_i) + + return freqs_cis + + def __call__(self, ids: torch.Tensor): + assert ids.ndim == 2 + assert ids.shape[-1] == len(self.axes_dims) + device = ids.device + + if self.freqs_cis is None: + self.freqs_cis = self.precompute_freqs_cis(self.axes_dims, self.axes_lens, theta=self.theta) + self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis] + else: + # Ensure freqs_cis are on the same device as ids + if self.freqs_cis[0].device != device: + self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis] + + result = [] + for i in range(len(self.axes_dims)): + index = ids[:, i] + result.append(self.freqs_cis[i][index]) + return torch.cat(result, dim=-1) + + +@maybe_allow_in_graph +class ZImageControlTransformerBlock(nn.Module): + def __init__( + self, + layer_id: int, + dim: int, + n_heads: int, + n_kv_heads: int, + norm_eps: float, + qk_norm: bool, + modulation=True, + block_id=0, + ): + super().__init__() + self.dim = dim + self.head_dim = dim // n_heads + + # Refactored to use diffusers Attention with custom processor + # Original Z-Image params: dim, n_heads, n_kv_heads, qk_norm + self.attention = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=dim // n_heads, + heads=n_heads, + qk_norm="rms_norm" if qk_norm else None, + eps=1e-5, + bias=False, + out_bias=False, + processor=ZSingleStreamAttnProcessor(), + ) + + self.feed_forward = FeedForward(dim=dim, hidden_dim=int(dim / 3 * 8)) + self.layer_id = layer_id + + self.attention_norm1 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm1 = RMSNorm(dim, eps=norm_eps) + + self.attention_norm2 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm2 = RMSNorm(dim, eps=norm_eps) + + self.modulation = modulation + if modulation: + self.adaLN_modulation = nn.Sequential(nn.Linear(min(dim, ADALN_EMBED_DIM), 4 * dim, bias=True)) + + # Control variant start + self.block_id = block_id + if block_id == 0: + self.before_proj = zero_module(nn.Linear(self.dim, self.dim)) + self.after_proj = zero_module(nn.Linear(self.dim, self.dim)) + + def forward( + self, + c: torch.Tensor, + x: torch.Tensor, + attn_mask: torch.Tensor, + freqs_cis: torch.Tensor, + adaln_input: torch.Tensor | None = None, + ): + # Control + if self.block_id == 0: + c = self.before_proj(c) + x + all_c = [] + else: + all_c = list(torch.unbind(c)) + c = all_c.pop(-1) + + # Compared to `ZImageTransformerBlock` x -> c + if self.modulation: + assert adaln_input is not None + scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaLN_modulation(adaln_input).unsqueeze(1).chunk(4, dim=2) + gate_msa, gate_mlp = gate_msa.tanh(), gate_mlp.tanh() + scale_msa, scale_mlp = 1.0 + scale_msa, 1.0 + scale_mlp + + # Attention block + attn_out = self.attention( + self.attention_norm1(c) * scale_msa, attention_mask=attn_mask, freqs_cis=freqs_cis + ) + c = c + gate_msa * self.attention_norm2(attn_out) + + # FFN block + c = c + gate_mlp * self.ffn_norm2(self.feed_forward(self.ffn_norm1(c) * scale_mlp)) + else: + # Attention block + attn_out = self.attention(self.attention_norm1(c), attention_mask=attn_mask, freqs_cis=freqs_cis) + c = c + self.attention_norm2(attn_out) + + # FFN block + c = c + self.ffn_norm2(self.feed_forward(self.ffn_norm1(c))) + + # Control + c_skip = self.after_proj(c) + all_c += [c_skip, c] + c = torch.stack(all_c) + return c + + +class ZImageControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + control_layers_places: list[int] = None, + control_refiner_layers_places: list[int] = None, + control_in_dim=None, + add_control_noise_refiner: Literal["control_layers", "control_noise_refiner"] | None = None, + all_patch_size=(2,), + all_f_patch_size=(1,), + dim=3840, + n_refiner_layers=2, + n_heads=30, + n_kv_heads=30, + norm_eps=1e-5, + qk_norm=True, + ): + super().__init__() + self.control_layers_places = control_layers_places + self.control_in_dim = control_in_dim + self.control_refiner_layers_places = control_refiner_layers_places + self.add_control_noise_refiner = add_control_noise_refiner + + assert 0 in self.control_layers_places + + # control blocks + self.control_layers = nn.ModuleList( + [ + ZImageControlTransformerBlock(i, dim, n_heads, n_kv_heads, norm_eps, qk_norm, block_id=i) + for i in self.control_layers_places + ] + ) + + # control patch embeddings + all_x_embedder = {} + for patch_idx, (patch_size, f_patch_size) in enumerate(zip(all_patch_size, all_f_patch_size)): + x_embedder = nn.Linear(f_patch_size * patch_size * patch_size * self.control_in_dim, dim, bias=True) + all_x_embedder[f"{patch_size}-{f_patch_size}"] = x_embedder + + self.control_all_x_embedder = nn.ModuleDict(all_x_embedder) + if self.add_control_noise_refiner == "control_layers": + self.control_noise_refiner = None + elif self.add_control_noise_refiner == "control_noise_refiner": + self.control_noise_refiner = nn.ModuleList( + [ + ZImageControlTransformerBlock( + 1000 + layer_id, + dim, + n_heads, + n_kv_heads, + norm_eps, + qk_norm, + modulation=True, + block_id=layer_id, + ) + for layer_id in range(n_refiner_layers) + ] + ) + else: + self.control_noise_refiner = nn.ModuleList( + [ + ZImageTransformerBlock( + 1000 + layer_id, + dim, + n_heads, + n_kv_heads, + norm_eps, + qk_norm, + modulation=True, + ) + for layer_id in range(n_refiner_layers) + ] + ) + + self.t_scale: float | None = None + self.t_embedder: TimestepEmbedder | None = None + self.all_x_embedder: nn.ModuleDict | None = None + self.cap_embedder: nn.Sequential | None = None + self.rope_embedder: RopeEmbedder | None = None + self.noise_refiner: nn.ModuleList | None = None + self.context_refiner: nn.ModuleList | None = None + self.x_pad_token: nn.Parameter | None = None + self.cap_pad_token: nn.Parameter | None = None + + @classmethod + def from_transformer(cls, controlnet, transformer): + controlnet.t_scale = transformer.t_scale + controlnet.t_embedder = transformer.t_embedder + controlnet.all_x_embedder = transformer.all_x_embedder + controlnet.cap_embedder = transformer.cap_embedder + controlnet.rope_embedder = transformer.rope_embedder + controlnet.noise_refiner = transformer.noise_refiner + controlnet.context_refiner = transformer.context_refiner + controlnet.x_pad_token = transformer.x_pad_token + controlnet.cap_pad_token = transformer.cap_pad_token + return controlnet + + @staticmethod + # Copied from diffusers.models.transformers.transformer_z_image.ZImageTransformer2DModel.create_coordinate_grid + def create_coordinate_grid(size, start=None, device=None): + if start is None: + start = (0 for _ in size) + axes = [torch.arange(x0, x0 + span, dtype=torch.int32, device=device) for x0, span in zip(start, size)] + grids = torch.meshgrid(axes, indexing="ij") + return torch.stack(grids, dim=-1) + + # Copied from diffusers.models.transformers.transformer_z_image.ZImageTransformer2DModel._patchify_image + def _patchify_image(self, image: torch.Tensor, patch_size: int, f_patch_size: int): + """Patchify a single image tensor: (C, F, H, W) -> (num_patches, patch_dim).""" + pH, pW, pF = patch_size, patch_size, f_patch_size + C, F, H, W = image.size() + F_tokens, H_tokens, W_tokens = F // pF, H // pH, W // pW + image = image.view(C, F_tokens, pF, H_tokens, pH, W_tokens, pW) + image = image.permute(1, 3, 5, 2, 4, 6, 0).reshape(F_tokens * H_tokens * W_tokens, pF * pH * pW * C) + return image, (F, H, W), (F_tokens, H_tokens, W_tokens) + + # Copied from diffusers.models.transformers.transformer_z_image.ZImageTransformer2DModel._pad_with_ids + def _pad_with_ids( + self, + feat: torch.Tensor, + pos_grid_size: tuple, + pos_start: tuple, + device: torch.device, + noise_mask_val: int | None = None, + ): + """Pad feature to SEQ_MULTI_OF, create position IDs and pad mask.""" + ori_len = len(feat) + pad_len = (-ori_len) % SEQ_MULTI_OF + total_len = ori_len + pad_len + + # Pos IDs + ori_pos_ids = self.create_coordinate_grid(size=pos_grid_size, start=pos_start, device=device).flatten(0, 2) + if pad_len > 0: + pad_pos_ids = ( + self.create_coordinate_grid(size=(1, 1, 1), start=(0, 0, 0), device=device) + .flatten(0, 2) + .repeat(pad_len, 1) + ) + pos_ids = torch.cat([ori_pos_ids, pad_pos_ids], dim=0) + padded_feat = torch.cat([feat, feat[-1:].repeat(pad_len, 1)], dim=0) + pad_mask = torch.cat( + [ + torch.zeros(ori_len, dtype=torch.bool, device=device), + torch.ones(pad_len, dtype=torch.bool, device=device), + ] + ) + else: + pos_ids = ori_pos_ids + padded_feat = feat + pad_mask = torch.zeros(ori_len, dtype=torch.bool, device=device) + + noise_mask = [noise_mask_val] * total_len if noise_mask_val is not None else None # token level + return padded_feat, pos_ids, pad_mask, total_len, noise_mask + + # Copied from diffusers.models.transformers.transformer_z_image.ZImageTransformer2DModel.patchify_and_embed + def patchify_and_embed( + self, all_image: list[torch.Tensor], all_cap_feats: list[torch.Tensor], patch_size: int, f_patch_size: int + ): + """Patchify for basic mode: single image per batch item.""" + device = all_image[0].device + all_img_out, all_img_size, all_img_pos_ids, all_img_pad_mask = [], [], [], [] + all_cap_out, all_cap_pos_ids, all_cap_pad_mask = [], [], [] + + for image, cap_feat in zip(all_image, all_cap_feats): + # Caption + cap_out, cap_pos_ids, cap_pad_mask, cap_len, _ = self._pad_with_ids( + cap_feat, (len(cap_feat) + (-len(cap_feat)) % SEQ_MULTI_OF, 1, 1), (1, 0, 0), device + ) + all_cap_out.append(cap_out) + all_cap_pos_ids.append(cap_pos_ids) + all_cap_pad_mask.append(cap_pad_mask) + + # Image + img_patches, size, (F_t, H_t, W_t) = self._patchify_image(image, patch_size, f_patch_size) + img_out, img_pos_ids, img_pad_mask, _, _ = self._pad_with_ids( + img_patches, (F_t, H_t, W_t), (cap_len + 1, 0, 0), device + ) + all_img_out.append(img_out) + all_img_size.append(size) + all_img_pos_ids.append(img_pos_ids) + all_img_pad_mask.append(img_pad_mask) + + return ( + all_img_out, + all_cap_out, + all_img_size, + all_img_pos_ids, + all_cap_pos_ids, + all_img_pad_mask, + all_cap_pad_mask, + ) + + def patchify( + self, + all_image: list[torch.Tensor], + patch_size: int, + f_patch_size: int, + ): + pH = pW = patch_size + pF = f_patch_size + all_image_out = [] + + for i, image in enumerate(all_image): + ### Process Image + C, F, H, W = image.size() + F_tokens, H_tokens, W_tokens = F // pF, H // pH, W // pW + + image = image.view(C, F_tokens, pF, H_tokens, pH, W_tokens, pW) + # "c f pf h ph w pw -> (f h w) (pf ph pw c)" + image = image.permute(1, 3, 5, 2, 4, 6, 0).reshape(F_tokens * H_tokens * W_tokens, pF * pH * pW * C) + + image_ori_len = len(image) + image_padding_len = (-image_ori_len) % SEQ_MULTI_OF + + # padded feature + image_padded_feat = torch.cat([image, image[-1:].repeat(image_padding_len, 1)], dim=0) + all_image_out.append(image_padded_feat) + + return all_image_out + + def forward( + self, + x: list[torch.Tensor], + t, + cap_feats: list[torch.Tensor], + control_context: list[torch.Tensor], + conditioning_scale: float = 1.0, + patch_size=2, + f_patch_size=1, + ): + if ( + self.t_scale is None + or self.t_embedder is None + or self.all_x_embedder is None + or self.cap_embedder is None + or self.rope_embedder is None + or self.noise_refiner is None + or self.context_refiner is None + or self.x_pad_token is None + or self.cap_pad_token is None + ): + raise ValueError( + "Required modules are `None`, use `from_transformer` to share required modules from `transformer`." + ) + + assert patch_size in self.config.all_patch_size + assert f_patch_size in self.config.all_f_patch_size + + bsz = len(x) + device = x[0].device + t = t * self.t_scale + t = self.t_embedder(t) + + ( + x, + cap_feats, + x_size, + x_pos_ids, + cap_pos_ids, + x_inner_pad_mask, + cap_inner_pad_mask, + ) = self.patchify_and_embed(x, cap_feats, patch_size, f_patch_size) + + x_item_seqlens = [len(_) for _ in x] + assert all(_ % SEQ_MULTI_OF == 0 for _ in x_item_seqlens) + x_max_item_seqlen = max(x_item_seqlens) + + control_context = self.patchify(control_context, patch_size, f_patch_size) + control_context = torch.cat(control_context, dim=0) + control_context = self.control_all_x_embedder[f"{patch_size}-{f_patch_size}"](control_context) + + control_context[torch.cat(x_inner_pad_mask)] = self.x_pad_token + control_context = list(control_context.split(x_item_seqlens, dim=0)) + + control_context = pad_sequence(control_context, batch_first=True, padding_value=0.0) + + # x embed & refine + x = torch.cat(x, dim=0) + x = self.all_x_embedder[f"{patch_size}-{f_patch_size}"](x) + + # Match t_embedder output dtype to x for layerwise casting compatibility + adaln_input = t.type_as(x) + x[torch.cat(x_inner_pad_mask)] = self.x_pad_token + x = list(x.split(x_item_seqlens, dim=0)) + x_freqs_cis = list(self.rope_embedder(torch.cat(x_pos_ids, dim=0)).split([len(_) for _ in x_pos_ids], dim=0)) + + x = pad_sequence(x, batch_first=True, padding_value=0.0) + x_freqs_cis = pad_sequence(x_freqs_cis, batch_first=True, padding_value=0.0) + # Clarify the length matches to satisfy Dynamo due to "Symbolic Shape Inference" to avoid compilation errors + x_freqs_cis = x_freqs_cis[:, : x.shape[1]] + + x_attn_mask = torch.zeros((bsz, x_max_item_seqlen), dtype=torch.bool, device=device) + for i, seq_len in enumerate(x_item_seqlens): + x_attn_mask[i, :seq_len] = 1 + + if self.add_control_noise_refiner is not None: + if self.add_control_noise_refiner == "control_layers": + layers = self.control_layers + elif self.add_control_noise_refiner == "control_noise_refiner": + layers = self.control_noise_refiner + else: + raise ValueError(f"Unsupported `add_control_noise_refiner` type: {self.add_control_noise_refiner}.") + for layer in layers: + if torch.is_grad_enabled() and self.gradient_checkpointing: + control_context = self._gradient_checkpointing_func( + layer, control_context, x, x_attn_mask, x_freqs_cis, adaln_input + ) + else: + control_context = layer(control_context, x, x_attn_mask, x_freqs_cis, adaln_input) + + hints = torch.unbind(control_context)[:-1] + control_context = torch.unbind(control_context)[-1] + noise_refiner_block_samples = { + layer_idx: hints[idx] * conditioning_scale + for idx, layer_idx in enumerate(self.control_refiner_layers_places) + } + else: + noise_refiner_block_samples = None + + if torch.is_grad_enabled() and self.gradient_checkpointing: + for layer_idx, layer in enumerate(self.noise_refiner): + x = self._gradient_checkpointing_func(layer, x, x_attn_mask, x_freqs_cis, adaln_input) + if noise_refiner_block_samples is not None: + if layer_idx in noise_refiner_block_samples: + x = x + noise_refiner_block_samples[layer_idx] + else: + for layer_idx, layer in enumerate(self.noise_refiner): + x = layer(x, x_attn_mask, x_freqs_cis, adaln_input) + if noise_refiner_block_samples is not None: + if layer_idx in noise_refiner_block_samples: + x = x + noise_refiner_block_samples[layer_idx] + + # cap embed & refine + cap_item_seqlens = [len(_) for _ in cap_feats] + cap_max_item_seqlen = max(cap_item_seqlens) + + cap_feats = torch.cat(cap_feats, dim=0) + cap_feats = self.cap_embedder(cap_feats) + cap_feats[torch.cat(cap_inner_pad_mask)] = self.cap_pad_token + cap_feats = list(cap_feats.split(cap_item_seqlens, dim=0)) + cap_freqs_cis = list( + self.rope_embedder(torch.cat(cap_pos_ids, dim=0)).split([len(_) for _ in cap_pos_ids], dim=0) + ) + + cap_feats = pad_sequence(cap_feats, batch_first=True, padding_value=0.0) + cap_freqs_cis = pad_sequence(cap_freqs_cis, batch_first=True, padding_value=0.0) + # Clarify the length matches to satisfy Dynamo due to "Symbolic Shape Inference" to avoid compilation errors + cap_freqs_cis = cap_freqs_cis[:, : cap_feats.shape[1]] + + cap_attn_mask = torch.zeros((bsz, cap_max_item_seqlen), dtype=torch.bool, device=device) + for i, seq_len in enumerate(cap_item_seqlens): + cap_attn_mask[i, :seq_len] = 1 + + if torch.is_grad_enabled() and self.gradient_checkpointing: + for layer in self.context_refiner: + cap_feats = self._gradient_checkpointing_func(layer, cap_feats, cap_attn_mask, cap_freqs_cis) + else: + for layer in self.context_refiner: + cap_feats = layer(cap_feats, cap_attn_mask, cap_freqs_cis) + + # unified + unified = [] + unified_freqs_cis = [] + for i in range(bsz): + x_len = x_item_seqlens[i] + cap_len = cap_item_seqlens[i] + unified.append(torch.cat([x[i][:x_len], cap_feats[i][:cap_len]])) + unified_freqs_cis.append(torch.cat([x_freqs_cis[i][:x_len], cap_freqs_cis[i][:cap_len]])) + unified_item_seqlens = [a + b for a, b in zip(cap_item_seqlens, x_item_seqlens)] + assert unified_item_seqlens == [len(_) for _ in unified] + unified_max_item_seqlen = max(unified_item_seqlens) + + unified = pad_sequence(unified, batch_first=True, padding_value=0.0) + unified_freqs_cis = pad_sequence(unified_freqs_cis, batch_first=True, padding_value=0.0) + unified_attn_mask = torch.zeros((bsz, unified_max_item_seqlen), dtype=torch.bool, device=device) + for i, seq_len in enumerate(unified_item_seqlens): + unified_attn_mask[i, :seq_len] = 1 + + ## ControlNet start + if not self.add_control_noise_refiner: + if torch.is_grad_enabled() and self.gradient_checkpointing: + for layer in self.control_noise_refiner: + control_context = self._gradient_checkpointing_func( + layer, control_context, x_attn_mask, x_freqs_cis, adaln_input + ) + else: + for layer in self.control_noise_refiner: + control_context = layer(control_context, x_attn_mask, x_freqs_cis, adaln_input) + + # unified + control_context_unified = [] + for i in range(bsz): + x_len = x_item_seqlens[i] + cap_len = cap_item_seqlens[i] + control_context_unified.append(torch.cat([control_context[i][:x_len], cap_feats[i][:cap_len]])) + control_context_unified = pad_sequence(control_context_unified, batch_first=True, padding_value=0.0) + + for layer in self.control_layers: + if torch.is_grad_enabled() and self.gradient_checkpointing: + control_context_unified = self._gradient_checkpointing_func( + layer, control_context_unified, unified, unified_attn_mask, unified_freqs_cis, adaln_input + ) + else: + control_context_unified = layer( + control_context_unified, unified, unified_attn_mask, unified_freqs_cis, adaln_input + ) + + hints = torch.unbind(control_context_unified)[:-1] + controlnet_block_samples = { + layer_idx: hints[idx] * conditioning_scale for idx, layer_idx in enumerate(self.control_layers_places) + } + return controlnet_block_samples diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/multicontrolnet.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/multicontrolnet.py new file mode 100644 index 0000000000000000000000000000000000000000..705c59c0f925eebf1402f3b4bc26d83eb4d4ce8a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/multicontrolnet.py @@ -0,0 +1,182 @@ +import os +from typing import Any, Callable + +import torch +from torch import nn + +from ...utils import logging +from ..controlnets.controlnet import ControlNetModel, ControlNetOutput +from ..modeling_utils import ModelMixin + + +logger = logging.get_logger(__name__) + + +class MultiControlNetModel(ModelMixin): + r""" + Multiple `ControlNetModel` wrapper class for Multi-ControlNet + + This module is a wrapper for multiple instances of the `ControlNetModel`. The `forward()` API is designed to be + compatible with `ControlNetModel`. + + Args: + controlnets (`list[ControlNetModel]`): + Provides additional conditioning to the unet during the denoising process. You must set multiple + `ControlNetModel` as a list. + """ + + def __init__(self, controlnets: list[ControlNetModel] | tuple[ControlNetModel]): + super().__init__() + self.nets = nn.ModuleList(controlnets) + + def forward( + self, + sample: torch.Tensor, + timestep: torch.Tensor | float | int, + encoder_hidden_states: torch.Tensor, + controlnet_cond: list[torch.tensor], + conditioning_scale: list[float], + class_labels: torch.Tensor | None = None, + timestep_cond: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + guess_mode: bool = False, + return_dict: bool = True, + ) -> ControlNetOutput | tuple: + for i, (image, scale, controlnet) in enumerate(zip(controlnet_cond, conditioning_scale, self.nets)): + down_samples, mid_sample = controlnet( + sample=sample, + timestep=timestep, + encoder_hidden_states=encoder_hidden_states, + controlnet_cond=image, + conditioning_scale=scale, + class_labels=class_labels, + timestep_cond=timestep_cond, + attention_mask=attention_mask, + added_cond_kwargs=added_cond_kwargs, + cross_attention_kwargs=cross_attention_kwargs, + guess_mode=guess_mode, + return_dict=return_dict, + ) + + # merge samples + if i == 0: + down_block_res_samples, mid_block_res_sample = down_samples, mid_sample + else: + down_block_res_samples = [ + samples_prev + samples_curr + for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) + ] + mid_block_res_sample += mid_sample + + return down_block_res_samples, mid_block_res_sample + + def save_pretrained( + self, + save_directory: str | os.PathLike, + is_main_process: bool = True, + save_function: Callable = None, + safe_serialization: bool = True, + variant: str | None = None, + ): + """ + Save a model and its configuration file to a directory, so that it can be re-loaded using the + `[`~models.controlnets.multicontrolnet.MultiControlNetModel.from_pretrained`]` class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to which to save. Will be created if it doesn't exist. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful when in distributed training like + TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on + the main process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful on distributed training like TPUs when one + need to replace `torch.save` by another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + variant (`str`, *optional*): + If specified, weights are saved in the format pytorch_model..bin. + """ + for idx, controlnet in enumerate(self.nets): + suffix = "" if idx == 0 else f"_{idx}" + controlnet.save_pretrained( + save_directory + suffix, + is_main_process=is_main_process, + save_function=save_function, + safe_serialization=safe_serialization, + variant=variant, + ) + + @classmethod + def from_pretrained(cls, pretrained_model_path: str | os.PathLike | None, **kwargs): + r""" + Instantiate a pretrained MultiControlNet model from multiple pre-trained controlnet models. + + The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train + the model, you should first set it back in training mode with `model.train()`. + + The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come + pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning + task. + + The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those + weights are discarded. + + Parameters: + pretrained_model_path (`os.PathLike`): + A path to a *directory* containing model weights saved using + [`~models.controlnets.multicontrolnet.MultiControlNetModel.save_pretrained`], e.g., + `./my_model_directory/controlnet`. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model under this dtype. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each + parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the + same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary device identifier to maximum memory. Will default to the maximum memory available for each + GPU and the available CPU RAM if unset. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading by not initializing the weights and only loading the pre-trained weights. This + also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the + model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, + setting this argument to `True` will raise an error. + variant (`str`, *optional*): + If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is + ignored when using `from_flax`. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the `safetensors` weights will be downloaded if they're available **and** if the + `safetensors` library is installed. If set to `True`, the model will be forcibly loaded from + `safetensors` weights. If set to `False`, loading will *not* use `safetensors`. + """ + idx = 0 + controlnets = [] + + # load controlnet and append to list until no controlnet directory exists anymore + # first controlnet has to be saved under `./mydirectory/controlnet` to be compliant with `DiffusionPipeline.from_prertained` + # second, third, ... controlnets have to be saved under `./mydirectory/controlnet_1`, `./mydirectory/controlnet_2`, ... + model_path_to_load = pretrained_model_path + while os.path.isdir(model_path_to_load): + controlnet = ControlNetModel.from_pretrained(model_path_to_load, **kwargs) + controlnets.append(controlnet) + + idx += 1 + model_path_to_load = pretrained_model_path + f"_{idx}" + + logger.info(f"{len(controlnets)} controlnets loaded from {pretrained_model_path}.") + + if len(controlnets) == 0: + raise ValueError( + f"No ControlNets found under {os.path.dirname(pretrained_model_path)}. Expected at least {pretrained_model_path + '_0'}." + ) + + return cls(controlnets) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/multicontrolnet_union.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/multicontrolnet_union.py new file mode 100644 index 0000000000000000000000000000000000000000..98552f99623ade8529d5c848bef8446128ed34f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/controlnets/multicontrolnet_union.py @@ -0,0 +1,195 @@ +import os +from typing import Any, Callable + +import torch +from torch import nn + +from ...utils import logging +from ..controlnets.controlnet import ControlNetOutput +from ..controlnets.controlnet_union import ControlNetUnionModel +from ..modeling_utils import ModelMixin + + +logger = logging.get_logger(__name__) + + +class MultiControlNetUnionModel(ModelMixin): + r""" + Multiple `ControlNetUnionModel` wrapper class for Multi-ControlNet-Union. + + This module is a wrapper for multiple instances of the `ControlNetUnionModel`. The `forward()` API is designed to + be compatible with `ControlNetUnionModel`. + + Args: + controlnets (`list[ControlNetUnionModel]`): + Provides additional conditioning to the unet during the denoising process. You must set multiple + `ControlNetUnionModel` as a list. + """ + + def __init__(self, controlnets: list[ControlNetUnionModel] | tuple[ControlNetUnionModel]): + super().__init__() + self.nets = nn.ModuleList(controlnets) + + def forward( + self, + sample: torch.Tensor, + timestep: torch.Tensor | float | int, + encoder_hidden_states: torch.Tensor, + controlnet_cond: list[torch.tensor], + control_type: list[torch.Tensor], + control_type_idx: list[list[int]], + conditioning_scale: list[float], + class_labels: torch.Tensor | None = None, + timestep_cond: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + cross_attention_kwargs: dict[str, Any] | None = None, + guess_mode: bool = False, + return_dict: bool = True, + ) -> ControlNetOutput | tuple: + down_block_res_samples, mid_block_res_sample = None, None + for i, (image, ctype, ctype_idx, scale, controlnet) in enumerate( + zip(controlnet_cond, control_type, control_type_idx, conditioning_scale, self.nets) + ): + if scale == 0.0: + continue + down_samples, mid_sample = controlnet( + sample=sample, + timestep=timestep, + encoder_hidden_states=encoder_hidden_states, + controlnet_cond=image, + control_type=ctype, + control_type_idx=ctype_idx, + conditioning_scale=scale, + class_labels=class_labels, + timestep_cond=timestep_cond, + attention_mask=attention_mask, + added_cond_kwargs=added_cond_kwargs, + cross_attention_kwargs=cross_attention_kwargs, + from_multi=True, + guess_mode=guess_mode, + return_dict=return_dict, + ) + + # merge samples + if down_block_res_samples is None and mid_block_res_sample is None: + down_block_res_samples, mid_block_res_sample = down_samples, mid_sample + else: + down_block_res_samples = [ + samples_prev + samples_curr + for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) + ] + mid_block_res_sample += mid_sample + + return down_block_res_samples, mid_block_res_sample + + # Copied from diffusers.models.controlnets.multicontrolnet.MultiControlNetModel.save_pretrained with ControlNet->ControlNetUnion + def save_pretrained( + self, + save_directory: str | os.PathLike, + is_main_process: bool = True, + save_function: Callable = None, + safe_serialization: bool = True, + variant: str | None = None, + ): + """ + Save a model and its configuration file to a directory, so that it can be re-loaded using the + `[`~models.controlnets.multicontrolnet.MultiControlNetUnionModel.from_pretrained`]` class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to which to save. Will be created if it doesn't exist. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful when in distributed training like + TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on + the main process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful on distributed training like TPUs when one + need to replace `torch.save` by another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + variant (`str`, *optional*): + If specified, weights are saved in the format pytorch_model..bin. + """ + for idx, controlnet in enumerate(self.nets): + suffix = "" if idx == 0 else f"_{idx}" + controlnet.save_pretrained( + save_directory + suffix, + is_main_process=is_main_process, + save_function=save_function, + safe_serialization=safe_serialization, + variant=variant, + ) + + @classmethod + # Copied from diffusers.models.controlnets.multicontrolnet.MultiControlNetModel.from_pretrained with ControlNet->ControlNetUnion + def from_pretrained(cls, pretrained_model_path: str | os.PathLike | None, **kwargs): + r""" + Instantiate a pretrained MultiControlNetUnion model from multiple pre-trained controlnet models. + + The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train + the model, you should first set it back in training mode with `model.train()`. + + The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come + pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning + task. + + The warning *Weights from XXX not used in YYY* means that the layer XXX is not used by YYY, therefore those + weights are discarded. + + Parameters: + pretrained_model_path (`os.PathLike`): + A path to a *directory* containing model weights saved using + [`~models.controlnets.multicontrolnet.MultiControlNetUnionModel.save_pretrained`], e.g., + `./my_model_directory/controlnet`. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model under this dtype. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each + parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the + same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary device identifier to maximum memory. Will default to the maximum memory available for each + GPU and the available CPU RAM if unset. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading by not initializing the weights and only loading the pre-trained weights. This + also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the + model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, + setting this argument to `True` will raise an error. + variant (`str`, *optional*): + If specified load weights from `variant` filename, *e.g.* pytorch_model..bin. `variant` is + ignored when using `from_flax`. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the `safetensors` weights will be downloaded if they're available **and** if the + `safetensors` library is installed. If set to `True`, the model will be forcibly loaded from + `safetensors` weights. If set to `False`, loading will *not* use `safetensors`. + """ + idx = 0 + controlnets = [] + + # load controlnet and append to list until no controlnet directory exists anymore + # first controlnet has to be saved under `./mydirectory/controlnet` to be compliant with `DiffusionPipeline.from_prertained` + # second, third, ... controlnets have to be saved under `./mydirectory/controlnet_1`, `./mydirectory/controlnet_2`, ... + model_path_to_load = pretrained_model_path + while os.path.isdir(model_path_to_load): + controlnet = ControlNetUnionModel.from_pretrained(model_path_to_load, **kwargs) + controlnets.append(controlnet) + + idx += 1 + model_path_to_load = pretrained_model_path + f"_{idx}" + + logger.info(f"{len(controlnets)} controlnets loaded from {pretrained_model_path}.") + + if len(controlnets) == 0: + raise ValueError( + f"No ControlNetUnions found under {os.path.dirname(pretrained_model_path)}. Expected at least {pretrained_model_path + '_0'}." + ) + + return cls(controlnets) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/downsampling.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/downsampling.py new file mode 100644 index 0000000000000000000000000000000000000000..871c0ed7ddf7676fde2b89a29c570568b69bc023 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/downsampling.py @@ -0,0 +1,399 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import deprecate +from .normalization import RMSNorm +from .upsampling import upfirdn2d_native + + +class Downsample1D(nn.Module): + """A 1D downsampling layer with an optional convolution. + + Parameters: + channels (`int`): + number of channels in the inputs and outputs. + use_conv (`bool`, default `False`): + option to use a convolution. + out_channels (`int`, optional): + number of output channels. Defaults to `channels`. + padding (`int`, default `1`): + padding for the convolution. + name (`str`, default `conv`): + name of the downsampling 1D layer. + """ + + def __init__( + self, + channels: int, + use_conv: bool = False, + out_channels: int | None = None, + padding: int = 1, + name: str = "conv", + ): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + self.conv = nn.Conv1d(self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + self.conv = nn.AvgPool1d(kernel_size=stride, stride=stride) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + assert inputs.shape[1] == self.channels + return self.conv(inputs) + + +class Downsample2D(nn.Module): + """A 2D downsampling layer with an optional convolution. + + Parameters: + channels (`int`): + number of channels in the inputs and outputs. + use_conv (`bool`, default `False`): + option to use a convolution. + out_channels (`int`, optional): + number of output channels. Defaults to `channels`. + padding (`int`, default `1`): + padding for the convolution. + name (`str`, default `conv`): + name of the downsampling 2D layer. + """ + + def __init__( + self, + channels: int, + use_conv: bool = False, + out_channels: int | None = None, + padding: int = 1, + name: str = "conv", + kernel_size=3, + norm_type=None, + eps=None, + elementwise_affine=None, + bias=True, + ): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if norm_type == "ln_norm": + self.norm = nn.LayerNorm(channels, eps, elementwise_affine) + elif norm_type == "rms_norm": + self.norm = RMSNorm(channels, eps, elementwise_affine) + elif norm_type is None: + self.norm = None + else: + raise ValueError(f"unknown norm_type: {norm_type}") + + if use_conv: + conv = nn.Conv2d( + self.channels, self.out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias + ) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2d(kernel_size=stride, stride=stride) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.Conv2d_0 = conv + self.conv = conv + elif name == "Conv2d_0": + self.conv = conv + else: + self.conv = conv + + def forward(self, hidden_states: torch.Tensor, *args, **kwargs) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + assert hidden_states.shape[1] == self.channels + + if self.norm is not None: + hidden_states = self.norm(hidden_states.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) + + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + hidden_states = F.pad(hidden_states, pad, mode="constant", value=0) + + assert hidden_states.shape[1] == self.channels + + hidden_states = self.conv(hidden_states) + + return hidden_states + + +class FirDownsample2D(nn.Module): + """A 2D FIR downsampling layer with an optional convolution. + + Parameters: + channels (`int`): + number of channels in the inputs and outputs. + use_conv (`bool`, default `False`): + option to use a convolution. + out_channels (`int`, optional): + number of output channels. Defaults to `channels`. + fir_kernel (`tuple`, default `(1, 3, 3, 1)`): + kernel for the FIR filter. + """ + + def __init__( + self, + channels: int | None = None, + out_channels: int | None = None, + use_conv: bool = False, + fir_kernel: tuple[int, int, int, int] = (1, 3, 3, 1), + ): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2d(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.fir_kernel = fir_kernel + self.use_conv = use_conv + self.out_channels = out_channels + + def _downsample_2d( + self, + hidden_states: torch.Tensor, + weight: torch.Tensor | None = None, + kernel: torch.Tensor | None = None, + factor: int = 2, + gain: float = 1, + ) -> torch.Tensor: + """Fused `Conv2d()` followed by `downsample_2d()`. + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of + arbitrary order. + + Args: + hidden_states (`torch.Tensor`): + Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + weight (`torch.Tensor`, *optional*): + Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. Grouped convolution can be + performed by `inChannels = x.shape[0] // numGroups`. + kernel (`torch.Tensor`, *optional*): + FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which + corresponds to average pooling. + factor (`int`, *optional*, default to `2`): + Integer downsampling factor. + gain (`float`, *optional*, default to `1.0`): + Scaling factor for signal magnitude. + + Returns: + output (`torch.Tensor`): + Tensor of the shape `[N, C, H // factor, W // factor]` or `[N, H // factor, W // factor, C]`, and same + datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if kernel is None: + kernel = [1] * factor + + # setup kernel + kernel = torch.tensor(kernel, dtype=torch.float32) + if kernel.ndim == 1: + kernel = torch.outer(kernel, kernel) + kernel /= torch.sum(kernel) + + kernel = kernel * gain + + if self.use_conv: + _, _, convH, convW = weight.shape + pad_value = (kernel.shape[0] - factor) + (convW - 1) + stride_value = [factor, factor] + upfirdn_input = upfirdn2d_native( + hidden_states, + torch.tensor(kernel, device=hidden_states.device), + pad=((pad_value + 1) // 2, pad_value // 2), + ) + output = F.conv2d(upfirdn_input, weight, stride=stride_value, padding=0) + else: + pad_value = kernel.shape[0] - factor + output = upfirdn2d_native( + hidden_states, + torch.tensor(kernel, device=hidden_states.device), + down=factor, + pad=((pad_value + 1) // 2, pad_value // 2), + ) + + return output + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.use_conv: + downsample_input = self._downsample_2d(hidden_states, weight=self.Conv2d_0.weight, kernel=self.fir_kernel) + hidden_states = downsample_input + self.Conv2d_0.bias.reshape(1, -1, 1, 1) + else: + hidden_states = self._downsample_2d(hidden_states, kernel=self.fir_kernel, factor=2) + + return hidden_states + + +# downsample/upsample layer used in k-upscaler, might be able to use FirDownsample2D/DirUpsample2D instead +class KDownsample2D(nn.Module): + r"""A 2D K-downsampling layer. + + Parameters: + pad_mode (`str`, *optional*, default to `"reflect"`): the padding mode to use. + """ + + def __init__(self, pad_mode: str = "reflect"): + super().__init__() + self.pad_mode = pad_mode + kernel_1d = torch.tensor([[1 / 8, 3 / 8, 3 / 8, 1 / 8]]) + self.pad = kernel_1d.shape[1] // 2 - 1 + self.register_buffer("kernel", kernel_1d.T @ kernel_1d, persistent=False) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + inputs = F.pad(inputs, (self.pad,) * 4, self.pad_mode) + weight = inputs.new_zeros( + [ + inputs.shape[1], + inputs.shape[1], + self.kernel.shape[0], + self.kernel.shape[1], + ] + ) + indices = torch.arange(inputs.shape[1], device=inputs.device) + kernel = self.kernel.to(weight)[None, :].expand(inputs.shape[1], -1, -1) + weight[indices, indices] = kernel + return F.conv2d(inputs, weight, stride=2) + + +class CogVideoXDownsample3D(nn.Module): + # Todo: Wait for paper release. + r""" + A 3D Downsampling layer using in [CogVideoX]() by Tsinghua University & ZhipuAI + + Args: + in_channels (`int`): + Number of channels in the input image. + out_channels (`int`): + Number of channels produced by the convolution. + kernel_size (`int`, defaults to `3`): + Size of the convolving kernel. + stride (`int`, defaults to `2`): + Stride of the convolution. + padding (`int`, defaults to `0`): + Padding added to all four sides of the input. + compress_time (`bool`, defaults to `False`): + Whether or not to compress the time dimension. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 2, + padding: int = 0, + compress_time: bool = False, + ): + super().__init__() + + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + self.compress_time = compress_time + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.compress_time: + batch_size, channels, frames, height, width = x.shape + + # (batch_size, channels, frames, height, width) -> (batch_size, height, width, channels, frames) -> (batch_size * height * width, channels, frames) + x = x.permute(0, 3, 4, 1, 2).reshape(batch_size * height * width, channels, frames) + + if x.shape[-1] % 2 == 1: + x_first, x_rest = x[..., 0], x[..., 1:] + if x_rest.shape[-1] > 0: + # (batch_size * height * width, channels, frames - 1) -> (batch_size * height * width, channels, (frames - 1) // 2) + x_rest = F.avg_pool1d(x_rest, kernel_size=2, stride=2) + + x = torch.cat([x_first[..., None], x_rest], dim=-1) + # (batch_size * height * width, channels, (frames // 2) + 1) -> (batch_size, height, width, channels, (frames // 2) + 1) -> (batch_size, channels, (frames // 2) + 1, height, width) + x = x.reshape(batch_size, height, width, channels, x.shape[-1]).permute(0, 3, 4, 1, 2) + else: + # (batch_size * height * width, channels, frames) -> (batch_size * height * width, channels, frames // 2) + x = F.avg_pool1d(x, kernel_size=2, stride=2) + # (batch_size * height * width, channels, frames // 2) -> (batch_size, height, width, channels, frames // 2) -> (batch_size, channels, frames // 2, height, width) + x = x.reshape(batch_size, height, width, channels, x.shape[-1]).permute(0, 3, 4, 1, 2) + + # Pad the tensor + pad = (0, 1, 0, 1) + x = F.pad(x, pad, mode="constant", value=0) + batch_size, channels, frames, height, width = x.shape + # (batch_size, channels, frames, height, width) -> (batch_size, frames, channels, height, width) -> (batch_size * frames, channels, height, width) + x = x.permute(0, 2, 1, 3, 4).reshape(batch_size * frames, channels, height, width) + x = self.conv(x) + # (batch_size * frames, channels, height, width) -> (batch_size, frames, channels, height, width) -> (batch_size, channels, frames, height, width) + x = x.reshape(batch_size, frames, x.shape[1], x.shape[2], x.shape[3]).permute(0, 2, 1, 3, 4) + return x + + +def downsample_2d( + hidden_states: torch.Tensor, + kernel: torch.Tensor | None = None, + factor: int = 2, + gain: float = 1, +) -> torch.Tensor: + r"""Downsample2D a batch of 2D images with the given filter. + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and downsamples each image with the + given filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the + specified `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its + shape is a multiple of the downsampling factor. + + Args: + hidden_states (`torch.Tensor`) + Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + kernel (`torch.Tensor`, *optional*): + FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which + corresponds to average pooling. + factor (`int`, *optional*, default to `2`): + Integer downsampling factor. + gain (`float`, *optional*, default to `1.0`): + Scaling factor for signal magnitude. + + Returns: + output (`torch.Tensor`): + Tensor of the shape `[N, C, H // factor, W // factor]` + """ + + assert isinstance(factor, int) and factor >= 1 + if kernel is None: + kernel = [1] * factor + + kernel = torch.tensor(kernel, dtype=torch.float32) + if kernel.ndim == 1: + kernel = torch.outer(kernel, kernel) + kernel /= torch.sum(kernel) + + kernel = kernel * gain + pad_value = kernel.shape[0] - factor + output = upfirdn2d_native( + hidden_states, + kernel.to(device=hidden_states.device), + down=factor, + pad=((pad_value + 1) // 2, pad_value // 2), + ) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/embeddings.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..bcd192c1f1668fd169e2c928bb28e02fadecb4e2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/embeddings.py @@ -0,0 +1,2620 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + +from ..utils import deprecate +from .activations import FP32SiLU, get_activation +from .attention_processor import Attention + + +def get_timestep_embedding( + timesteps: torch.Tensor, + embedding_dim: int, + flip_sin_to_cos: bool = False, + downscale_freq_shift: float = 1, + scale: float = 1, + max_period: int = 10000, +) -> torch.Tensor: + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + + Args + timesteps (torch.Tensor): + a 1-D Tensor of N indices, one per batch element. These may be fractional. + embedding_dim (int): + the dimension of the output. + flip_sin_to_cos (bool): + Whether the embedding order should be `cos, sin` (if True) or `sin, cos` (if False) + downscale_freq_shift (float): + Controls the delta between frequencies between dimensions + scale (float): + Scaling factor applied to the embeddings. + max_period (int): + Controls the maximum frequency of the embeddings + Returns + torch.Tensor: an [N x dim] Tensor of positional embeddings. + """ + assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" + + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * torch.arange( + start=0, end=half_dim, dtype=torch.float32, device=timesteps.device + ) + exponent = exponent / (half_dim - downscale_freq_shift) + + emb = torch.exp(exponent) + emb = timesteps[:, None].float() * emb[None, :] + + # scale embeddings + emb = scale * emb + + # concat sine and cosine embeddings + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) + return emb + + +def get_3d_sincos_pos_embed( + embed_dim: int, + spatial_size: int | tuple[int, int], + temporal_size: int, + spatial_interpolation_scale: float = 1.0, + temporal_interpolation_scale: float = 1.0, + device: torch.device | None = None, + output_type: str = "np", +) -> torch.Tensor: + r""" + Creates 3D sinusoidal positional embeddings. + + Args: + embed_dim (`int`): + The embedding dimension of inputs. It must be divisible by 16. + spatial_size (`int` or `tuple[int, int]`): + The spatial dimension of positional embeddings. If an integer is provided, the same size is applied to both + spatial dimensions (height and width). + temporal_size (`int`): + The temporal dimension of positional embeddings (number of frames). + spatial_interpolation_scale (`float`, defaults to 1.0): + Scale factor for spatial grid interpolation. + temporal_interpolation_scale (`float`, defaults to 1.0): + Scale factor for temporal grid interpolation. + + Returns: + `torch.Tensor`: + The 3D sinusoidal positional embeddings of shape `[temporal_size, spatial_size[0] * spatial_size[1], + embed_dim]`. + """ + if output_type == "np": + return _get_3d_sincos_pos_embed_np( + embed_dim=embed_dim, + spatial_size=spatial_size, + temporal_size=temporal_size, + spatial_interpolation_scale=spatial_interpolation_scale, + temporal_interpolation_scale=temporal_interpolation_scale, + ) + if embed_dim % 4 != 0: + raise ValueError("`embed_dim` must be divisible by 4") + if isinstance(spatial_size, int): + spatial_size = (spatial_size, spatial_size) + + embed_dim_spatial = 3 * embed_dim // 4 + embed_dim_temporal = embed_dim // 4 + + # 1. Spatial + grid_h = torch.arange(spatial_size[1], device=device, dtype=torch.float32) / spatial_interpolation_scale + grid_w = torch.arange(spatial_size[0], device=device, dtype=torch.float32) / spatial_interpolation_scale + grid = torch.meshgrid(grid_w, grid_h, indexing="xy") # here w goes first + grid = torch.stack(grid, dim=0) + + grid = grid.reshape([2, 1, spatial_size[1], spatial_size[0]]) + pos_embed_spatial = get_2d_sincos_pos_embed_from_grid(embed_dim_spatial, grid, output_type="pt") + + # 2. Temporal + grid_t = torch.arange(temporal_size, device=device, dtype=torch.float32) / temporal_interpolation_scale + pos_embed_temporal = get_1d_sincos_pos_embed_from_grid(embed_dim_temporal, grid_t, output_type="pt") + + # 3. Concat + pos_embed_spatial = pos_embed_spatial[None, :, :] + pos_embed_spatial = pos_embed_spatial.repeat_interleave( + temporal_size, dim=0, output_size=pos_embed_spatial.shape[0] * temporal_size + ) # [T, H*W, D // 4 * 3] + + pos_embed_temporal = pos_embed_temporal[:, None, :] + pos_embed_temporal = pos_embed_temporal.repeat_interleave( + spatial_size[0] * spatial_size[1], dim=1 + ) # [T, H*W, D // 4] + + pos_embed = torch.concat([pos_embed_temporal, pos_embed_spatial], dim=-1) # [T, H*W, D] + return pos_embed + + +def _get_3d_sincos_pos_embed_np( + embed_dim: int, + spatial_size: int | tuple[int, int], + temporal_size: int, + spatial_interpolation_scale: float = 1.0, + temporal_interpolation_scale: float = 1.0, +) -> np.ndarray: + r""" + Creates 3D sinusoidal positional embeddings. + + Args: + embed_dim (`int`): + The embedding dimension of inputs. It must be divisible by 16. + spatial_size (`int` or `tuple[int, int]`): + The spatial dimension of positional embeddings. If an integer is provided, the same size is applied to both + spatial dimensions (height and width). + temporal_size (`int`): + The temporal dimension of positional embeddings (number of frames). + spatial_interpolation_scale (`float`, defaults to 1.0): + Scale factor for spatial grid interpolation. + temporal_interpolation_scale (`float`, defaults to 1.0): + Scale factor for temporal grid interpolation. + + Returns: + `np.ndarray`: + The 3D sinusoidal positional embeddings of shape `[temporal_size, spatial_size[0] * spatial_size[1], + embed_dim]`. + """ + deprecation_message = ( + "`get_3d_sincos_pos_embed` uses `torch` and supports `device`." + " `from_numpy` is no longer required." + " Pass `output_type='pt' to use the new version now." + ) + deprecate("output_type=='np'", "0.33.0", deprecation_message, standard_warn=False) + if embed_dim % 4 != 0: + raise ValueError("`embed_dim` must be divisible by 4") + if isinstance(spatial_size, int): + spatial_size = (spatial_size, spatial_size) + + embed_dim_spatial = 3 * embed_dim // 4 + embed_dim_temporal = embed_dim // 4 + + # 1. Spatial + grid_h = np.arange(spatial_size[1], dtype=np.float32) / spatial_interpolation_scale + grid_w = np.arange(spatial_size[0], dtype=np.float32) / spatial_interpolation_scale + grid = np.meshgrid(grid_w, grid_h) # here w goes first + grid = np.stack(grid, axis=0) + + grid = grid.reshape([2, 1, spatial_size[1], spatial_size[0]]) + pos_embed_spatial = get_2d_sincos_pos_embed_from_grid(embed_dim_spatial, grid) + + # 2. Temporal + grid_t = np.arange(temporal_size, dtype=np.float32) / temporal_interpolation_scale + pos_embed_temporal = get_1d_sincos_pos_embed_from_grid(embed_dim_temporal, grid_t) + + # 3. Concat + pos_embed_spatial = pos_embed_spatial[np.newaxis, :, :] + pos_embed_spatial = np.repeat(pos_embed_spatial, temporal_size, axis=0) # [T, H*W, D // 4 * 3] + + pos_embed_temporal = pos_embed_temporal[:, np.newaxis, :] + pos_embed_temporal = np.repeat(pos_embed_temporal, spatial_size[0] * spatial_size[1], axis=1) # [T, H*W, D // 4] + + pos_embed = np.concatenate([pos_embed_temporal, pos_embed_spatial], axis=-1) # [T, H*W, D] + return pos_embed + + +def get_2d_sincos_pos_embed( + embed_dim, + grid_size, + cls_token=False, + extra_tokens=0, + interpolation_scale=1.0, + base_size=16, + device: torch.device | None = None, + output_type: str = "np", +): + """ + Creates 2D sinusoidal positional embeddings. + + Args: + embed_dim (`int`): + The embedding dimension. + grid_size (`int`): + The size of the grid height and width. + cls_token (`bool`, defaults to `False`): + Whether or not to add a classification token. + extra_tokens (`int`, defaults to `0`): + The number of extra tokens to add. + interpolation_scale (`float`, defaults to `1.0`): + The scale of the interpolation. + + Returns: + pos_embed (`torch.Tensor`): + Shape is either `[grid_size * grid_size, embed_dim]` if not using cls_token, or `[1 + grid_size*grid_size, + embed_dim]` if using cls_token + """ + if output_type == "np": + deprecation_message = ( + "`get_2d_sincos_pos_embed` uses `torch` and supports `device`." + " `from_numpy` is no longer required." + " Pass `output_type='pt' to use the new version now." + ) + deprecate("output_type=='np'", "0.33.0", deprecation_message, standard_warn=False) + return get_2d_sincos_pos_embed_np( + embed_dim=embed_dim, + grid_size=grid_size, + cls_token=cls_token, + extra_tokens=extra_tokens, + interpolation_scale=interpolation_scale, + base_size=base_size, + ) + if isinstance(grid_size, int): + grid_size = (grid_size, grid_size) + + grid_h = ( + torch.arange(grid_size[0], device=device, dtype=torch.float32) + / (grid_size[0] / base_size) + / interpolation_scale + ) + grid_w = ( + torch.arange(grid_size[1], device=device, dtype=torch.float32) + / (grid_size[1] / base_size) + / interpolation_scale + ) + grid = torch.meshgrid(grid_w, grid_h, indexing="xy") # here w goes first + grid = torch.stack(grid, dim=0) + + grid = grid.reshape([2, 1, grid_size[1], grid_size[0]]) + pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid, output_type=output_type) + if cls_token and extra_tokens > 0: + pos_embed = torch.concat([torch.zeros([extra_tokens, embed_dim]), pos_embed], dim=0) + return pos_embed + + +def get_2d_sincos_pos_embed_from_grid(embed_dim, grid, output_type="np"): + r""" + This function generates 2D sinusoidal positional embeddings from a grid. + + Args: + embed_dim (`int`): The embedding dimension. + grid (`torch.Tensor`): Grid of positions with shape `(H * W,)`. + + Returns: + `torch.Tensor`: The 2D sinusoidal positional embeddings with shape `(H * W, embed_dim)` + """ + if output_type == "np": + deprecation_message = ( + "`get_2d_sincos_pos_embed_from_grid` uses `torch` and supports `device`." + " `from_numpy` is no longer required." + " Pass `output_type='pt' to use the new version now." + ) + deprecate("output_type=='np'", "0.33.0", deprecation_message, standard_warn=False) + return get_2d_sincos_pos_embed_from_grid_np( + embed_dim=embed_dim, + grid=grid, + ) + if embed_dim % 2 != 0: + raise ValueError("embed_dim must be divisible by 2") + + # use half of dimensions to encode grid_h + emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0], output_type=output_type) # (H*W, D/2) + emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1], output_type=output_type) # (H*W, D/2) + + emb = torch.concat([emb_h, emb_w], dim=1) # (H*W, D) + return emb + + +def get_1d_sincos_pos_embed_from_grid(embed_dim, pos, output_type="np", flip_sin_to_cos=False, dtype=None): + """ + This function generates 1D positional embeddings from a grid. + + Args: + embed_dim (`int`): The embedding dimension `D` + pos (`torch.Tensor`): 1D tensor of positions with shape `(M,)` + output_type (`str`, *optional*, defaults to `"np"`): Output type. Use `"pt"` for PyTorch tensors. + flip_sin_to_cos (`bool`, *optional*, defaults to `False`): Whether to flip sine and cosine embeddings. + dtype (`torch.dtype`, *optional*): Data type for frequency calculations. If `None`, defaults to + `torch.float32` on MPS devices (which don't support `torch.float64`) and `torch.float64` on other devices. + + Returns: + `torch.Tensor`: Sinusoidal positional embeddings of shape `(M, D)`. + """ + if output_type == "np": + deprecation_message = ( + "`get_1d_sincos_pos_embed_from_grid` uses `torch` and supports `device`." + " `from_numpy` is no longer required." + " Pass `output_type='pt' to use the new version now." + ) + deprecate("output_type=='np'", "0.34.0", deprecation_message, standard_warn=False) + return get_1d_sincos_pos_embed_from_grid_np(embed_dim=embed_dim, pos=pos) + if embed_dim % 2 != 0: + raise ValueError("embed_dim must be divisible by 2") + + # Auto-detect appropriate dtype if not specified + if dtype is None: + dtype = torch.float32 if pos.device.type == "mps" else torch.float64 + + omega = torch.arange(embed_dim // 2, device=pos.device, dtype=dtype) + omega /= embed_dim / 2.0 + omega = 1.0 / 10000**omega # (D/2,) + + pos = pos.reshape(-1) # (M,) + out = torch.outer(pos, omega) # (M, D/2), outer product + + emb_sin = torch.sin(out) # (M, D/2) + emb_cos = torch.cos(out) # (M, D/2) + + emb = torch.concat([emb_sin, emb_cos], dim=1) # (M, D) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = torch.cat([emb[:, embed_dim // 2 :], emb[:, : embed_dim // 2]], dim=1) + + return emb + + +def get_2d_sincos_pos_embed_np( + embed_dim, grid_size, cls_token=False, extra_tokens=0, interpolation_scale=1.0, base_size=16 +): + """ + Creates 2D sinusoidal positional embeddings. + + Args: + embed_dim (`int`): + The embedding dimension. + grid_size (`int`): + The size of the grid height and width. + cls_token (`bool`, defaults to `False`): + Whether or not to add a classification token. + extra_tokens (`int`, defaults to `0`): + The number of extra tokens to add. + interpolation_scale (`float`, defaults to `1.0`): + The scale of the interpolation. + + Returns: + pos_embed (`np.ndarray`): + Shape is either `[grid_size * grid_size, embed_dim]` if not using cls_token, or `[1 + grid_size*grid_size, + embed_dim]` if using cls_token + """ + if isinstance(grid_size, int): + grid_size = (grid_size, grid_size) + + grid_h = np.arange(grid_size[0], dtype=np.float32) / (grid_size[0] / base_size) / interpolation_scale + grid_w = np.arange(grid_size[1], dtype=np.float32) / (grid_size[1] / base_size) / interpolation_scale + grid = np.meshgrid(grid_w, grid_h) # here w goes first + grid = np.stack(grid, axis=0) + + grid = grid.reshape([2, 1, grid_size[1], grid_size[0]]) + pos_embed = get_2d_sincos_pos_embed_from_grid_np(embed_dim, grid) + if cls_token and extra_tokens > 0: + pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0) + return pos_embed + + +def get_2d_sincos_pos_embed_from_grid_np(embed_dim, grid): + r""" + This function generates 2D sinusoidal positional embeddings from a grid. + + Args: + embed_dim (`int`): The embedding dimension. + grid (`np.ndarray`): Grid of positions with shape `(H * W,)`. + + Returns: + `np.ndarray`: The 2D sinusoidal positional embeddings with shape `(H * W, embed_dim)` + """ + if embed_dim % 2 != 0: + raise ValueError("embed_dim must be divisible by 2") + + # use half of dimensions to encode grid_h + emb_h = get_1d_sincos_pos_embed_from_grid_np(embed_dim // 2, grid[0]) # (H*W, D/2) + emb_w = get_1d_sincos_pos_embed_from_grid_np(embed_dim // 2, grid[1]) # (H*W, D/2) + + emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D) + return emb + + +def get_1d_sincos_pos_embed_from_grid_np(embed_dim, pos): + """ + This function generates 1D positional embeddings from a grid. + + Args: + embed_dim (`int`): The embedding dimension `D` + pos (`numpy.ndarray`): 1D tensor of positions with shape `(M,)` + + Returns: + `numpy.ndarray`: Sinusoidal positional embeddings of shape `(M, D)`. + """ + if embed_dim % 2 != 0: + raise ValueError("embed_dim must be divisible by 2") + + omega = np.arange(embed_dim // 2, dtype=np.float64) + omega /= embed_dim / 2.0 + omega = 1.0 / 10000**omega # (D/2,) + + pos = pos.reshape(-1) # (M,) + out = np.einsum("m,d->md", pos, omega) # (M, D/2), outer product + + emb_sin = np.sin(out) # (M, D/2) + emb_cos = np.cos(out) # (M, D/2) + + emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D) + return emb + + +class PatchEmbed(nn.Module): + """ + 2D Image to Patch Embedding with support for SD3 cropping. + + Args: + height (`int`, defaults to `224`): The height of the image. + width (`int`, defaults to `224`): The width of the image. + patch_size (`int`, defaults to `16`): The size of the patches. + in_channels (`int`, defaults to `3`): The number of input channels. + embed_dim (`int`, defaults to `768`): The output dimension of the embedding. + layer_norm (`bool`, defaults to `False`): Whether or not to use layer normalization. + flatten (`bool`, defaults to `True`): Whether or not to flatten the output. + bias (`bool`, defaults to `True`): Whether or not to use bias. + interpolation_scale (`float`, defaults to `1`): The scale of the interpolation. + pos_embed_type (`str`, defaults to `"sincos"`): The type of positional embedding. + pos_embed_max_size (`int`, defaults to `None`): The maximum size of the positional embedding. + """ + + def __init__( + self, + height=224, + width=224, + patch_size=16, + in_channels=3, + embed_dim=768, + layer_norm=False, + flatten=True, + bias=True, + interpolation_scale=1, + pos_embed_type="sincos", + pos_embed_max_size=None, # For SD3 cropping + ): + super().__init__() + + num_patches = (height // patch_size) * (width // patch_size) + self.flatten = flatten + self.layer_norm = layer_norm + self.pos_embed_max_size = pos_embed_max_size + + self.proj = nn.Conv2d( + in_channels, embed_dim, kernel_size=(patch_size, patch_size), stride=patch_size, bias=bias + ) + if layer_norm: + self.norm = nn.LayerNorm(embed_dim, elementwise_affine=False, eps=1e-6) + else: + self.norm = None + + self.patch_size = patch_size + self.height, self.width = height // patch_size, width // patch_size + self.base_size = height // patch_size + self.interpolation_scale = interpolation_scale + + # Calculate positional embeddings based on max size or default + if pos_embed_max_size: + grid_size = pos_embed_max_size + else: + grid_size = int(num_patches**0.5) + + if pos_embed_type is None: + self.pos_embed = None + elif pos_embed_type == "sincos": + pos_embed = get_2d_sincos_pos_embed( + embed_dim, + grid_size, + base_size=self.base_size, + interpolation_scale=self.interpolation_scale, + output_type="pt", + ) + persistent = True if pos_embed_max_size else False + self.register_buffer("pos_embed", pos_embed.float().unsqueeze(0), persistent=persistent) + else: + raise ValueError(f"Unsupported pos_embed_type: {pos_embed_type}") + + def cropped_pos_embed(self, height, width): + """Crops positional embeddings for SD3 compatibility.""" + if self.pos_embed_max_size is None: + raise ValueError("`pos_embed_max_size` must be set for cropping.") + + height = height // self.patch_size + width = width // self.patch_size + if height > self.pos_embed_max_size: + raise ValueError( + f"Height ({height}) cannot be greater than `pos_embed_max_size`: {self.pos_embed_max_size}." + ) + if width > self.pos_embed_max_size: + raise ValueError( + f"Width ({width}) cannot be greater than `pos_embed_max_size`: {self.pos_embed_max_size}." + ) + + top = (self.pos_embed_max_size - height) // 2 + left = (self.pos_embed_max_size - width) // 2 + spatial_pos_embed = self.pos_embed.reshape(1, self.pos_embed_max_size, self.pos_embed_max_size, -1) + spatial_pos_embed = spatial_pos_embed[:, top : top + height, left : left + width, :] + spatial_pos_embed = spatial_pos_embed.reshape(1, -1, spatial_pos_embed.shape[-1]) + return spatial_pos_embed + + def forward(self, latent): + if self.pos_embed_max_size is not None: + height, width = latent.shape[-2:] + else: + height, width = latent.shape[-2] // self.patch_size, latent.shape[-1] // self.patch_size + latent = self.proj(latent) + if self.flatten: + latent = latent.flatten(2).transpose(1, 2) # BCHW -> BNC + if self.layer_norm: + latent = self.norm(latent) + if self.pos_embed is None: + return latent.to(latent.dtype) + # Interpolate or crop positional embeddings as needed + if self.pos_embed_max_size: + pos_embed = self.cropped_pos_embed(height, width) + else: + if self.height != height or self.width != width: + pos_embed = get_2d_sincos_pos_embed( + embed_dim=self.pos_embed.shape[-1], + grid_size=(height, width), + base_size=self.base_size, + interpolation_scale=self.interpolation_scale, + device=latent.device, + output_type="pt", + ) + pos_embed = pos_embed.float().unsqueeze(0) + else: + pos_embed = self.pos_embed + + return (latent + pos_embed).to(latent.dtype) + + +class LuminaPatchEmbed(nn.Module): + """ + 2D Image to Patch Embedding with support for Lumina-T2X + + Args: + patch_size (`int`, defaults to `2`): The size of the patches. + in_channels (`int`, defaults to `4`): The number of input channels. + embed_dim (`int`, defaults to `768`): The output dimension of the embedding. + bias (`bool`, defaults to `True`): Whether or not to use bias. + """ + + def __init__(self, patch_size=2, in_channels=4, embed_dim=768, bias=True): + super().__init__() + self.patch_size = patch_size + self.proj = nn.Linear( + in_features=patch_size * patch_size * in_channels, + out_features=embed_dim, + bias=bias, + ) + + def forward(self, x, freqs_cis): + """ + Patchifies and embeds the input tensor(s). + + Args: + x (list[torch.Tensor] | torch.Tensor): The input tensor(s) to be patchified and embedded. + + Returns: + tuple[torch.Tensor, torch.Tensor, list[tuple[int, int]], torch.Tensor]: A tuple containing the patchified + and embedded tensor(s), the mask indicating the valid patches, the original image size(s), and the + frequency tensor(s). + """ + freqs_cis = freqs_cis.to(x[0].device) + patch_height = patch_width = self.patch_size + batch_size, channel, height, width = x.size() + height_tokens, width_tokens = height // patch_height, width // patch_width + + x = x.view(batch_size, channel, height_tokens, patch_height, width_tokens, patch_width).permute( + 0, 2, 4, 1, 3, 5 + ) + x = x.flatten(3) + x = self.proj(x) + x = x.flatten(1, 2) + + mask = torch.ones(x.shape[0], x.shape[1], dtype=torch.int32, device=x.device) + + return ( + x, + mask, + [(height, width)] * batch_size, + freqs_cis[:height_tokens, :width_tokens].flatten(0, 1).unsqueeze(0), + ) + + +class CogVideoXPatchEmbed(nn.Module): + def __init__( + self, + patch_size: int = 2, + patch_size_t: int | None = None, + in_channels: int = 16, + embed_dim: int = 1920, + text_embed_dim: int = 4096, + bias: bool = True, + sample_width: int = 90, + sample_height: int = 60, + sample_frames: int = 49, + temporal_compression_ratio: int = 4, + max_text_seq_length: int = 226, + spatial_interpolation_scale: float = 1.875, + temporal_interpolation_scale: float = 1.0, + use_positional_embeddings: bool = True, + use_learned_positional_embeddings: bool = True, + ) -> None: + super().__init__() + + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.embed_dim = embed_dim + self.sample_height = sample_height + self.sample_width = sample_width + self.sample_frames = sample_frames + self.temporal_compression_ratio = temporal_compression_ratio + self.max_text_seq_length = max_text_seq_length + self.spatial_interpolation_scale = spatial_interpolation_scale + self.temporal_interpolation_scale = temporal_interpolation_scale + self.use_positional_embeddings = use_positional_embeddings + self.use_learned_positional_embeddings = use_learned_positional_embeddings + + if patch_size_t is None: + # CogVideoX 1.0 checkpoints + self.proj = nn.Conv2d( + in_channels, embed_dim, kernel_size=(patch_size, patch_size), stride=patch_size, bias=bias + ) + else: + # CogVideoX 1.5 checkpoints + self.proj = nn.Linear(in_channels * patch_size * patch_size * patch_size_t, embed_dim) + + self.text_proj = nn.Linear(text_embed_dim, embed_dim) + + if use_positional_embeddings or use_learned_positional_embeddings: + persistent = use_learned_positional_embeddings + pos_embedding = self._get_positional_embeddings(sample_height, sample_width, sample_frames) + self.register_buffer("pos_embedding", pos_embedding, persistent=persistent) + + def _get_positional_embeddings( + self, sample_height: int, sample_width: int, sample_frames: int, device: torch.device | None = None + ) -> torch.Tensor: + post_patch_height = sample_height // self.patch_size + post_patch_width = sample_width // self.patch_size + post_time_compression_frames = (sample_frames - 1) // self.temporal_compression_ratio + 1 + num_patches = post_patch_height * post_patch_width * post_time_compression_frames + + pos_embedding = get_3d_sincos_pos_embed( + self.embed_dim, + (post_patch_width, post_patch_height), + post_time_compression_frames, + self.spatial_interpolation_scale, + self.temporal_interpolation_scale, + device=device, + output_type="pt", + ) + pos_embedding = pos_embedding.flatten(0, 1) + joint_pos_embedding = pos_embedding.new_zeros( + 1, self.max_text_seq_length + num_patches, self.embed_dim, requires_grad=False + ) + joint_pos_embedding.data[:, self.max_text_seq_length :].copy_(pos_embedding) + + return joint_pos_embedding + + def forward(self, text_embeds: torch.Tensor, image_embeds: torch.Tensor): + r""" + Args: + text_embeds (`torch.Tensor`): + Input text embeddings. Expected shape: (batch_size, seq_length, embedding_dim). + image_embeds (`torch.Tensor`): + Input image embeddings. Expected shape: (batch_size, num_frames, channels, height, width). + """ + text_embeds = self.text_proj(text_embeds) + + batch_size, num_frames, channels, height, width = image_embeds.shape + + if self.patch_size_t is None: + image_embeds = image_embeds.reshape(-1, channels, height, width) + image_embeds = self.proj(image_embeds) + image_embeds = image_embeds.view(batch_size, num_frames, *image_embeds.shape[1:]) + image_embeds = image_embeds.flatten(3).transpose(2, 3) # [batch, num_frames, height x width, channels] + image_embeds = image_embeds.flatten(1, 2) # [batch, num_frames x height x width, channels] + else: + p = self.patch_size + p_t = self.patch_size_t + + image_embeds = image_embeds.permute(0, 1, 3, 4, 2) + image_embeds = image_embeds.reshape( + batch_size, num_frames // p_t, p_t, height // p, p, width // p, p, channels + ) + image_embeds = image_embeds.permute(0, 1, 3, 5, 7, 2, 4, 6).flatten(4, 7).flatten(1, 3) + image_embeds = self.proj(image_embeds) + + embeds = torch.cat( + [text_embeds, image_embeds], dim=1 + ).contiguous() # [batch, seq_length + num_frames x height x width, channels] + + if self.use_positional_embeddings or self.use_learned_positional_embeddings: + if self.use_learned_positional_embeddings and (self.sample_width != width or self.sample_height != height): + raise ValueError( + "It is currently not possible to generate videos at a different resolution that the defaults. This should only be the case with 'THUDM/CogVideoX-5b-I2V'." + "If you think this is incorrect, please open an issue at https://github.com/huggingface/diffusers/issues." + ) + + pre_time_compression_frames = (num_frames - 1) * self.temporal_compression_ratio + 1 + + if ( + self.sample_height != height + or self.sample_width != width + or self.sample_frames != pre_time_compression_frames + ): + pos_embedding = self._get_positional_embeddings( + height, width, pre_time_compression_frames, device=embeds.device + ) + else: + pos_embedding = self.pos_embedding + + pos_embedding = pos_embedding.to(dtype=embeds.dtype) + embeds = embeds + pos_embedding + + return embeds + + +class CogView3PlusPatchEmbed(nn.Module): + def __init__( + self, + in_channels: int = 16, + hidden_size: int = 2560, + patch_size: int = 2, + text_hidden_size: int = 4096, + pos_embed_max_size: int = 128, + ): + super().__init__() + self.in_channels = in_channels + self.hidden_size = hidden_size + self.patch_size = patch_size + self.text_hidden_size = text_hidden_size + self.pos_embed_max_size = pos_embed_max_size + # Linear projection for image patches + self.proj = nn.Linear(in_channels * patch_size**2, hidden_size) + + # Linear projection for text embeddings + self.text_proj = nn.Linear(text_hidden_size, hidden_size) + + pos_embed = get_2d_sincos_pos_embed( + hidden_size, pos_embed_max_size, base_size=pos_embed_max_size, output_type="pt" + ) + pos_embed = pos_embed.reshape(pos_embed_max_size, pos_embed_max_size, hidden_size) + self.register_buffer("pos_embed", pos_embed.float(), persistent=False) + + def forward(self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, channel, height, width = hidden_states.shape + + if height % self.patch_size != 0 or width % self.patch_size != 0: + raise ValueError("Height and width must be divisible by patch size") + + height = height // self.patch_size + width = width // self.patch_size + hidden_states = hidden_states.view(batch_size, channel, height, self.patch_size, width, self.patch_size) + hidden_states = hidden_states.permute(0, 2, 4, 1, 3, 5).contiguous() + hidden_states = hidden_states.view(batch_size, height * width, channel * self.patch_size * self.patch_size) + + # Project the patches + hidden_states = self.proj(hidden_states) + encoder_hidden_states = self.text_proj(encoder_hidden_states) + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + # Calculate text_length + text_length = encoder_hidden_states.shape[1] + + image_pos_embed = self.pos_embed[:height, :width].reshape(height * width, -1) + text_pos_embed = torch.zeros( + (text_length, self.hidden_size), dtype=image_pos_embed.dtype, device=image_pos_embed.device + ) + pos_embed = torch.cat([text_pos_embed, image_pos_embed], dim=0)[None, ...] + + return (hidden_states + pos_embed).to(hidden_states.dtype) + + +def get_3d_rotary_pos_embed( + embed_dim, + crops_coords, + grid_size, + temporal_size, + theta: int = 10000, + use_real: bool = True, + grid_type: str = "linspace", + max_size: tuple[int, int] | None = None, + device: torch.device | None = None, +) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: + """ + RoPE for video tokens with 3D structure. + + Args: + embed_dim: (`int`): + The embedding dimension size, corresponding to hidden_size_head. + crops_coords (`tuple[int]`): + The top-left and bottom-right coordinates of the crop. + grid_size (`tuple[int]`): + The grid size of the spatial positional embedding (height, width). + temporal_size (`int`): + The size of the temporal dimension. + theta (`float`): + Scaling factor for frequency computation. + grid_type (`str`): + Whether to use "linspace" or "slice" to compute grids. + + Returns: + `torch.Tensor`: positional embedding with shape `(temporal_size * grid_size[0] * grid_size[1], embed_dim/2)`. + """ + if use_real is not True: + raise ValueError(" `use_real = False` is not currently supported for get_3d_rotary_pos_embed") + + if grid_type == "linspace": + start, stop = crops_coords + grid_size_h, grid_size_w = grid_size + grid_h = torch.linspace( + start[0], stop[0] * (grid_size_h - 1) / grid_size_h, grid_size_h, device=device, dtype=torch.float32 + ) + grid_w = torch.linspace( + start[1], stop[1] * (grid_size_w - 1) / grid_size_w, grid_size_w, device=device, dtype=torch.float32 + ) + grid_t = torch.arange(temporal_size, device=device, dtype=torch.float32) + grid_t = torch.linspace( + 0, temporal_size * (temporal_size - 1) / temporal_size, temporal_size, device=device, dtype=torch.float32 + ) + elif grid_type == "slice": + max_h, max_w = max_size + grid_size_h, grid_size_w = grid_size + grid_h = torch.arange(max_h, device=device, dtype=torch.float32) + grid_w = torch.arange(max_w, device=device, dtype=torch.float32) + grid_t = torch.arange(temporal_size, device=device, dtype=torch.float32) + else: + raise ValueError("Invalid value passed for `grid_type`.") + + # Compute dimensions for each axis + dim_t = embed_dim // 4 + dim_h = embed_dim // 8 * 3 + dim_w = embed_dim // 8 * 3 + + # Temporal frequencies + freqs_t = get_1d_rotary_pos_embed(dim_t, grid_t, theta=theta, use_real=True) + # Spatial frequencies for height and width + freqs_h = get_1d_rotary_pos_embed(dim_h, grid_h, theta=theta, use_real=True) + freqs_w = get_1d_rotary_pos_embed(dim_w, grid_w, theta=theta, use_real=True) + + # BroadCast and concatenate temporal and spaial frequencie (height and width) into a 3d tensor + def combine_time_height_width(freqs_t, freqs_h, freqs_w): + freqs_t = freqs_t[:, None, None, :].expand( + -1, grid_size_h, grid_size_w, -1 + ) # temporal_size, grid_size_h, grid_size_w, dim_t + freqs_h = freqs_h[None, :, None, :].expand( + temporal_size, -1, grid_size_w, -1 + ) # temporal_size, grid_size_h, grid_size_2, dim_h + freqs_w = freqs_w[None, None, :, :].expand( + temporal_size, grid_size_h, -1, -1 + ) # temporal_size, grid_size_h, grid_size_2, dim_w + + freqs = torch.cat( + [freqs_t, freqs_h, freqs_w], dim=-1 + ) # temporal_size, grid_size_h, grid_size_w, (dim_t + dim_h + dim_w) + freqs = freqs.view( + temporal_size * grid_size_h * grid_size_w, -1 + ) # (temporal_size * grid_size_h * grid_size_w), (dim_t + dim_h + dim_w) + return freqs + + t_cos, t_sin = freqs_t # both t_cos and t_sin has shape: temporal_size, dim_t + h_cos, h_sin = freqs_h # both h_cos and h_sin has shape: grid_size_h, dim_h + w_cos, w_sin = freqs_w # both w_cos and w_sin has shape: grid_size_w, dim_w + + if grid_type == "slice": + t_cos, t_sin = t_cos[:temporal_size], t_sin[:temporal_size] + h_cos, h_sin = h_cos[:grid_size_h], h_sin[:grid_size_h] + w_cos, w_sin = w_cos[:grid_size_w], w_sin[:grid_size_w] + + cos = combine_time_height_width(t_cos, h_cos, w_cos) + sin = combine_time_height_width(t_sin, h_sin, w_sin) + return cos, sin + + +def get_3d_rotary_pos_embed_allegro( + embed_dim, + crops_coords, + grid_size, + temporal_size, + interpolation_scale: tuple[float, float, float] = (1.0, 1.0, 1.0), + theta: int = 10000, + device: torch.device | None = None, +) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: + # TODO(aryan): docs + start, stop = crops_coords + grid_size_h, grid_size_w = grid_size + interpolation_scale_t, interpolation_scale_h, interpolation_scale_w = interpolation_scale + grid_t = torch.linspace( + 0, temporal_size * (temporal_size - 1) / temporal_size, temporal_size, device=device, dtype=torch.float32 + ) + grid_h = torch.linspace( + start[0], stop[0] * (grid_size_h - 1) / grid_size_h, grid_size_h, device=device, dtype=torch.float32 + ) + grid_w = torch.linspace( + start[1], stop[1] * (grid_size_w - 1) / grid_size_w, grid_size_w, device=device, dtype=torch.float32 + ) + + # Compute dimensions for each axis + dim_t = embed_dim // 3 + dim_h = embed_dim // 3 + dim_w = embed_dim // 3 + + # Temporal frequencies + freqs_t = get_1d_rotary_pos_embed( + dim_t, grid_t / interpolation_scale_t, theta=theta, use_real=True, repeat_interleave_real=False + ) + # Spatial frequencies for height and width + freqs_h = get_1d_rotary_pos_embed( + dim_h, grid_h / interpolation_scale_h, theta=theta, use_real=True, repeat_interleave_real=False + ) + freqs_w = get_1d_rotary_pos_embed( + dim_w, grid_w / interpolation_scale_w, theta=theta, use_real=True, repeat_interleave_real=False + ) + + return freqs_t, freqs_h, freqs_w, grid_t, grid_h, grid_w + + +def get_2d_rotary_pos_embed( + embed_dim, crops_coords, grid_size, use_real=True, device: torch.device | None = None, output_type: str = "np" +): + """ + RoPE for image tokens with 2d structure. + + Args: + embed_dim: (`int`): + The embedding dimension size + crops_coords (`tuple[int]`) + The top-left and bottom-right coordinates of the crop. + grid_size (`tuple[int]`): + The grid size of the positional embedding. + use_real (`bool`): + If True, return real part and imaginary part separately. Otherwise, return complex numbers. + device: (`torch.device`, **optional**): + The device used to create tensors. + + Returns: + `torch.Tensor`: positional embedding with shape `( grid_size * grid_size, embed_dim/2)`. + """ + if output_type == "np": + deprecation_message = ( + "`get_2d_sincos_pos_embed` uses `torch` and supports `device`." + " `from_numpy` is no longer required." + " Pass `output_type='pt' to use the new version now." + ) + deprecate("output_type=='np'", "0.33.0", deprecation_message, standard_warn=False) + return _get_2d_rotary_pos_embed_np( + embed_dim=embed_dim, + crops_coords=crops_coords, + grid_size=grid_size, + use_real=use_real, + ) + start, stop = crops_coords + # scale end by (steps−1)/steps matches np.linspace(..., endpoint=False) + grid_h = torch.linspace( + start[0], stop[0] * (grid_size[0] - 1) / grid_size[0], grid_size[0], device=device, dtype=torch.float32 + ) + grid_w = torch.linspace( + start[1], stop[1] * (grid_size[1] - 1) / grid_size[1], grid_size[1], device=device, dtype=torch.float32 + ) + grid = torch.meshgrid(grid_w, grid_h, indexing="xy") + grid = torch.stack(grid, dim=0) # [2, W, H] + + grid = grid.reshape([2, 1, *grid.shape[1:]]) + pos_embed = get_2d_rotary_pos_embed_from_grid(embed_dim, grid, use_real=use_real) + return pos_embed + + +def _get_2d_rotary_pos_embed_np(embed_dim, crops_coords, grid_size, use_real=True): + """ + RoPE for image tokens with 2d structure. + + Args: + embed_dim: (`int`): + The embedding dimension size + crops_coords (`tuple[int]`) + The top-left and bottom-right coordinates of the crop. + grid_size (`tuple[int]`): + The grid size of the positional embedding. + use_real (`bool`): + If True, return real part and imaginary part separately. Otherwise, return complex numbers. + + Returns: + `torch.Tensor`: positional embedding with shape `( grid_size * grid_size, embed_dim/2)`. + """ + start, stop = crops_coords + grid_h = np.linspace(start[0], stop[0], grid_size[0], endpoint=False, dtype=np.float32) + grid_w = np.linspace(start[1], stop[1], grid_size[1], endpoint=False, dtype=np.float32) + grid = np.meshgrid(grid_w, grid_h) # here w goes first + grid = np.stack(grid, axis=0) # [2, W, H] + + grid = grid.reshape([2, 1, *grid.shape[1:]]) + pos_embed = get_2d_rotary_pos_embed_from_grid(embed_dim, grid, use_real=use_real) + return pos_embed + + +def get_2d_rotary_pos_embed_from_grid(embed_dim, grid, use_real=False): + """ + Get 2D RoPE from grid. + + Args: + embed_dim: (`int`): + The embedding dimension size, corresponding to hidden_size_head. + grid (`np.ndarray`): + The grid of the positional embedding. + use_real (`bool`): + If True, return real part and imaginary part separately. Otherwise, return complex numbers. + + Returns: + `torch.Tensor`: positional embedding with shape `( grid_size * grid_size, embed_dim/2)`. + """ + assert embed_dim % 4 == 0 + + # use half of dimensions to encode grid_h + emb_h = get_1d_rotary_pos_embed( + embed_dim // 2, grid[0].reshape(-1), use_real=use_real + ) # (H*W, D/2) if use_real else (H*W, D/4) + emb_w = get_1d_rotary_pos_embed( + embed_dim // 2, grid[1].reshape(-1), use_real=use_real + ) # (H*W, D/2) if use_real else (H*W, D/4) + + if use_real: + cos = torch.cat([emb_h[0], emb_w[0]], dim=1) # (H*W, D) + sin = torch.cat([emb_h[1], emb_w[1]], dim=1) # (H*W, D) + return cos, sin + else: + emb = torch.cat([emb_h, emb_w], dim=1) # (H*W, D/2) + return emb + + +def get_2d_rotary_pos_embed_lumina(embed_dim, len_h, len_w, linear_factor=1.0, ntk_factor=1.0): + """ + Get 2D RoPE from grid. + + Args: + embed_dim: (`int`): + The embedding dimension size, corresponding to hidden_size_head. + grid (`np.ndarray`): + The grid of the positional embedding. + linear_factor (`float`): + The linear factor of the positional embedding, which is used to scale the positional embedding in the linear + layer. + ntk_factor (`float`): + The ntk factor of the positional embedding, which is used to scale the positional embedding in the ntk layer. + + Returns: + `torch.Tensor`: positional embedding with shape `( grid_size * grid_size, embed_dim/2)`. + """ + assert embed_dim % 4 == 0 + + emb_h = get_1d_rotary_pos_embed( + embed_dim // 2, len_h, linear_factor=linear_factor, ntk_factor=ntk_factor + ) # (H, D/4) + emb_w = get_1d_rotary_pos_embed( + embed_dim // 2, len_w, linear_factor=linear_factor, ntk_factor=ntk_factor + ) # (W, D/4) + emb_h = emb_h.view(len_h, 1, embed_dim // 4, 1).repeat(1, len_w, 1, 1) # (H, W, D/4, 1) + emb_w = emb_w.view(1, len_w, embed_dim // 4, 1).repeat(len_h, 1, 1, 1) # (H, W, D/4, 1) + + emb = torch.cat([emb_h, emb_w], dim=-1).flatten(2) # (H, W, D/2) + return emb + + +def get_1d_rotary_pos_embed( + dim: int, + pos: np.ndarray | int, + theta: float = 10000.0, + use_real=False, + linear_factor=1.0, + ntk_factor=1.0, + repeat_interleave_real=True, + freqs_dtype=torch.float32, # torch.float32, torch.float64 (flux) +): + """ + Precompute the frequency tensor for complex exponentials (cis) with given dimensions. + + This function calculates a frequency tensor with complex exponentials using the given dimension 'dim' and the end + index 'end'. The 'theta' parameter scales the frequencies. The returned tensor contains complex values in complex64 + data type. + + Args: + dim (`int`): Dimension of the frequency tensor. + pos (`np.ndarray` or `int`): Position indices for the frequency tensor. [S] or scalar + theta (`float`, *optional*, defaults to 10000.0): + Scaling factor for frequency computation. Defaults to 10000.0. + use_real (`bool`, *optional*): + If True, return real part and imaginary part separately. Otherwise, return complex numbers. + linear_factor (`float`, *optional*, defaults to 1.0): + Scaling factor for the context extrapolation. Defaults to 1.0. + ntk_factor (`float`, *optional*, defaults to 1.0): + Scaling factor for the NTK-Aware RoPE. Defaults to 1.0. + repeat_interleave_real (`bool`, *optional*, defaults to `True`): + If `True` and `use_real`, real part and imaginary part are each interleaved with themselves to reach `dim`. + Otherwise, they are concateanted with themselves. + freqs_dtype (`torch.float32` or `torch.float64`, *optional*, defaults to `torch.float32`): + the dtype of the frequency tensor. + Returns: + `torch.Tensor`: Precomputed frequency tensor with complex exponentials. [S, D/2] + """ + assert dim % 2 == 0 + + if isinstance(pos, int): + pos = torch.arange(pos) + if isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos) # type: ignore # [S] + + theta = theta * ntk_factor + freqs = ( + 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=freqs_dtype, device=pos.device) / dim)) / linear_factor + ) # [D/2] + freqs = torch.outer(pos, freqs) # type: ignore # [S, D/2] + is_npu = freqs.device.type == "npu" + if is_npu: + freqs = freqs.float() + if use_real and repeat_interleave_real: + # flux, hunyuan-dit, cogvideox + freqs_cos = freqs.cos().repeat_interleave(2, dim=1, output_size=freqs.shape[1] * 2).float() # [S, D] + freqs_sin = freqs.sin().repeat_interleave(2, dim=1, output_size=freqs.shape[1] * 2).float() # [S, D] + return freqs_cos, freqs_sin + elif use_real: + # stable audio, allegro + freqs_cos = torch.cat([freqs.cos(), freqs.cos()], dim=-1).float() # [S, D] + freqs_sin = torch.cat([freqs.sin(), freqs.sin()], dim=-1).float() # [S, D] + return freqs_cos, freqs_sin + else: + # lumina + freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 # [S, D/2] + return freqs_cis + + +def apply_rotary_emb( + x: torch.Tensor, + freqs_cis: torch.Tensor | tuple[torch.Tensor], + use_real: bool = True, + use_real_unbind_dim: int = -1, + sequence_dim: int = 2, +) -> tuple[torch.Tensor, torch.Tensor]: + """ + Apply rotary embeddings to input tensors using the given frequency tensor. This function applies rotary embeddings + to the given query or key 'x' tensors using the provided frequency tensor 'freqs_cis'. The input tensors are + reshaped as complex numbers, and the frequency tensor is reshaped for broadcasting compatibility. The resulting + tensors contain rotary embeddings and are returned as real tensors. + + Args: + x (`torch.Tensor`): + Query or key tensor to apply rotary embeddings. [B, H, S, D] xk (torch.Tensor): Key tensor to apply + freqs_cis (`tuple[torch.Tensor]`): Precomputed frequency tensor for complex exponentials. ([S, D], [S, D],) + + Returns: + tuple[torch.Tensor, torch.Tensor]: tuple of modified query tensor and key tensor with rotary embeddings. + """ + if use_real: + cos, sin = freqs_cis # [S, D] + if sequence_dim == 2: + cos = cos[None, None, :, :] + sin = sin[None, None, :, :] + elif sequence_dim == 1: + cos = cos[None, :, None, :] + sin = sin[None, :, None, :] + else: + raise ValueError(f"`sequence_dim={sequence_dim}` but should be 1 or 2.") + + cos, sin = cos.to(x.device), sin.to(x.device) + + if use_real_unbind_dim == -1: + # Used for flux, cogvideox, hunyuan-dit + x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1) # [B, H, S, D//2] + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3) + elif use_real_unbind_dim == -2: + # Used for Stable Audio, OmniGen, CogView4 and Cosmos + x_real, x_imag = x.reshape(*x.shape[:-1], 2, -1).unbind(-2) # [B, H, S, D//2] + x_rotated = torch.cat([-x_imag, x_real], dim=-1) + else: + raise ValueError(f"`use_real_unbind_dim={use_real_unbind_dim}` but should be -1 or -2.") + + out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + + return out + else: + # used for lumina + x_rotated = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2)) + freqs_cis = freqs_cis.unsqueeze(2) + x_out = torch.view_as_real(x_rotated * freqs_cis).flatten(3) + + return x_out.type_as(x) + + +def apply_rotary_emb_allegro(x: torch.Tensor, freqs_cis, positions): + # TODO(aryan): rewrite + def apply_1d_rope(tokens, pos, cos, sin): + cos = F.embedding(pos, cos)[:, None, :, :] + sin = F.embedding(pos, sin)[:, None, :, :] + x1, x2 = tokens[..., : tokens.shape[-1] // 2], tokens[..., tokens.shape[-1] // 2 :] + tokens_rotated = torch.cat((-x2, x1), dim=-1) + return (tokens.float() * cos + tokens_rotated.float() * sin).to(tokens.dtype) + + (t_cos, t_sin), (h_cos, h_sin), (w_cos, w_sin) = freqs_cis + t, h, w = x.chunk(3, dim=-1) + t = apply_1d_rope(t, positions[0], t_cos, t_sin) + h = apply_1d_rope(h, positions[1], h_cos, h_sin) + w = apply_1d_rope(w, positions[2], w_cos, w_sin) + x = torch.cat([t, h, w], dim=-1) + return x + + +class TimestepEmbedding(nn.Module): + def __init__( + self, + in_channels: int, + time_embed_dim: int, + act_fn: str = "silu", + out_dim: int = None, + post_act_fn: str | None = None, + cond_proj_dim=None, + sample_proj_bias=True, + ): + super().__init__() + + self.linear_1 = nn.Linear(in_channels, time_embed_dim, sample_proj_bias) + + if cond_proj_dim is not None: + self.cond_proj = nn.Linear(cond_proj_dim, in_channels, bias=False) + else: + self.cond_proj = None + + self.act = get_activation(act_fn) + + if out_dim is not None: + time_embed_dim_out = out_dim + else: + time_embed_dim_out = time_embed_dim + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim_out, sample_proj_bias) + + if post_act_fn is None: + self.post_act = None + else: + self.post_act = get_activation(post_act_fn) + + def forward(self, sample, condition=None): + if condition is not None: + sample = sample + self.cond_proj(condition) + sample = self.linear_1(sample) + + if self.act is not None: + sample = self.act(sample) + + sample = self.linear_2(sample) + + if self.post_act is not None: + sample = self.post_act(sample) + return sample + + +class Timesteps(nn.Module): + def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float, scale: int = 1): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + self.scale = scale + + def forward(self, timesteps: torch.Tensor) -> torch.Tensor: + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + scale=self.scale, + ) + return t_emb + + +class GaussianFourierProjection(nn.Module): + """Gaussian Fourier embeddings for noise levels.""" + + def __init__( + self, embedding_size: int = 256, scale: float = 1.0, set_W_to_weight=True, log=True, flip_sin_to_cos=False + ): + super().__init__() + self.weight = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) + self.log = log + self.flip_sin_to_cos = flip_sin_to_cos + + if set_W_to_weight: + # to delete later + del self.weight + self.W = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) + self.weight = self.W + del self.W + + def forward(self, x): + if self.log: + x = torch.log(x) + + x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi + + if self.flip_sin_to_cos: + out = torch.cat([torch.cos(x_proj), torch.sin(x_proj)], dim=-1) + else: + out = torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) + return out + + +class SinusoidalPositionalEmbedding(nn.Module): + """Apply positional information to a sequence of embeddings. + + Takes in a sequence of embeddings with shape (batch_size, seq_length, embed_dim) and adds positional embeddings to + them + + Args: + embed_dim: (int): Dimension of the positional embedding. + max_seq_length: Maximum sequence length to apply positional embeddings + + """ + + def __init__(self, embed_dim: int, max_seq_length: int = 32): + super().__init__() + position = torch.arange(max_seq_length).unsqueeze(1) + div_term = torch.exp(torch.arange(0, embed_dim, 2) * (-math.log(10000.0) / embed_dim)) + pe = torch.zeros(1, max_seq_length, embed_dim) + pe[0, :, 0::2] = torch.sin(position * div_term) + pe[0, :, 1::2] = torch.cos(position * div_term) + self.register_buffer("pe", pe) + + def forward(self, x): + _, seq_length, _ = x.shape + x = x + self.pe[:, :seq_length] + return x + + +class ImagePositionalEmbeddings(nn.Module): + """ + Converts latent image classes into vector embeddings. Sums the vector embeddings with positional embeddings for the + height and width of the latent space. + + For more details, see figure 10 of the dall-e paper: https://huggingface.co/papers/2102.12092 + + For VQ-diffusion: + + Output vector embeddings are used as input for the transformer. + + Note that the vector embeddings for the transformer are different than the vector embeddings from the VQVAE. + + Args: + num_embed (`int`): + Number of embeddings for the latent pixels embeddings. + height (`int`): + Height of the latent image i.e. the number of height embeddings. + width (`int`): + Width of the latent image i.e. the number of width embeddings. + embed_dim (`int`): + Dimension of the produced vector embeddings. Used for the latent pixel, height, and width embeddings. + """ + + def __init__( + self, + num_embed: int, + height: int, + width: int, + embed_dim: int, + ): + super().__init__() + + self.height = height + self.width = width + self.num_embed = num_embed + self.embed_dim = embed_dim + + self.emb = nn.Embedding(self.num_embed, embed_dim) + self.height_emb = nn.Embedding(self.height, embed_dim) + self.width_emb = nn.Embedding(self.width, embed_dim) + + def forward(self, index): + emb = self.emb(index) + + height_emb = self.height_emb(torch.arange(self.height, device=index.device).view(1, self.height)) + + # 1 x H x D -> 1 x H x 1 x D + height_emb = height_emb.unsqueeze(2) + + width_emb = self.width_emb(torch.arange(self.width, device=index.device).view(1, self.width)) + + # 1 x W x D -> 1 x 1 x W x D + width_emb = width_emb.unsqueeze(1) + + pos_emb = height_emb + width_emb + + # 1 x H x W x D -> 1 x L xD + pos_emb = pos_emb.view(1, self.height * self.width, -1) + + emb = emb + pos_emb[:, : emb.shape[1], :] + + return emb + + +class LabelEmbedding(nn.Module): + """ + Embeds class labels into vector representations. Also handles label dropout for classifier-free guidance. + + Args: + num_classes (`int`): The number of classes. + hidden_size (`int`): The size of the vector embeddings. + dropout_prob (`float`): The probability of dropping a label. + """ + + def __init__(self, num_classes, hidden_size, dropout_prob): + super().__init__() + use_cfg_embedding = dropout_prob > 0 + self.embedding_table = nn.Embedding(num_classes + use_cfg_embedding, hidden_size) + self.num_classes = num_classes + self.dropout_prob = dropout_prob + + def token_drop(self, labels, force_drop_ids=None): + """ + Drops labels to enable classifier-free guidance. + """ + if force_drop_ids is None: + drop_ids = torch.rand(labels.shape[0], device=labels.device) < self.dropout_prob + else: + drop_ids = torch.tensor(force_drop_ids == 1) + labels = torch.where(drop_ids, self.num_classes, labels) + return labels + + def forward(self, labels: torch.LongTensor, force_drop_ids=None): + use_dropout = self.dropout_prob > 0 + if (self.training and use_dropout) or (force_drop_ids is not None): + labels = self.token_drop(labels, force_drop_ids) + embeddings = self.embedding_table(labels) + return embeddings + + +class TextImageProjection(nn.Module): + def __init__( + self, + text_embed_dim: int = 1024, + image_embed_dim: int = 768, + cross_attention_dim: int = 768, + num_image_text_embeds: int = 10, + ): + super().__init__() + + self.num_image_text_embeds = num_image_text_embeds + self.image_embeds = nn.Linear(image_embed_dim, self.num_image_text_embeds * cross_attention_dim) + self.text_proj = nn.Linear(text_embed_dim, cross_attention_dim) + + def forward(self, text_embeds: torch.Tensor, image_embeds: torch.Tensor): + batch_size = text_embeds.shape[0] + + # image + image_text_embeds = self.image_embeds(image_embeds) + image_text_embeds = image_text_embeds.reshape(batch_size, self.num_image_text_embeds, -1) + + # text + text_embeds = self.text_proj(text_embeds) + + return torch.cat([image_text_embeds, text_embeds], dim=1) + + +class ImageProjection(nn.Module): + def __init__( + self, + image_embed_dim: int = 768, + cross_attention_dim: int = 768, + num_image_text_embeds: int = 32, + ): + super().__init__() + + self.num_image_text_embeds = num_image_text_embeds + self.image_embeds = nn.Linear(image_embed_dim, self.num_image_text_embeds * cross_attention_dim) + self.norm = nn.LayerNorm(cross_attention_dim) + + def forward(self, image_embeds: torch.Tensor): + batch_size = image_embeds.shape[0] + + # image + image_embeds = self.image_embeds(image_embeds.to(self.image_embeds.weight.dtype)) + image_embeds = image_embeds.reshape(batch_size, self.num_image_text_embeds, -1) + image_embeds = self.norm(image_embeds) + return image_embeds + + +class IPAdapterFullImageProjection(nn.Module): + def __init__(self, image_embed_dim=1024, cross_attention_dim=1024): + super().__init__() + from .attention import FeedForward + + self.ff = FeedForward(image_embed_dim, cross_attention_dim, mult=1, activation_fn="gelu") + self.norm = nn.LayerNorm(cross_attention_dim) + + def forward(self, image_embeds: torch.Tensor): + return self.norm(self.ff(image_embeds)) + + +class IPAdapterFaceIDImageProjection(nn.Module): + def __init__(self, image_embed_dim=1024, cross_attention_dim=1024, mult=1, num_tokens=1): + super().__init__() + from .attention import FeedForward + + self.num_tokens = num_tokens + self.cross_attention_dim = cross_attention_dim + self.ff = FeedForward(image_embed_dim, cross_attention_dim * num_tokens, mult=mult, activation_fn="gelu") + self.norm = nn.LayerNorm(cross_attention_dim) + + def forward(self, image_embeds: torch.Tensor): + x = self.ff(image_embeds) + x = x.reshape(-1, self.num_tokens, self.cross_attention_dim) + return self.norm(x) + + +class CombinedTimestepLabelEmbeddings(nn.Module): + def __init__(self, num_classes, embedding_dim, class_dropout_prob=0.1): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=1) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + self.class_embedder = LabelEmbedding(num_classes, embedding_dim, class_dropout_prob) + + def forward(self, timestep, class_labels, hidden_dtype=None): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D) + + class_labels = self.class_embedder(class_labels) # (N, D) + + conditioning = timesteps_emb + class_labels # (N, D) + + return conditioning + + +class CombinedTimestepTextProjEmbeddings(nn.Module): + def __init__(self, embedding_dim, pooled_projection_dim): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + self.text_embedder = PixArtAlphaTextProjection(pooled_projection_dim, embedding_dim, act_fn="silu") + + def forward(self, timestep, pooled_projection): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=pooled_projection.dtype)) # (N, D) + + pooled_projections = self.text_embedder(pooled_projection) + + conditioning = timesteps_emb + pooled_projections + + return conditioning + + +class CombinedTimestepGuidanceTextProjEmbeddings(nn.Module): + def __init__(self, embedding_dim, pooled_projection_dim): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + self.guidance_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + self.text_embedder = PixArtAlphaTextProjection(pooled_projection_dim, embedding_dim, act_fn="silu") + + def forward(self, timestep, guidance, pooled_projection): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=pooled_projection.dtype)) # (N, D) + + guidance_proj = self.time_proj(guidance) + guidance_emb = self.guidance_embedder(guidance_proj.to(dtype=pooled_projection.dtype)) # (N, D) + + time_guidance_emb = timesteps_emb + guidance_emb + + pooled_projections = self.text_embedder(pooled_projection) + conditioning = time_guidance_emb + pooled_projections + + return conditioning + + +class CogView3CombinedTimestepSizeEmbeddings(nn.Module): + def __init__(self, embedding_dim: int, condition_dim: int, pooled_projection_dim: int, timesteps_dim: int = 256): + super().__init__() + + self.time_proj = Timesteps(num_channels=timesteps_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.condition_proj = Timesteps(num_channels=condition_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=timesteps_dim, time_embed_dim=embedding_dim) + self.condition_embedder = PixArtAlphaTextProjection(pooled_projection_dim, embedding_dim, act_fn="silu") + + def forward( + self, + timestep: torch.Tensor, + original_size: torch.Tensor, + target_size: torch.Tensor, + crop_coords: torch.Tensor, + hidden_dtype: torch.dtype, + ) -> torch.Tensor: + timesteps_proj = self.time_proj(timestep) + + original_size_proj = self.condition_proj(original_size.flatten()).view(original_size.size(0), -1) + crop_coords_proj = self.condition_proj(crop_coords.flatten()).view(crop_coords.size(0), -1) + target_size_proj = self.condition_proj(target_size.flatten()).view(target_size.size(0), -1) + + # (B, 3 * condition_dim) + condition_proj = torch.cat([original_size_proj, crop_coords_proj, target_size_proj], dim=1) + + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (B, embedding_dim) + condition_emb = self.condition_embedder(condition_proj.to(dtype=hidden_dtype)) # (B, embedding_dim) + + conditioning = timesteps_emb + condition_emb + return conditioning + + +class HunyuanDiTAttentionPool(nn.Module): + # Copied from https://github.com/Tencent/HunyuanDiT/blob/cb709308d92e6c7e8d59d0dff41b74d35088db6a/hydit/modules/poolers.py#L6 + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + self.positional_embedding = nn.Parameter(torch.randn(spacial_dim + 1, embed_dim) / embed_dim**0.5) + self.k_proj = nn.Linear(embed_dim, embed_dim) + self.q_proj = nn.Linear(embed_dim, embed_dim) + self.v_proj = nn.Linear(embed_dim, embed_dim) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim) + self.num_heads = num_heads + + def forward(self, x): + x = x.permute(1, 0, 2) # NLC -> LNC + x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0) # (L+1)NC + x = x + self.positional_embedding[:, None, :].to(x.dtype) # (L+1)NC + x, _ = F.multi_head_attention_forward( + query=x[:1], + key=x, + value=x, + embed_dim_to_check=x.shape[-1], + num_heads=self.num_heads, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + in_proj_weight=None, + in_proj_bias=torch.cat([self.q_proj.bias, self.k_proj.bias, self.v_proj.bias]), + bias_k=None, + bias_v=None, + add_zero_attn=False, + dropout_p=0, + out_proj_weight=self.c_proj.weight, + out_proj_bias=self.c_proj.bias, + use_separate_proj_weight=True, + training=self.training, + need_weights=False, + ) + return x.squeeze(0) + + +class HunyuanCombinedTimestepTextSizeStyleEmbedding(nn.Module): + def __init__( + self, + embedding_dim, + pooled_projection_dim=1024, + seq_len=256, + cross_attention_dim=2048, + use_style_cond_and_image_meta_size=True, + ): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.size_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + + self.pooler = HunyuanDiTAttentionPool( + seq_len, cross_attention_dim, num_heads=8, output_dim=pooled_projection_dim + ) + + # Here we use a default learned embedder layer for future extension. + self.use_style_cond_and_image_meta_size = use_style_cond_and_image_meta_size + if use_style_cond_and_image_meta_size: + self.style_embedder = nn.Embedding(1, embedding_dim) + extra_in_dim = 256 * 6 + embedding_dim + pooled_projection_dim + else: + extra_in_dim = pooled_projection_dim + + self.extra_embedder = PixArtAlphaTextProjection( + in_features=extra_in_dim, + hidden_size=embedding_dim * 4, + out_features=embedding_dim, + act_fn="silu_fp32", + ) + + def forward(self, timestep, encoder_hidden_states, image_meta_size, style, hidden_dtype=None): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, 256) + + # extra condition1: text + pooled_projections = self.pooler(encoder_hidden_states) # (N, 1024) + + if self.use_style_cond_and_image_meta_size: + # extra condition2: image meta size embedding + image_meta_size = self.size_proj(image_meta_size.view(-1)) + image_meta_size = image_meta_size.to(dtype=hidden_dtype) + image_meta_size = image_meta_size.view(-1, 6 * 256) # (N, 1536) + + # extra condition3: style embedding + style_embedding = self.style_embedder(style) # (N, embedding_dim) + + # Concatenate all extra vectors + extra_cond = torch.cat([pooled_projections, image_meta_size, style_embedding], dim=1) + else: + extra_cond = torch.cat([pooled_projections], dim=1) + + conditioning = timesteps_emb + self.extra_embedder(extra_cond) # [B, D] + + return conditioning + + +class LuminaCombinedTimestepCaptionEmbedding(nn.Module): + def __init__(self, hidden_size=4096, cross_attention_dim=2048, frequency_embedding_size=256): + super().__init__() + self.time_proj = Timesteps( + num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0.0 + ) + + self.timestep_embedder = TimestepEmbedding(in_channels=frequency_embedding_size, time_embed_dim=hidden_size) + + self.caption_embedder = nn.Sequential( + nn.LayerNorm(cross_attention_dim), + nn.Linear( + cross_attention_dim, + hidden_size, + bias=True, + ), + ) + + def forward(self, timestep, caption_feat, caption_mask): + # timestep embedding: + time_freq = self.time_proj(timestep) + time_embed = self.timestep_embedder(time_freq.to(dtype=caption_feat.dtype)) + + # caption condition embedding: + caption_mask_float = caption_mask.float().unsqueeze(-1) + caption_feats_pool = (caption_feat * caption_mask_float).sum(dim=1) / caption_mask_float.sum(dim=1) + caption_feats_pool = caption_feats_pool.to(caption_feat) + caption_embed = self.caption_embedder(caption_feats_pool) + + conditioning = time_embed + caption_embed + + return conditioning + + +class MochiCombinedTimestepCaptionEmbedding(nn.Module): + def __init__( + self, + embedding_dim: int, + pooled_projection_dim: int, + text_embed_dim: int, + time_embed_dim: int = 256, + num_attention_heads: int = 8, + ) -> None: + super().__init__() + + self.time_proj = Timesteps(num_channels=time_embed_dim, flip_sin_to_cos=True, downscale_freq_shift=0.0) + self.timestep_embedder = TimestepEmbedding(in_channels=time_embed_dim, time_embed_dim=embedding_dim) + self.pooler = MochiAttentionPool( + num_attention_heads=num_attention_heads, embed_dim=text_embed_dim, output_dim=embedding_dim + ) + self.caption_proj = nn.Linear(text_embed_dim, pooled_projection_dim) + + def forward( + self, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_attention_mask: torch.Tensor, + hidden_dtype: torch.dtype | None = None, + ): + time_proj = self.time_proj(timestep) + time_emb = self.timestep_embedder(time_proj.to(dtype=hidden_dtype)) + + pooled_projections = self.pooler(encoder_hidden_states, encoder_attention_mask) + caption_proj = self.caption_proj(encoder_hidden_states) + + conditioning = time_emb + pooled_projections + return conditioning, caption_proj + + +class TextTimeEmbedding(nn.Module): + def __init__(self, encoder_dim: int, time_embed_dim: int, num_heads: int = 64): + super().__init__() + self.norm1 = nn.LayerNorm(encoder_dim) + self.pool = AttentionPooling(num_heads, encoder_dim) + self.proj = nn.Linear(encoder_dim, time_embed_dim) + self.norm2 = nn.LayerNorm(time_embed_dim) + + def forward(self, hidden_states): + hidden_states = self.norm1(hidden_states) + hidden_states = self.pool(hidden_states) + hidden_states = self.proj(hidden_states) + hidden_states = self.norm2(hidden_states) + return hidden_states + + +class TextImageTimeEmbedding(nn.Module): + def __init__(self, text_embed_dim: int = 768, image_embed_dim: int = 768, time_embed_dim: int = 1536): + super().__init__() + self.text_proj = nn.Linear(text_embed_dim, time_embed_dim) + self.text_norm = nn.LayerNorm(time_embed_dim) + self.image_proj = nn.Linear(image_embed_dim, time_embed_dim) + + def forward(self, text_embeds: torch.Tensor, image_embeds: torch.Tensor): + # text + time_text_embeds = self.text_proj(text_embeds) + time_text_embeds = self.text_norm(time_text_embeds) + + # image + time_image_embeds = self.image_proj(image_embeds) + + return time_image_embeds + time_text_embeds + + +class ImageTimeEmbedding(nn.Module): + def __init__(self, image_embed_dim: int = 768, time_embed_dim: int = 1536): + super().__init__() + self.image_proj = nn.Linear(image_embed_dim, time_embed_dim) + self.image_norm = nn.LayerNorm(time_embed_dim) + + def forward(self, image_embeds: torch.Tensor): + # image + time_image_embeds = self.image_proj(image_embeds) + time_image_embeds = self.image_norm(time_image_embeds) + return time_image_embeds + + +class ImageHintTimeEmbedding(nn.Module): + def __init__(self, image_embed_dim: int = 768, time_embed_dim: int = 1536): + super().__init__() + self.image_proj = nn.Linear(image_embed_dim, time_embed_dim) + self.image_norm = nn.LayerNorm(time_embed_dim) + self.input_hint_block = nn.Sequential( + nn.Conv2d(3, 16, 3, padding=1), + nn.SiLU(), + nn.Conv2d(16, 16, 3, padding=1), + nn.SiLU(), + nn.Conv2d(16, 32, 3, padding=1, stride=2), + nn.SiLU(), + nn.Conv2d(32, 32, 3, padding=1), + nn.SiLU(), + nn.Conv2d(32, 96, 3, padding=1, stride=2), + nn.SiLU(), + nn.Conv2d(96, 96, 3, padding=1), + nn.SiLU(), + nn.Conv2d(96, 256, 3, padding=1, stride=2), + nn.SiLU(), + nn.Conv2d(256, 4, 3, padding=1), + ) + + def forward(self, image_embeds: torch.Tensor, hint: torch.Tensor): + # image + time_image_embeds = self.image_proj(image_embeds) + time_image_embeds = self.image_norm(time_image_embeds) + hint = self.input_hint_block(hint) + return time_image_embeds, hint + + +class AttentionPooling(nn.Module): + # Copied from https://github.com/deep-floyd/IF/blob/2f91391f27dd3c468bf174be5805b4cc92980c0b/deepfloyd_if/model/nn.py#L54 + + def __init__(self, num_heads, embed_dim, dtype=None): + super().__init__() + self.dtype = dtype + self.positional_embedding = nn.Parameter(torch.randn(1, embed_dim) / embed_dim**0.5) + self.k_proj = nn.Linear(embed_dim, embed_dim, dtype=self.dtype) + self.q_proj = nn.Linear(embed_dim, embed_dim, dtype=self.dtype) + self.v_proj = nn.Linear(embed_dim, embed_dim, dtype=self.dtype) + self.num_heads = num_heads + self.dim_per_head = embed_dim // self.num_heads + + def forward(self, x): + bs, length, width = x.size() + + def shape(x): + # (bs, length, width) --> (bs, length, n_heads, dim_per_head) + x = x.view(bs, -1, self.num_heads, self.dim_per_head) + # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) + x = x.transpose(1, 2) + # (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head) + x = x.reshape(bs * self.num_heads, -1, self.dim_per_head) + # (bs*n_heads, length, dim_per_head) --> (bs*n_heads, dim_per_head, length) + x = x.transpose(1, 2) + return x + + class_token = x.mean(dim=1, keepdim=True) + self.positional_embedding.to(x.dtype) + x = torch.cat([class_token, x], dim=1) # (bs, length+1, width) + + # (bs*n_heads, class_token_length, dim_per_head) + q = shape(self.q_proj(class_token)) + # (bs*n_heads, length+class_token_length, dim_per_head) + k = shape(self.k_proj(x)) + v = shape(self.v_proj(x)) + + # (bs*n_heads, class_token_length, length+class_token_length): + scale = 1 / math.sqrt(math.sqrt(self.dim_per_head)) + weight = torch.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + + # (bs*n_heads, dim_per_head, class_token_length) + a = torch.einsum("bts,bcs->bct", weight, v) + + # (bs, length+1, width) + a = a.reshape(bs, -1, 1).transpose(1, 2) + + return a[:, 0, :] # cls_token + + +class MochiAttentionPool(nn.Module): + def __init__( + self, + num_attention_heads: int, + embed_dim: int, + output_dim: int | None = None, + ) -> None: + super().__init__() + + self.output_dim = output_dim or embed_dim + self.num_attention_heads = num_attention_heads + + self.to_kv = nn.Linear(embed_dim, 2 * embed_dim) + self.to_q = nn.Linear(embed_dim, embed_dim) + self.to_out = nn.Linear(embed_dim, self.output_dim) + + @staticmethod + def pool_tokens(x: torch.Tensor, mask: torch.Tensor, *, keepdim=False) -> torch.Tensor: + """ + Pool tokens in x using mask. + + NOTE: We assume x does not require gradients. + + Args: + x: (B, L, D) tensor of tokens. + mask: (B, L) boolean tensor indicating which tokens are not padding. + + Returns: + pooled: (B, D) tensor of pooled tokens. + """ + assert x.size(1) == mask.size(1) # Expected mask to have same length as tokens. + assert x.size(0) == mask.size(0) # Expected mask to have same batch size as tokens. + mask = mask[:, :, None].to(dtype=x.dtype) + mask = mask / mask.sum(dim=1, keepdim=True).clamp(min=1) + pooled = (x * mask).sum(dim=1, keepdim=keepdim) + return pooled + + def forward(self, x: torch.Tensor, mask: torch.BoolTensor) -> torch.Tensor: + r""" + Args: + x (`torch.Tensor`): + Tensor of shape `(B, S, D)` of input tokens. + mask (`torch.Tensor`): + Boolean ensor of shape `(B, S)` indicating which tokens are not padding. + + Returns: + `torch.Tensor`: + `(B, D)` tensor of pooled tokens. + """ + D = x.size(2) + + # Construct attention mask, shape: (B, 1, num_queries=1, num_keys=1+L). + attn_mask = mask[:, None, None, :].bool() # (B, 1, 1, L). + attn_mask = F.pad(attn_mask, (1, 0), value=True) # (B, 1, 1, 1+L). + + # Average non-padding token features. These will be used as the query. + x_pool = self.pool_tokens(x, mask, keepdim=True) # (B, 1, D) + + # Concat pooled features to input sequence. + x = torch.cat([x_pool, x], dim=1) # (B, L+1, D) + + # Compute queries, keys, values. Only the mean token is used to create a query. + kv = self.to_kv(x) # (B, L+1, 2 * D) + q = self.to_q(x[:, 0]) # (B, D) + + # Extract heads. + head_dim = D // self.num_attention_heads + kv = kv.unflatten(2, (2, self.num_attention_heads, head_dim)) # (B, 1+L, 2, H, head_dim) + kv = kv.transpose(1, 3) # (B, H, 2, 1+L, head_dim) + k, v = kv.unbind(2) # (B, H, 1+L, head_dim) + q = q.unflatten(1, (self.num_attention_heads, head_dim)) # (B, H, head_dim) + q = q.unsqueeze(2) # (B, H, 1, head_dim) + + # Compute attention. + x = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask, dropout_p=0.0) # (B, H, 1, head_dim) + + # Concatenate heads and run output. + x = x.squeeze(2).flatten(1, 2) # (B, D = H * head_dim) + x = self.to_out(x) + return x + + +def get_fourier_embeds_from_boundingbox(embed_dim, box): + """ + Args: + embed_dim: int + box: a 3-D tensor [B x N x 4] representing the bounding boxes for GLIGEN pipeline + Returns: + [B x N x embed_dim] tensor of positional embeddings + """ + + batch_size, num_boxes = box.shape[:2] + + emb = 100 ** (torch.arange(embed_dim) / embed_dim) + emb = emb[None, None, None].to(device=box.device, dtype=box.dtype) + emb = emb * box.unsqueeze(-1) + + emb = torch.stack((emb.sin(), emb.cos()), dim=-1) + emb = emb.permute(0, 1, 3, 4, 2).reshape(batch_size, num_boxes, embed_dim * 2 * 4) + + return emb + + +class GLIGENTextBoundingboxProjection(nn.Module): + def __init__(self, positive_len, out_dim, feature_type="text-only", fourier_freqs=8): + super().__init__() + self.positive_len = positive_len + self.out_dim = out_dim + + self.fourier_embedder_dim = fourier_freqs + self.position_dim = fourier_freqs * 2 * 4 # 2: sin/cos, 4: xyxy + + if isinstance(out_dim, tuple): + out_dim = out_dim[0] + + if feature_type == "text-only": + self.linears = nn.Sequential( + nn.Linear(self.positive_len + self.position_dim, 512), + nn.SiLU(), + nn.Linear(512, 512), + nn.SiLU(), + nn.Linear(512, out_dim), + ) + self.null_positive_feature = torch.nn.Parameter(torch.zeros([self.positive_len])) + + elif feature_type == "text-image": + self.linears_text = nn.Sequential( + nn.Linear(self.positive_len + self.position_dim, 512), + nn.SiLU(), + nn.Linear(512, 512), + nn.SiLU(), + nn.Linear(512, out_dim), + ) + self.linears_image = nn.Sequential( + nn.Linear(self.positive_len + self.position_dim, 512), + nn.SiLU(), + nn.Linear(512, 512), + nn.SiLU(), + nn.Linear(512, out_dim), + ) + self.null_text_feature = torch.nn.Parameter(torch.zeros([self.positive_len])) + self.null_image_feature = torch.nn.Parameter(torch.zeros([self.positive_len])) + + self.null_position_feature = torch.nn.Parameter(torch.zeros([self.position_dim])) + + def forward( + self, + boxes, + masks, + positive_embeddings=None, + phrases_masks=None, + image_masks=None, + phrases_embeddings=None, + image_embeddings=None, + ): + masks = masks.unsqueeze(-1) + + # embedding position (it may includes padding as placeholder) + xyxy_embedding = get_fourier_embeds_from_boundingbox(self.fourier_embedder_dim, boxes) # B*N*4 -> B*N*C + + # learnable null embedding + xyxy_null = self.null_position_feature.view(1, 1, -1) + + # replace padding with learnable null embedding + xyxy_embedding = xyxy_embedding * masks + (1 - masks) * xyxy_null + + # positionet with text only information + if positive_embeddings is not None: + # learnable null embedding + positive_null = self.null_positive_feature.view(1, 1, -1) + + # replace padding with learnable null embedding + positive_embeddings = positive_embeddings * masks + (1 - masks) * positive_null + + objs = self.linears(torch.cat([positive_embeddings, xyxy_embedding], dim=-1)) + + # positionet with text and image information + else: + phrases_masks = phrases_masks.unsqueeze(-1) + image_masks = image_masks.unsqueeze(-1) + + # learnable null embedding + text_null = self.null_text_feature.view(1, 1, -1) + image_null = self.null_image_feature.view(1, 1, -1) + + # replace padding with learnable null embedding + phrases_embeddings = phrases_embeddings * phrases_masks + (1 - phrases_masks) * text_null + image_embeddings = image_embeddings * image_masks + (1 - image_masks) * image_null + + objs_text = self.linears_text(torch.cat([phrases_embeddings, xyxy_embedding], dim=-1)) + objs_image = self.linears_image(torch.cat([image_embeddings, xyxy_embedding], dim=-1)) + objs = torch.cat([objs_text, objs_image], dim=1) + + return objs + + +class PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module): + """ + For PixArt-Alpha. + + Reference: + https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29 + """ + + def __init__(self, embedding_dim, size_emb_dim, use_additional_conditions: bool = False): + super().__init__() + + self.outdim = size_emb_dim + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.use_additional_conditions = use_additional_conditions + if use_additional_conditions: + self.additional_condition_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.resolution_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=size_emb_dim) + self.aspect_ratio_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=size_emb_dim) + + def forward(self, timestep, resolution, aspect_ratio, batch_size, hidden_dtype): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D) + + if self.use_additional_conditions: + resolution_emb = self.additional_condition_proj(resolution.flatten()).to(hidden_dtype) + resolution_emb = self.resolution_embedder(resolution_emb).reshape(batch_size, -1) + aspect_ratio_emb = self.additional_condition_proj(aspect_ratio.flatten()).to(hidden_dtype) + aspect_ratio_emb = self.aspect_ratio_embedder(aspect_ratio_emb).reshape(batch_size, -1) + conditioning = timesteps_emb + torch.cat([resolution_emb, aspect_ratio_emb], dim=1) + else: + conditioning = timesteps_emb + + return conditioning + + +class PixArtAlphaTextProjection(nn.Module): + """ + Projects caption embeddings. Also handles dropout for classifier-free guidance. + + Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py + """ + + def __init__(self, in_features, hidden_size, out_features=None, act_fn="gelu_tanh"): + super().__init__() + if out_features is None: + out_features = hidden_size + self.linear_1 = nn.Linear(in_features=in_features, out_features=hidden_size, bias=True) + if act_fn == "gelu_tanh": + self.act_1 = nn.GELU(approximate="tanh") + elif act_fn == "silu": + self.act_1 = nn.SiLU() + elif act_fn == "silu_fp32": + self.act_1 = FP32SiLU() + else: + raise ValueError(f"Unknown activation function: {act_fn}") + self.linear_2 = nn.Linear(in_features=hidden_size, out_features=out_features, bias=True) + + def forward(self, caption): + hidden_states = self.linear_1(caption) + hidden_states = self.act_1(hidden_states) + hidden_states = self.linear_2(hidden_states) + return hidden_states + + +class IPAdapterPlusImageProjectionBlock(nn.Module): + def __init__( + self, + embed_dims: int = 768, + dim_head: int = 64, + heads: int = 16, + ffn_ratio: float = 4, + ) -> None: + super().__init__() + from .attention import FeedForward + + self.ln0 = nn.LayerNorm(embed_dims) + self.ln1 = nn.LayerNorm(embed_dims) + self.attn = Attention( + query_dim=embed_dims, + dim_head=dim_head, + heads=heads, + out_bias=False, + ) + self.ff = nn.Sequential( + nn.LayerNorm(embed_dims), + FeedForward(embed_dims, embed_dims, activation_fn="gelu", mult=ffn_ratio, bias=False), + ) + + def forward(self, x, latents, residual): + encoder_hidden_states = self.ln0(x) + latents = self.ln1(latents) + encoder_hidden_states = torch.cat([encoder_hidden_states, latents], dim=-2) + latents = self.attn(latents, encoder_hidden_states) + residual + latents = self.ff(latents) + latents + return latents + + +class IPAdapterPlusImageProjection(nn.Module): + """Resampler of IP-Adapter Plus. + + Args: + embed_dims (int): The feature dimension. Defaults to 768. output_dims (int): The number of output channels, + that is the same + number of the channels in the `unet.config.cross_attention_dim`. Defaults to 1024. + hidden_dims (int): + The number of hidden channels. Defaults to 1280. depth (int): The number of blocks. Defaults + to 8. dim_head (int): The number of head channels. Defaults to 64. heads (int): Parallel attention heads. + Defaults to 16. num_queries (int): + The number of queries. Defaults to 8. ffn_ratio (float): The expansion ratio + of feedforward network hidden + layer channels. Defaults to 4. + """ + + def __init__( + self, + embed_dims: int = 768, + output_dims: int = 1024, + hidden_dims: int = 1280, + depth: int = 4, + dim_head: int = 64, + heads: int = 16, + num_queries: int = 8, + ffn_ratio: float = 4, + ) -> None: + super().__init__() + self.latents = nn.Parameter(torch.randn(1, num_queries, hidden_dims) / hidden_dims**0.5) + + self.proj_in = nn.Linear(embed_dims, hidden_dims) + + self.proj_out = nn.Linear(hidden_dims, output_dims) + self.norm_out = nn.LayerNorm(output_dims) + + self.layers = nn.ModuleList( + [IPAdapterPlusImageProjectionBlock(hidden_dims, dim_head, heads, ffn_ratio) for _ in range(depth)] + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Forward pass. + + Args: + x (torch.Tensor): Input Tensor. + Returns: + torch.Tensor: Output Tensor. + """ + latents = self.latents.repeat(x.size(0), 1, 1) + + x = self.proj_in(x) + + for block in self.layers: + residual = latents + latents = block(x, latents, residual) + + latents = self.proj_out(latents) + return self.norm_out(latents) + + +class IPAdapterFaceIDPlusImageProjection(nn.Module): + """FacePerceiverResampler of IP-Adapter Plus. + + Args: + embed_dims (int): The feature dimension. Defaults to 768. output_dims (int): The number of output channels, + that is the same + number of the channels in the `unet.config.cross_attention_dim`. Defaults to 1024. + hidden_dims (int): + The number of hidden channels. Defaults to 1280. depth (int): The number of blocks. Defaults + to 8. dim_head (int): The number of head channels. Defaults to 64. heads (int): Parallel attention heads. + Defaults to 16. num_tokens (int): Number of tokens num_queries (int): The number of queries. Defaults to 8. + ffn_ratio (float): The expansion ratio of feedforward network hidden + layer channels. Defaults to 4. + ffproj_ratio (float): The expansion ratio of feedforward network hidden + layer channels (for ID embeddings). Defaults to 4. + """ + + def __init__( + self, + embed_dims: int = 768, + output_dims: int = 768, + hidden_dims: int = 1280, + id_embeddings_dim: int = 512, + depth: int = 4, + dim_head: int = 64, + heads: int = 16, + num_tokens: int = 4, + num_queries: int = 8, + ffn_ratio: float = 4, + ffproj_ratio: int = 2, + ) -> None: + super().__init__() + from .attention import FeedForward + + self.num_tokens = num_tokens + self.embed_dim = embed_dims + self.clip_embeds = None + self.shortcut = False + self.shortcut_scale = 1.0 + + self.proj = FeedForward(id_embeddings_dim, embed_dims * num_tokens, activation_fn="gelu", mult=ffproj_ratio) + self.norm = nn.LayerNorm(embed_dims) + + self.proj_in = nn.Linear(hidden_dims, embed_dims) + + self.proj_out = nn.Linear(embed_dims, output_dims) + self.norm_out = nn.LayerNorm(output_dims) + + self.layers = nn.ModuleList( + [IPAdapterPlusImageProjectionBlock(embed_dims, dim_head, heads, ffn_ratio) for _ in range(depth)] + ) + + def forward(self, id_embeds: torch.Tensor) -> torch.Tensor: + """Forward pass. + + Args: + id_embeds (torch.Tensor): Input Tensor (ID embeds). + Returns: + torch.Tensor: Output Tensor. + """ + id_embeds = id_embeds.to(self.clip_embeds.dtype) + id_embeds = self.proj(id_embeds) + id_embeds = id_embeds.reshape(-1, self.num_tokens, self.embed_dim) + id_embeds = self.norm(id_embeds) + latents = id_embeds + + clip_embeds = self.proj_in(self.clip_embeds) + x = clip_embeds.reshape(-1, clip_embeds.shape[2], clip_embeds.shape[3]) + + for block in self.layers: + residual = latents + latents = block(x, latents, residual) + + latents = self.proj_out(latents) + out = self.norm_out(latents) + if self.shortcut: + out = id_embeds + self.shortcut_scale * out + return out + + +class IPAdapterTimeImageProjectionBlock(nn.Module): + """Block for IPAdapterTimeImageProjection. + + Args: + hidden_dim (`int`, defaults to 1280): + The number of hidden channels. + dim_head (`int`, defaults to 64): + The number of head channels. + heads (`int`, defaults to 20): + Parallel attention heads. + ffn_ratio (`int`, defaults to 4): + The expansion ratio of feedforward network hidden layer channels. + """ + + def __init__( + self, + hidden_dim: int = 1280, + dim_head: int = 64, + heads: int = 20, + ffn_ratio: int = 4, + ) -> None: + super().__init__() + from .attention import FeedForward + + self.ln0 = nn.LayerNorm(hidden_dim) + self.ln1 = nn.LayerNorm(hidden_dim) + self.attn = Attention( + query_dim=hidden_dim, + cross_attention_dim=hidden_dim, + dim_head=dim_head, + heads=heads, + bias=False, + out_bias=False, + ) + self.ff = FeedForward(hidden_dim, hidden_dim, activation_fn="gelu", mult=ffn_ratio, bias=False) + + # AdaLayerNorm + self.adaln_silu = nn.SiLU() + self.adaln_proj = nn.Linear(hidden_dim, 4 * hidden_dim) + self.adaln_norm = nn.LayerNorm(hidden_dim) + + # Set attention scale and fuse KV + self.attn.scale = 1 / math.sqrt(math.sqrt(dim_head)) + self.attn.fuse_projections() + self.attn.to_k = None + self.attn.to_v = None + + def forward(self, x: torch.Tensor, latents: torch.Tensor, timestep_emb: torch.Tensor) -> torch.Tensor: + """Forward pass. + + Args: + x (`torch.Tensor`): + Image features. + latents (`torch.Tensor`): + Latent features. + timestep_emb (`torch.Tensor`): + Timestep embedding. + + Returns: + `torch.Tensor`: Output latent features. + """ + + # Shift and scale for AdaLayerNorm + emb = self.adaln_proj(self.adaln_silu(timestep_emb)) + shift_msa, scale_msa, shift_mlp, scale_mlp = emb.chunk(4, dim=1) + + # Fused Attention + residual = latents + x = self.ln0(x) + latents = self.ln1(latents) * (1 + scale_msa[:, None]) + shift_msa[:, None] + + batch_size = latents.shape[0] + + query = self.attn.to_q(latents) + kv_input = torch.cat((x, latents), dim=-2) + key, value = self.attn.to_kv(kv_input).chunk(2, dim=-1) + + inner_dim = key.shape[-1] + head_dim = inner_dim // self.attn.heads + + query = query.view(batch_size, -1, self.attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, self.attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, self.attn.heads, head_dim).transpose(1, 2) + + weight = (query * self.attn.scale) @ (key * self.attn.scale).transpose(-2, -1) + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + latents = weight @ value + + latents = latents.transpose(1, 2).reshape(batch_size, -1, self.attn.heads * head_dim) + latents = self.attn.to_out[0](latents) + latents = self.attn.to_out[1](latents) + latents = latents + residual + + ## FeedForward + residual = latents + latents = self.adaln_norm(latents) * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + return self.ff(latents) + residual + + +# Modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py +class IPAdapterTimeImageProjection(nn.Module): + """Resampler of SD3 IP-Adapter with timestep embedding. + + Args: + embed_dim (`int`, defaults to 1152): + The feature dimension. + output_dim (`int`, defaults to 2432): + The number of output channels. + hidden_dim (`int`, defaults to 1280): + The number of hidden channels. + depth (`int`, defaults to 4): + The number of blocks. + dim_head (`int`, defaults to 64): + The number of head channels. + heads (`int`, defaults to 20): + Parallel attention heads. + num_queries (`int`, defaults to 64): + The number of queries. + ffn_ratio (`int`, defaults to 4): + The expansion ratio of feedforward network hidden layer channels. + timestep_in_dim (`int`, defaults to 320): + The number of input channels for timestep embedding. + timestep_flip_sin_to_cos (`bool`, defaults to True): + Flip the timestep embedding order to `cos, sin` (if True) or `sin, cos` (if False). + timestep_freq_shift (`int`, defaults to 0): + Controls the timestep delta between frequencies between dimensions. + """ + + def __init__( + self, + embed_dim: int = 1152, + output_dim: int = 2432, + hidden_dim: int = 1280, + depth: int = 4, + dim_head: int = 64, + heads: int = 20, + num_queries: int = 64, + ffn_ratio: int = 4, + timestep_in_dim: int = 320, + timestep_flip_sin_to_cos: bool = True, + timestep_freq_shift: int = 0, + ) -> None: + super().__init__() + self.latents = nn.Parameter(torch.randn(1, num_queries, hidden_dim) / hidden_dim**0.5) + self.proj_in = nn.Linear(embed_dim, hidden_dim) + self.proj_out = nn.Linear(hidden_dim, output_dim) + self.norm_out = nn.LayerNorm(output_dim) + self.layers = nn.ModuleList( + [IPAdapterTimeImageProjectionBlock(hidden_dim, dim_head, heads, ffn_ratio) for _ in range(depth)] + ) + self.time_proj = Timesteps(timestep_in_dim, timestep_flip_sin_to_cos, timestep_freq_shift) + self.time_embedding = TimestepEmbedding(timestep_in_dim, hidden_dim, act_fn="silu") + + def forward(self, x: torch.Tensor, timestep: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Forward pass. + + Args: + x (`torch.Tensor`): + Image features. + timestep (`torch.Tensor`): + Timestep in denoising process. + Returns: + `tuple`[`torch.Tensor`, `torch.Tensor`]: The pair (latents, timestep_emb). + """ + timestep_emb = self.time_proj(timestep).to(dtype=x.dtype) + timestep_emb = self.time_embedding(timestep_emb) + + latents = self.latents.repeat(x.size(0), 1, 1) + + x = self.proj_in(x) + x = x + timestep_emb[:, None] + + for block in self.layers: + latents = block(x, latents, timestep_emb) + + latents = self.proj_out(latents) + latents = self.norm_out(latents) + + return latents, timestep_emb + + +class MultiIPAdapterImageProjection(nn.Module): + def __init__(self, IPAdapterImageProjectionLayers: list[nn.Module] | tuple[nn.Module]): + super().__init__() + self.image_projection_layers = nn.ModuleList(IPAdapterImageProjectionLayers) + + @property + def num_ip_adapters(self) -> int: + """Number of IP-Adapters loaded.""" + return len(self.image_projection_layers) + + def forward(self, image_embeds: list[torch.Tensor]): + projected_image_embeds = [] + + # currently, we accept `image_embeds` as + # 1. a tensor (deprecated) with shape [batch_size, embed_dim] or [batch_size, sequence_length, embed_dim] + # 2. list of `n` tensors where `n` is number of ip-adapters, each tensor can hae shape [batch_size, num_images, embed_dim] or [batch_size, num_images, sequence_length, embed_dim] + if not isinstance(image_embeds, list): + deprecation_message = ( + "You have passed a tensor as `image_embeds`.This is deprecated and will be removed in a future release." + " Please make sure to update your script to pass `image_embeds` as a list of tensors to suppress this warning." + ) + deprecate("image_embeds not a list", "1.0.0", deprecation_message, standard_warn=False) + image_embeds = [image_embeds.unsqueeze(1)] + + if len(image_embeds) != len(self.image_projection_layers): + raise ValueError( + f"image_embeds must have the same length as image_projection_layers, got {len(image_embeds)} and {len(self.image_projection_layers)}" + ) + + for image_embed, image_projection_layer in zip(image_embeds, self.image_projection_layers): + batch_size, num_images = image_embed.shape[0], image_embed.shape[1] + image_embed = image_embed.reshape((batch_size * num_images,) + image_embed.shape[2:]) + image_embed = image_projection_layer(image_embed) + image_embed = image_embed.reshape((batch_size, num_images) + image_embed.shape[1:]) + + projected_image_embeds.append(image_embed) + + return projected_image_embeds + + +class FluxPosEmbed(nn.Module): + def __new__(cls, *args, **kwargs): + deprecation_message = "Importing and using `FluxPosEmbed` from `diffusers.models.embeddings` is deprecated. Please import it from `diffusers.models.transformers.transformer_flux`." + deprecate("FluxPosEmbed", "1.0.0", deprecation_message) + + from .transformers.transformer_flux import FluxPosEmbed + + return FluxPosEmbed(*args, **kwargs) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/embeddings_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/embeddings_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..3790905e583c9d0df1a6c57e67821656c24c96a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/embeddings_flax.py @@ -0,0 +1,126 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import flax.linen as nn +import jax.numpy as jnp + +from ..utils import logging + + +logger = logging.get_logger(__name__) + + +def get_sinusoidal_embeddings( + timesteps: jnp.ndarray, + embedding_dim: int, + freq_shift: float = 1, + min_timescale: float = 1, + max_timescale: float = 1.0e4, + flip_sin_to_cos: bool = False, + scale: float = 1.0, +) -> jnp.ndarray: + """Returns the positional encoding (same as Tensor2Tensor). + + Args: + timesteps (`jnp.ndarray` of shape `(N,)`): + A 1-D array of N indices, one per batch element. These may be fractional. + embedding_dim (`int`): + The number of output channels. + freq_shift (`float`, *optional*, defaults to `1`): + Shift applied to the frequency scaling of the embeddings. + min_timescale (`float`, *optional*, defaults to `1`): + The smallest time unit used in the sinusoidal calculation (should probably be 0.0). + max_timescale (`float`, *optional*, defaults to `1.0e4`): + The largest time unit used in the sinusoidal calculation. + flip_sin_to_cos (`bool`, *optional*, defaults to `False`): + Whether to flip the order of sinusoidal components to cosine first. + scale (`float`, *optional*, defaults to `1.0`): + A scaling factor applied to the positional embeddings. + + Returns: + a Tensor of timing signals [N, num_channels] + """ + assert timesteps.ndim == 1, "Timesteps should be a 1d-array" + assert embedding_dim % 2 == 0, f"Embedding dimension {embedding_dim} should be even" + num_timescales = float(embedding_dim // 2) + log_timescale_increment = math.log(max_timescale / min_timescale) / (num_timescales - freq_shift) + inv_timescales = min_timescale * jnp.exp(jnp.arange(num_timescales, dtype=jnp.float32) * -log_timescale_increment) + emb = jnp.expand_dims(timesteps, 1) * jnp.expand_dims(inv_timescales, 0) + + # scale embeddings + scaled_time = scale * emb + + if flip_sin_to_cos: + signal = jnp.concatenate([jnp.cos(scaled_time), jnp.sin(scaled_time)], axis=1) + else: + signal = jnp.concatenate([jnp.sin(scaled_time), jnp.cos(scaled_time)], axis=1) + signal = jnp.reshape(signal, [jnp.shape(timesteps)[0], embedding_dim]) + return signal + + +class FlaxTimestepEmbedding(nn.Module): + r""" + Time step Embedding Module. Learns embeddings for input time steps. + + Args: + time_embed_dim (`int`, *optional*, defaults to `32`): + Time step embedding dimension. + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + The data type for the embedding parameters. + """ + + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + time_embed_dim: int = 32 + dtype: jnp.dtype = jnp.float32 + + @nn.compact + def __call__(self, temb): + temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_1")(temb) + temb = nn.silu(temb) + temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_2")(temb) + return temb + + +class FlaxTimesteps(nn.Module): + r""" + Wrapper Module for sinusoidal Time step Embeddings as described in https://huggingface.co/papers/2006.11239 + + Args: + dim (`int`, *optional*, defaults to `32`): + Time step embedding dimension. + flip_sin_to_cos (`bool`, *optional*, defaults to `False`): + Whether to flip the sinusoidal function from sine to cosine. + freq_shift (`float`, *optional*, defaults to `1`): + Frequency shift applied to the sinusoidal embeddings. + """ + + dim: int = 32 + flip_sin_to_cos: bool = False + freq_shift: float = 1 + + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + @nn.compact + def __call__(self, timesteps): + return get_sinusoidal_embeddings( + timesteps, embedding_dim=self.dim, flip_sin_to_cos=self.flip_sin_to_cos, freq_shift=self.freq_shift + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/lora.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/lora.py new file mode 100644 index 0000000000000000000000000000000000000000..489a0f0abea9275096bd30577346525cfc1fe1ac --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/lora.py @@ -0,0 +1,455 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# IMPORTANT: # +################################################################### +# ----------------------------------------------------------------# +# This file is deprecated and will be removed soon # +# (as soon as PEFT will become a required dependency for LoRA) # +# ----------------------------------------------------------------# +################################################################### + +import torch +import torch.nn.functional as F +from torch import nn + +from ..utils import deprecate, logging +from ..utils.import_utils import is_transformers_available + + +if is_transformers_available(): + from transformers import CLIPTextModel, CLIPTextModelWithProjection + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def text_encoder_attn_modules(text_encoder: nn.Module): + attn_modules = [] + + if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): + for i, layer in enumerate(text_encoder.text_model.encoder.layers): + name = f"text_model.encoder.layers.{i}.self_attn" + mod = layer.self_attn + attn_modules.append((name, mod)) + else: + raise ValueError(f"do not know how to get attention modules for: {text_encoder.__class__.__name__}") + + return attn_modules + + +def text_encoder_mlp_modules(text_encoder: nn.Module): + mlp_modules = [] + + if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)): + for i, layer in enumerate(text_encoder.text_model.encoder.layers): + mlp_mod = layer.mlp + name = f"text_model.encoder.layers.{i}.mlp" + mlp_modules.append((name, mlp_mod)) + else: + raise ValueError(f"do not know how to get mlp modules for: {text_encoder.__class__.__name__}") + + return mlp_modules + + +def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0): + for _, attn_module in text_encoder_attn_modules(text_encoder): + if isinstance(attn_module.q_proj, PatchedLoraProjection): + attn_module.q_proj.lora_scale = lora_scale + attn_module.k_proj.lora_scale = lora_scale + attn_module.v_proj.lora_scale = lora_scale + attn_module.out_proj.lora_scale = lora_scale + + for _, mlp_module in text_encoder_mlp_modules(text_encoder): + if isinstance(mlp_module.fc1, PatchedLoraProjection): + mlp_module.fc1.lora_scale = lora_scale + mlp_module.fc2.lora_scale = lora_scale + + +class PatchedLoraProjection(torch.nn.Module): + def __init__(self, regular_linear_layer, lora_scale=1, network_alpha=None, rank=4, dtype=None): + deprecation_message = "Use of `PatchedLoraProjection` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("PatchedLoraProjection", "1.0.0", deprecation_message) + + super().__init__() + from ..models.lora import LoRALinearLayer + + self.regular_linear_layer = regular_linear_layer + + device = self.regular_linear_layer.weight.device + + if dtype is None: + dtype = self.regular_linear_layer.weight.dtype + + self.lora_linear_layer = LoRALinearLayer( + self.regular_linear_layer.in_features, + self.regular_linear_layer.out_features, + network_alpha=network_alpha, + device=device, + dtype=dtype, + rank=rank, + ) + + self.lora_scale = lora_scale + + # overwrite PyTorch's `state_dict` to be sure that only the 'regular_linear_layer' weights are saved + # when saving the whole text encoder model and when LoRA is unloaded or fused + def state_dict(self, *args, destination=None, prefix="", keep_vars=False): + if self.lora_linear_layer is None: + return self.regular_linear_layer.state_dict( + *args, destination=destination, prefix=prefix, keep_vars=keep_vars + ) + + return super().state_dict(*args, destination=destination, prefix=prefix, keep_vars=keep_vars) + + def _fuse_lora(self, lora_scale=1.0, safe_fusing=False): + if self.lora_linear_layer is None: + return + + dtype, device = self.regular_linear_layer.weight.data.dtype, self.regular_linear_layer.weight.data.device + + w_orig = self.regular_linear_layer.weight.data.float() + w_up = self.lora_linear_layer.up.weight.data.float() + w_down = self.lora_linear_layer.down.weight.data.float() + + if self.lora_linear_layer.network_alpha is not None: + w_up = w_up * self.lora_linear_layer.network_alpha / self.lora_linear_layer.rank + + fused_weight = w_orig + (lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0]) + + if safe_fusing and torch.isnan(fused_weight).any().item(): + raise ValueError( + "This LoRA weight seems to be broken. " + f"Encountered NaN values when trying to fuse LoRA weights for {self}." + "LoRA weights will not be fused." + ) + + self.regular_linear_layer.weight.data = fused_weight.to(device=device, dtype=dtype) + + # we can drop the lora layer now + self.lora_linear_layer = None + + # offload the up and down matrices to CPU to not blow the memory + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() + self.lora_scale = lora_scale + + def _unfuse_lora(self): + if not (getattr(self, "w_up", None) is not None and getattr(self, "w_down", None) is not None): + return + + fused_weight = self.regular_linear_layer.weight.data + dtype, device = fused_weight.dtype, fused_weight.device + + w_up = self.w_up.to(device=device).float() + w_down = self.w_down.to(device).float() + + unfused_weight = fused_weight.float() - (self.lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0]) + self.regular_linear_layer.weight.data = unfused_weight.to(device=device, dtype=dtype) + + self.w_up = None + self.w_down = None + + def forward(self, input): + if self.lora_scale is None: + self.lora_scale = 1.0 + if self.lora_linear_layer is None: + return self.regular_linear_layer(input) + return self.regular_linear_layer(input) + (self.lora_scale * self.lora_linear_layer(input)) + + +class LoRALinearLayer(nn.Module): + r""" + A linear layer that is used with LoRA. + + Parameters: + in_features (`int`): + Number of input features. + out_features (`int`): + Number of output features. + rank (`int`, `optional`, defaults to 4): + The rank of the LoRA layer. + network_alpha (`float`, `optional`, defaults to `None`): + The value of the network alpha used for stable learning and preventing underflow. This value has the same + meaning as the `--network_alpha` option in the kohya-ss trainer script. See + https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning + device (`torch.device`, `optional`, defaults to `None`): + The device to use for the layer's weights. + dtype (`torch.dtype`, `optional`, defaults to `None`): + The dtype to use for the layer's weights. + """ + + def __init__( + self, + in_features: int, + out_features: int, + rank: int = 4, + network_alpha: float | None = None, + device: torch.device | str | None = None, + dtype: torch.dtype | None = None, + ): + super().__init__() + + deprecation_message = "Use of `LoRALinearLayer` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("LoRALinearLayer", "1.0.0", deprecation_message) + + self.down = nn.Linear(in_features, rank, bias=False, device=device, dtype=dtype) + self.up = nn.Linear(rank, out_features, bias=False, device=device, dtype=dtype) + # This value has the same meaning as the `--network_alpha` option in the kohya-ss trainer script. + # See https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning + self.network_alpha = network_alpha + self.rank = rank + self.out_features = out_features + self.in_features = in_features + + nn.init.normal_(self.down.weight, std=1 / rank) + nn.init.zeros_(self.up.weight) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + orig_dtype = hidden_states.dtype + dtype = self.down.weight.dtype + + down_hidden_states = self.down(hidden_states.to(dtype)) + up_hidden_states = self.up(down_hidden_states) + + if self.network_alpha is not None: + up_hidden_states *= self.network_alpha / self.rank + + return up_hidden_states.to(orig_dtype) + + +class LoRAConv2dLayer(nn.Module): + r""" + A convolutional layer that is used with LoRA. + + Parameters: + in_features (`int`): + Number of input features. + out_features (`int`): + Number of output features. + rank (`int`, `optional`, defaults to 4): + The rank of the LoRA layer. + kernel_size (`int` or `tuple` of two `int`, `optional`, defaults to 1): + The kernel size of the convolution. + stride (`int` or `tuple` of two `int`, `optional`, defaults to 1): + The stride of the convolution. + padding (`int` or `tuple` of two `int` or `str`, `optional`, defaults to 0): + The padding of the convolution. + network_alpha (`float`, `optional`, defaults to `None`): + The value of the network alpha used for stable learning and preventing underflow. This value has the same + meaning as the `--network_alpha` option in the kohya-ss trainer script. See + https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning + """ + + def __init__( + self, + in_features: int, + out_features: int, + rank: int = 4, + kernel_size: int | tuple[int, int] = (1, 1), + stride: int | tuple[int, int] = (1, 1), + padding: int | tuple[int, int] | str = 0, + network_alpha: float | None = None, + ): + super().__init__() + + deprecation_message = "Use of `LoRAConv2dLayer` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("LoRAConv2dLayer", "1.0.0", deprecation_message) + + self.down = nn.Conv2d(in_features, rank, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) + # according to the official kohya_ss trainer kernel_size are always fixed for the up layer + # # see: https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L129 + self.up = nn.Conv2d(rank, out_features, kernel_size=(1, 1), stride=(1, 1), bias=False) + + # This value has the same meaning as the `--network_alpha` option in the kohya-ss trainer script. + # See https://github.com/darkstorm2150/sd-scripts/blob/main/docs/train_network_README-en.md#execute-learning + self.network_alpha = network_alpha + self.rank = rank + + nn.init.normal_(self.down.weight, std=1 / rank) + nn.init.zeros_(self.up.weight) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + orig_dtype = hidden_states.dtype + dtype = self.down.weight.dtype + + down_hidden_states = self.down(hidden_states.to(dtype)) + up_hidden_states = self.up(down_hidden_states) + + if self.network_alpha is not None: + up_hidden_states *= self.network_alpha / self.rank + + return up_hidden_states.to(orig_dtype) + + +class LoRACompatibleConv(nn.Conv2d): + """ + A convolutional layer that can be used with LoRA. + """ + + def __init__(self, *args, lora_layer: LoRAConv2dLayer | None = None, **kwargs): + deprecation_message = "Use of `LoRACompatibleConv` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("LoRACompatibleConv", "1.0.0", deprecation_message) + + super().__init__(*args, **kwargs) + self.lora_layer = lora_layer + + def set_lora_layer(self, lora_layer: LoRAConv2dLayer | None): + deprecation_message = "Use of `set_lora_layer()` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("set_lora_layer", "1.0.0", deprecation_message) + + self.lora_layer = lora_layer + + def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): + if self.lora_layer is None: + return + + dtype, device = self.weight.data.dtype, self.weight.data.device + + w_orig = self.weight.data.float() + w_up = self.lora_layer.up.weight.data.float() + w_down = self.lora_layer.down.weight.data.float() + + if self.lora_layer.network_alpha is not None: + w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank + + fusion = torch.mm(w_up.flatten(start_dim=1), w_down.flatten(start_dim=1)) + fusion = fusion.reshape((w_orig.shape)) + fused_weight = w_orig + (lora_scale * fusion) + + if safe_fusing and torch.isnan(fused_weight).any().item(): + raise ValueError( + "This LoRA weight seems to be broken. " + f"Encountered NaN values when trying to fuse LoRA weights for {self}." + "LoRA weights will not be fused." + ) + + self.weight.data = fused_weight.to(device=device, dtype=dtype) + + # we can drop the lora layer now + self.lora_layer = None + + # offload the up and down matrices to CPU to not blow the memory + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() + self._lora_scale = lora_scale + + def _unfuse_lora(self): + if not (getattr(self, "w_up", None) is not None and getattr(self, "w_down", None) is not None): + return + + fused_weight = self.weight.data + dtype, device = fused_weight.data.dtype, fused_weight.data.device + + self.w_up = self.w_up.to(device=device).float() + self.w_down = self.w_down.to(device).float() + + fusion = torch.mm(self.w_up.flatten(start_dim=1), self.w_down.flatten(start_dim=1)) + fusion = fusion.reshape((fused_weight.shape)) + unfused_weight = fused_weight.float() - (self._lora_scale * fusion) + self.weight.data = unfused_weight.to(device=device, dtype=dtype) + + self.w_up = None + self.w_down = None + + def forward(self, hidden_states: torch.Tensor, scale: float = 1.0) -> torch.Tensor: + if self.padding_mode != "zeros": + hidden_states = F.pad(hidden_states, self._reversed_padding_repeated_twice, mode=self.padding_mode) + padding = (0, 0) + else: + padding = self.padding + + original_outputs = F.conv2d( + hidden_states, self.weight, self.bias, self.stride, padding, self.dilation, self.groups + ) + + if self.lora_layer is None: + return original_outputs + else: + return original_outputs + (scale * self.lora_layer(hidden_states)) + + +class LoRACompatibleLinear(nn.Linear): + """ + A Linear layer that can be used with LoRA. + """ + + def __init__(self, *args, lora_layer: LoRALinearLayer | None = None, **kwargs): + deprecation_message = "Use of `LoRACompatibleLinear` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("LoRACompatibleLinear", "1.0.0", deprecation_message) + + super().__init__(*args, **kwargs) + self.lora_layer = lora_layer + + def set_lora_layer(self, lora_layer: LoRALinearLayer | None): + deprecation_message = "Use of `set_lora_layer()` is deprecated. Please switch to PEFT backend by installing PEFT: `pip install peft`." + deprecate("set_lora_layer", "1.0.0", deprecation_message) + self.lora_layer = lora_layer + + def _fuse_lora(self, lora_scale: float = 1.0, safe_fusing: bool = False): + if self.lora_layer is None: + return + + dtype, device = self.weight.data.dtype, self.weight.data.device + + w_orig = self.weight.data.float() + w_up = self.lora_layer.up.weight.data.float() + w_down = self.lora_layer.down.weight.data.float() + + if self.lora_layer.network_alpha is not None: + w_up = w_up * self.lora_layer.network_alpha / self.lora_layer.rank + + fused_weight = w_orig + (lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0]) + + if safe_fusing and torch.isnan(fused_weight).any().item(): + raise ValueError( + "This LoRA weight seems to be broken. " + f"Encountered NaN values when trying to fuse LoRA weights for {self}." + "LoRA weights will not be fused." + ) + + self.weight.data = fused_weight.to(device=device, dtype=dtype) + + # we can drop the lora layer now + self.lora_layer = None + + # offload the up and down matrices to CPU to not blow the memory + self.w_up = w_up.cpu() + self.w_down = w_down.cpu() + self._lora_scale = lora_scale + + def _unfuse_lora(self): + if not (getattr(self, "w_up", None) is not None and getattr(self, "w_down", None) is not None): + return + + fused_weight = self.weight.data + dtype, device = fused_weight.dtype, fused_weight.device + + w_up = self.w_up.to(device=device).float() + w_down = self.w_down.to(device).float() + + unfused_weight = fused_weight.float() - (self._lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0]) + self.weight.data = unfused_weight.to(device=device, dtype=dtype) + + self.w_up = None + self.w_down = None + + def forward(self, hidden_states: torch.Tensor, scale: float = 1.0) -> torch.Tensor: + if self.lora_layer is None: + out = super().forward(hidden_states) + return out + else: + out = super().forward(hidden_states) + (scale * self.lora_layer(hidden_states)) + return out diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/model_loading_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/model_loading_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..04642ad5d40154578dd38cf7d493260de8cc7d76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/model_loading_utils.py @@ -0,0 +1,758 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import importlib +import inspect +import os +from array import array +from collections import OrderedDict, defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from zipfile import is_zipfile + +import safetensors +import torch +from huggingface_hub import DDUFEntry +from huggingface_hub.utils import EntryNotFoundError + +from ..quantizers import DiffusersQuantizer +from ..utils import ( + DEFAULT_HF_PARALLEL_LOADING_WORKERS, + GGUF_FILE_EXTENSION, + SAFE_WEIGHTS_INDEX_NAME, + SAFETENSORS_FILE_EXTENSION, + WEIGHTS_INDEX_NAME, + _add_variant, + _get_model_file, + deprecate, + is_accelerate_available, + is_accelerate_version, + is_gguf_available, + is_torch_available, + is_torch_version, + logging, +) +from ..utils.distributed_utils import is_torch_dist_rank_zero + + +logger = logging.get_logger(__name__) + +_CLASS_REMAPPING_DICT = { + "Transformer2DModel": { + "ada_norm_zero": "DiTTransformer2DModel", + "ada_norm_single": "PixArtTransformer2DModel", + } +} + + +if is_accelerate_available(): + from accelerate import infer_auto_device_map + from accelerate.utils import get_balanced_memory, get_max_memory, offload_weight, set_module_tensor_to_device + + +# Adapted from `transformers` (see modeling_utils.py) +def _determine_device_map( + model: torch.nn.Module, device_map, max_memory, torch_dtype, keep_in_fp32_modules=[], hf_quantizer=None +): + if isinstance(device_map, str): + special_dtypes = {} + if hf_quantizer is not None: + special_dtypes.update(hf_quantizer.get_special_dtypes_update(model, torch_dtype)) + special_dtypes.update( + { + name: torch.float32 + for name, _ in model.named_parameters() + if any(m in name for m in keep_in_fp32_modules) + } + ) + + target_dtype = torch_dtype + if hf_quantizer is not None: + target_dtype = hf_quantizer.adjust_target_dtype(target_dtype) + + no_split_modules = model._get_no_split_modules(device_map) + device_map_kwargs = {"no_split_module_classes": no_split_modules} + + if "special_dtypes" in inspect.signature(infer_auto_device_map).parameters: + device_map_kwargs["special_dtypes"] = special_dtypes + elif len(special_dtypes) > 0: + logger.warning( + "This model has some weights that should be kept in higher precision, you need to upgrade " + "`accelerate` to properly deal with them (`pip install --upgrade accelerate`)." + ) + + if device_map != "sequential": + max_memory = get_balanced_memory( + model, + dtype=torch_dtype, + low_zero=(device_map == "balanced_low_0"), + max_memory=max_memory, + **device_map_kwargs, + ) + else: + max_memory = get_max_memory(max_memory) + + if hf_quantizer is not None: + max_memory = hf_quantizer.adjust_max_memory(max_memory) + + device_map_kwargs["max_memory"] = max_memory + device_map = infer_auto_device_map(model, dtype=target_dtype, **device_map_kwargs) + + return device_map + + +def _fetch_remapped_cls_from_config(config, old_class): + previous_class_name = old_class.__name__ + remapped_class_name = _CLASS_REMAPPING_DICT.get(previous_class_name).get(config["norm_type"], None) + + # Details: + # https://github.com/huggingface/diffusers/pull/7647#discussion_r1621344818 + if remapped_class_name: + # load diffusers library to import compatible and original scheduler + diffusers_library = importlib.import_module(__name__.split(".")[0]) + remapped_class = getattr(diffusers_library, remapped_class_name) + logger.info( + f"Changing class object to be of `{remapped_class_name}` type from `{previous_class_name}` type." + f"This is because `{previous_class_name}` is scheduled to be deprecated in a future version. Note that this" + " DOESN'T affect the final results." + ) + return remapped_class + else: + return old_class + + +def _determine_param_device(param_name: str, device_map: dict[str, int | str | torch.device] | None): + """ + Find the device of param_name from the device_map. + """ + if device_map is None: + return "cpu" + else: + module_name = param_name + # find next higher level module that is defined in device_map: + # bert.lm_head.weight -> bert.lm_head -> bert -> '' + while len(module_name) > 0 and module_name not in device_map: + module_name = ".".join(module_name.split(".")[:-1]) + if module_name == "" and "" not in device_map: + raise ValueError(f"{param_name} doesn't have any device set.") + return device_map[module_name] + + +def load_state_dict( + checkpoint_file: str | os.PathLike, + dduf_entries: dict[str, DDUFEntry] | None = None, + disable_mmap: bool = False, + map_location: str | torch.device = "cpu", +): + """ + Reads a checkpoint file, returning properly formatted errors if they arise. + """ + # TODO: maybe refactor a bit this part where we pass a dict here + if isinstance(checkpoint_file, dict): + return checkpoint_file + try: + file_extension = os.path.basename(checkpoint_file).split(".")[-1] + if file_extension == SAFETENSORS_FILE_EXTENSION: + if dduf_entries: + # tensors are loaded on cpu + with dduf_entries[checkpoint_file].as_mmap() as mm: + return safetensors.torch.load(mm) + if disable_mmap: + return safetensors.torch.load(open(checkpoint_file, "rb").read()) + else: + return safetensors.torch.load_file(checkpoint_file, device=map_location) + elif file_extension == GGUF_FILE_EXTENSION: + return load_gguf_checkpoint(checkpoint_file) + else: + extra_args = {} + weights_only_kwarg = {"weights_only": True} if is_torch_version(">=", "1.13") else {} + # mmap can only be used with files serialized with zipfile-based format. + if ( + isinstance(checkpoint_file, str) + and map_location != "meta" + and is_torch_version(">=", "2.1.0") + and is_zipfile(checkpoint_file) + and not disable_mmap + ): + extra_args = {"mmap": True} + return torch.load(checkpoint_file, map_location=map_location, **weights_only_kwarg, **extra_args) + except Exception as e: + try: + with open(checkpoint_file) as f: + if f.read().startswith("version"): + raise OSError( + "You seem to have cloned a repository without having git-lfs installed. Please install " + "git-lfs and run `git lfs install` followed by `git lfs pull` in the folder " + "you cloned." + ) + else: + raise ValueError( + f"Unable to locate the file {checkpoint_file} which is necessary to load this pretrained " + "model. Make sure you have saved the model properly." + ) from e + except (UnicodeDecodeError, ValueError): + raise OSError( + f"Unable to load weights from checkpoint file for '{checkpoint_file}' at '{checkpoint_file}'. " + ) + + +def load_model_dict_into_meta( + model, + state_dict: OrderedDict, + dtype: str | torch.dtype | None = None, + model_name_or_path: str | None = None, + hf_quantizer: DiffusersQuantizer | None = None, + keep_in_fp32_modules: list | None = None, + device_map: dict[str, int | str | torch.device] | None = None, + unexpected_keys: list[str] | None = None, + offload_folder: str | os.PathLike | None = None, + offload_index: dict | None = None, + state_dict_index: dict | None = None, + state_dict_folder: str | os.PathLike | None = None, +) -> list[str]: + """ + This is somewhat similar to `_load_state_dict_into_model`, but deals with a model that has some or all of its + params on a `meta` device. It replaces the model params with the data from the `state_dict` + """ + + is_quantized = hf_quantizer is not None + empty_state_dict = model.state_dict() + + for param_name, param in state_dict.items(): + if param_name not in empty_state_dict: + continue + + set_module_kwargs = {} + # We convert floating dtypes to the `dtype` passed. We also want to keep the buffers/params + # in int/uint/bool and not cast them. + # TODO: revisit cases when param.dtype == torch.float8_e4m3fn + if dtype is not None and torch.is_floating_point(param): + if keep_in_fp32_modules is not None and any( + module_to_keep_in_fp32 in param_name.split(".") for module_to_keep_in_fp32 in keep_in_fp32_modules + ): + param = param.to(torch.float32) + set_module_kwargs["dtype"] = torch.float32 + # For quantizers have save weights using torch.float8_e4m3fn + elif hf_quantizer is not None and param.dtype == getattr(torch, "float8_e4m3fn", None): + pass + else: + param = param.to(dtype) + set_module_kwargs["dtype"] = dtype + + if is_accelerate_version(">", "1.8.1"): + set_module_kwargs["non_blocking"] = True + set_module_kwargs["clear_cache"] = False + + # For compatibility with PyTorch load_state_dict which converts state dict dtype to existing dtype in model, and which + # uses `param.copy_(input_param)` that preserves the contiguity of the parameter in the model. + # Reference: https://github.com/pytorch/pytorch/blob/db79ceb110f6646523019a59bbd7b838f43d4a86/torch/nn/modules/module.py#L2040C29-L2040C29 + old_param = model + splits = param_name.split(".") + for split in splits: + old_param = getattr(old_param, split) + + if not isinstance(old_param, (torch.nn.Parameter, torch.Tensor)): + old_param = None + + if old_param is not None: + if dtype is None: + param = param.to(old_param.dtype) + + if old_param.is_contiguous(): + param = param.contiguous() + + param_device = _determine_param_device(param_name, device_map) + + # bnb params are flattened. + # gguf quants have a different shape based on the type of quantization applied + if empty_state_dict[param_name].shape != param.shape: + if ( + is_quantized + and hf_quantizer.pre_quantized + and hf_quantizer.check_if_quantized_param( + model, param, param_name, state_dict, param_device=param_device + ) + ): + hf_quantizer.check_quantized_param_shape(param_name, empty_state_dict[param_name], param) + else: + model_name_or_path_str = f"{model_name_or_path} " if model_name_or_path is not None else "" + raise ValueError( + f"Cannot load {model_name_or_path_str} because {param_name} expected shape {empty_state_dict[param_name].shape}, but got {param.shape}. If you want to instead overwrite randomly initialized weights, please make sure to pass both `low_cpu_mem_usage=False` and `ignore_mismatched_sizes=True`. For more information, see also: https://github.com/huggingface/diffusers/issues/1619#issuecomment-1345604389 as an example." + ) + if param_device == "disk": + offload_index = offload_weight(param, param_name, offload_folder, offload_index) + elif param_device == "cpu" and state_dict_index is not None: + state_dict_index = offload_weight(param, param_name, state_dict_folder, state_dict_index) + elif is_quantized and ( + hf_quantizer.check_if_quantized_param(model, param, param_name, state_dict, param_device=param_device) + ): + hf_quantizer.create_quantized_param( + model, param, param_name, param_device, state_dict, unexpected_keys, dtype=dtype + ) + else: + set_module_tensor_to_device(model, param_name, param_device, value=param, **set_module_kwargs) + + return offload_index, state_dict_index + + +def check_support_param_buffer_assignment(model_to_load, state_dict, start_prefix=""): + """ + Checks if `model_to_load` supports param buffer assignment (such as when loading in empty weights) by first + checking if the model explicitly disables it, then by ensuring that the state dict keys are a subset of the model's + parameters. + + """ + if model_to_load.device.type == "meta": + return False + + if len([key for key in state_dict if key.startswith(start_prefix)]) == 0: + return False + + # Some models explicitly do not support param buffer assignment + if not getattr(model_to_load, "_supports_param_buffer_assignment", True): + logger.debug( + f"{model_to_load.__class__.__name__} does not support param buffer assignment, loading will be slower" + ) + return False + + # If the model does, the incoming `state_dict` and the `model_to_load` must be the same dtype + first_key = next(iter(model_to_load.state_dict().keys())) + if start_prefix + first_key in state_dict: + return state_dict[start_prefix + first_key].dtype == model_to_load.state_dict()[first_key].dtype + + return False + + +def _load_shard_file( + shard_file, + model, + model_state_dict, + device_map=None, + dtype=None, + hf_quantizer=None, + keep_in_fp32_modules=None, + dduf_entries=None, + loaded_keys=None, + unexpected_keys=None, + offload_index=None, + offload_folder=None, + state_dict_index=None, + state_dict_folder=None, + ignore_mismatched_sizes=False, + low_cpu_mem_usage=False, + disable_mmap=False, +): + state_dict = load_state_dict(shard_file, dduf_entries=dduf_entries, disable_mmap=disable_mmap) + mismatched_keys = _find_mismatched_keys( + state_dict, + model_state_dict, + loaded_keys, + ignore_mismatched_sizes, + ) + error_msgs = [] + if low_cpu_mem_usage: + offload_index, state_dict_index = load_model_dict_into_meta( + model, + state_dict, + device_map=device_map, + dtype=dtype, + hf_quantizer=hf_quantizer, + keep_in_fp32_modules=keep_in_fp32_modules, + unexpected_keys=unexpected_keys, + offload_folder=offload_folder, + offload_index=offload_index, + state_dict_index=state_dict_index, + state_dict_folder=state_dict_folder, + ) + else: + assign_to_params_buffers = check_support_param_buffer_assignment(model, state_dict) + + error_msgs += _load_state_dict_into_model(model, state_dict, assign_to_params_buffers) + return offload_index, state_dict_index, mismatched_keys, error_msgs + + +def _load_shard_files_with_threadpool( + shard_files, + model, + model_state_dict, + device_map=None, + dtype=None, + hf_quantizer=None, + keep_in_fp32_modules=None, + dduf_entries=None, + loaded_keys=None, + unexpected_keys=None, + offload_index=None, + offload_folder=None, + state_dict_index=None, + state_dict_folder=None, + ignore_mismatched_sizes=False, + low_cpu_mem_usage=False, + disable_mmap=False, +): + # Do not spawn anymore workers than you need + num_workers = min(len(shard_files), DEFAULT_HF_PARALLEL_LOADING_WORKERS) + + logger.info(f"Loading model weights in parallel with {num_workers} workers...") + + error_msgs = [] + mismatched_keys = [] + + load_one = functools.partial( + _load_shard_file, + model=model, + model_state_dict=model_state_dict, + device_map=device_map, + dtype=dtype, + hf_quantizer=hf_quantizer, + keep_in_fp32_modules=keep_in_fp32_modules, + dduf_entries=dduf_entries, + loaded_keys=loaded_keys, + unexpected_keys=unexpected_keys, + offload_index=offload_index, + offload_folder=offload_folder, + state_dict_index=state_dict_index, + state_dict_folder=state_dict_folder, + ignore_mismatched_sizes=ignore_mismatched_sizes, + low_cpu_mem_usage=low_cpu_mem_usage, + disable_mmap=disable_mmap, + ) + + tqdm_kwargs = {"total": len(shard_files), "desc": "Loading checkpoint shards"} + if not is_torch_dist_rank_zero(): + tqdm_kwargs["disable"] = True + + with ThreadPoolExecutor(max_workers=num_workers) as executor: + with logging.tqdm(**tqdm_kwargs) as pbar: + futures = [executor.submit(load_one, shard_file) for shard_file in shard_files] + for future in as_completed(futures): + result = future.result() + offload_index, state_dict_index, _mismatched_keys, _error_msgs = result + error_msgs += _error_msgs + mismatched_keys += _mismatched_keys + pbar.update(1) + + return offload_index, state_dict_index, mismatched_keys, error_msgs + + +def _find_mismatched_keys( + state_dict, + model_state_dict, + loaded_keys, + ignore_mismatched_sizes, +): + mismatched_keys = [] + if ignore_mismatched_sizes: + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + # If the checkpoint is sharded, we may not have the key here. + if checkpoint_key not in state_dict: + continue + + if model_key in model_state_dict and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape: + mismatched_keys.append( + (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) + ) + del state_dict[checkpoint_key] + return mismatched_keys + + +def _load_state_dict_into_model( + model_to_load, state_dict: OrderedDict, assign_to_params_buffers: bool = False +) -> list[str]: + # Convert old format to new format if needed from a PyTorch state_dict + # copy state_dict so _load_from_state_dict can modify it + state_dict = state_dict.copy() + error_msgs = [] + + # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants + # so we need to apply the function recursively. + def load(module: torch.nn.Module, prefix: str = "", assign_to_params_buffers: bool = False): + local_metadata = {} + local_metadata["assign_to_params_buffers"] = assign_to_params_buffers + if assign_to_params_buffers and not is_torch_version(">=", "2.1"): + logger.info("You need to have torch>=2.1 in order to load the model with assign_to_params_buffers=True") + args = (state_dict, prefix, local_metadata, True, [], [], error_msgs) + module._load_from_state_dict(*args) + + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".", assign_to_params_buffers) + + load(model_to_load, assign_to_params_buffers=assign_to_params_buffers) + + return error_msgs + + +def _fetch_index_file( + is_local, + pretrained_model_name_or_path, + subfolder, + use_safetensors, + cache_dir, + variant, + force_download, + proxies, + local_files_only, + token, + revision, + user_agent, + commit_hash, + dduf_entries: dict[str, DDUFEntry] | None = None, +): + if is_local: + index_file = Path( + pretrained_model_name_or_path, + subfolder or "", + _add_variant(SAFE_WEIGHTS_INDEX_NAME if use_safetensors else WEIGHTS_INDEX_NAME, variant), + ) + else: + index_file_in_repo = Path( + subfolder or "", + _add_variant(SAFE_WEIGHTS_INDEX_NAME if use_safetensors else WEIGHTS_INDEX_NAME, variant), + ).as_posix() + try: + index_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=index_file_in_repo, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=None, + user_agent=user_agent, + commit_hash=commit_hash, + dduf_entries=dduf_entries, + ) + if not dduf_entries: + index_file = Path(index_file) + except (EntryNotFoundError, EnvironmentError): + index_file = None + + return index_file + + +def _fetch_index_file_legacy( + is_local, + pretrained_model_name_or_path, + subfolder, + use_safetensors, + cache_dir, + variant, + force_download, + proxies, + local_files_only, + token, + revision, + user_agent, + commit_hash, + dduf_entries: dict[str, DDUFEntry] | None = None, +): + if is_local: + index_file = Path( + pretrained_model_name_or_path, + subfolder or "", + SAFE_WEIGHTS_INDEX_NAME if use_safetensors else WEIGHTS_INDEX_NAME, + ).as_posix() + splits = index_file.split(".") + split_index = -3 if ".cache" in index_file else -2 + splits = splits[:-split_index] + [variant] + splits[-split_index:] + index_file = ".".join(splits) + if os.path.exists(index_file): + deprecation_message = f"This serialization format is now deprecated to standardize the serialization format between `transformers` and `diffusers`. We recommend you to remove the existing files associated with the current variant ({variant}) and re-obtain them by running a `save_pretrained()`." + deprecate("legacy_sharded_ckpts_with_variant", "1.0.0", deprecation_message, standard_warn=False) + index_file = Path(index_file) + else: + index_file = None + else: + if variant is not None: + index_file_in_repo = Path( + subfolder or "", + SAFE_WEIGHTS_INDEX_NAME if use_safetensors else WEIGHTS_INDEX_NAME, + ).as_posix() + splits = index_file_in_repo.split(".") + split_index = -2 + splits = splits[:-split_index] + [variant] + splits[-split_index:] + index_file_in_repo = ".".join(splits) + try: + index_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=index_file_in_repo, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=None, + user_agent=user_agent, + commit_hash=commit_hash, + dduf_entries=dduf_entries, + ) + index_file = Path(index_file) + deprecation_message = f"This serialization format is now deprecated to standardize the serialization format between `transformers` and `diffusers`. We recommend you to remove the existing files associated with the current variant ({variant}) and re-obtain them by running a `save_pretrained()`." + deprecate("legacy_sharded_ckpts_with_variant", "1.0.0", deprecation_message, standard_warn=False) + except (EntryNotFoundError, EnvironmentError): + index_file = None + + return index_file + + +def _gguf_parse_value(_value, data_type): + if not isinstance(data_type, list): + data_type = [data_type] + if len(data_type) == 1: + data_type = data_type[0] + array_data_type = None + else: + if data_type[0] != 9: + raise ValueError("Received multiple types, therefore expected the first type to indicate an array.") + data_type, array_data_type = data_type + + if data_type in [0, 1, 2, 3, 4, 5, 10, 11]: + _value = int(_value[0]) + elif data_type in [6, 12]: + _value = float(_value[0]) + elif data_type in [7]: + _value = bool(_value[0]) + elif data_type in [8]: + _value = array("B", list(_value)).tobytes().decode() + elif data_type in [9]: + _value = _gguf_parse_value(_value, array_data_type) + return _value + + +def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False): + """ + Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed tokenizer and config + attributes. + + Args: + gguf_checkpoint_path (`str`): + The path the to GGUF file to load + return_tensors (`bool`, defaults to `True`): + Whether to read the tensors from the file and return them. Not doing so is faster and only loads the + metadata in memory. + """ + + if is_gguf_available() and is_torch_available(): + import gguf + from gguf import GGUFReader + + from ..quantizers.gguf.utils import SUPPORTED_GGUF_QUANT_TYPES, GGUFParameter + else: + logger.error( + "Loading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see " + "https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions." + ) + raise ImportError("Please install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.") + + reader = GGUFReader(gguf_checkpoint_path) + + parsed_parameters = {} + for tensor in reader.tensors: + name = tensor.name + quant_type = tensor.tensor_type + + # if the tensor is a torch supported dtype do not use GGUFParameter + is_gguf_quant = quant_type not in [gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16] + if is_gguf_quant and quant_type not in SUPPORTED_GGUF_QUANT_TYPES: + _supported_quants_str = "\n".join([str(type) for type in SUPPORTED_GGUF_QUANT_TYPES]) + raise ValueError( + ( + f"{name} has a quantization type: {str(quant_type)} which is unsupported." + "\n\nCurrently the following quantization types are supported: \n\n" + f"{_supported_quants_str}" + "\n\nTo request support for this quantization type please open an issue here: https://github.com/huggingface/diffusers" + ) + ) + + weights = torch.from_numpy(tensor.data.copy()) + parsed_parameters[name] = GGUFParameter(weights, quant_type=quant_type) if is_gguf_quant else weights + + return parsed_parameters + + +def _find_mismatched_keys(state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes): + mismatched_keys = [] + if not ignore_mismatched_sizes: + return mismatched_keys + for checkpoint_key in loaded_keys: + model_key = checkpoint_key + # If the checkpoint is sharded, we may not have the key here. + if checkpoint_key not in state_dict: + continue + + if model_key in model_state_dict and state_dict[checkpoint_key].shape != model_state_dict[model_key].shape: + mismatched_keys.append( + (checkpoint_key, state_dict[checkpoint_key].shape, model_state_dict[model_key].shape) + ) + del state_dict[checkpoint_key] + return mismatched_keys + + +def _expand_device_map(device_map, param_names): + """ + Expand a device map to return the correspondence parameter name to device. + """ + new_device_map = {} + for module, device in device_map.items(): + new_device_map.update( + {p: device for p in param_names if p == module or p.startswith(f"{module}.") or module == ""} + ) + return new_device_map + + +# Adapted from: https://github.com/huggingface/transformers/blob/0687d481e2c71544501ef9cb3eef795a6e79b1de/src/transformers/modeling_utils.py#L5859 +def _caching_allocator_warmup( + model, expanded_device_map: dict[str, torch.device], dtype: torch.dtype, hf_quantizer: DiffusersQuantizer | None +) -> None: + """ + This function warm-ups the caching allocator based on the size of the model tensors that will reside on each + device. It allows to have one large call to Malloc, instead of recursively calling it later when loading the model, + which is actually the loading speed bottleneck. Calling this function allows to cut the model loading time by a + very large margin. + """ + factor = 2 if hf_quantizer is None else hf_quantizer.get_cuda_warm_up_factor() + + # Keep only accelerator devices + accelerator_device_map = { + param: torch.device(device) + for param, device in expanded_device_map.items() + if str(device) not in ["cpu", "disk"] + } + if not accelerator_device_map: + return + + elements_per_device = defaultdict(int) + for param_name, device in accelerator_device_map.items(): + try: + p = model.get_parameter(param_name) + except AttributeError: + try: + p = model.get_buffer(param_name) + except AttributeError: + raise AttributeError(f"Parameter or buffer with name={param_name} not found in model") + # TODO: account for TP when needed. + elements_per_device[device] += p.numel() + + # This will kick off the caching allocator to avoid having to Malloc afterwards + for device, elem_count in elements_per_device.items(): + warmup_elems = max(1, elem_count // factor) + _ = torch.empty(warmup_elems, dtype=dtype, device=device, requires_grad=False) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_flax_pytorch_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_flax_pytorch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d64c48a9601ec75afc6685174ede20673df5d01f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_flax_pytorch_utils.py @@ -0,0 +1,135 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch - Flax general utilities.""" + +import re + +import jax.numpy as jnp +from flax.traverse_util import flatten_dict, unflatten_dict +from jax.random import PRNGKey + +from ..utils import logging + + +logger = logging.get_logger(__name__) + + +def rename_key(key): + regex = r"\w+[.]\d+" + pats = re.findall(regex, key) + for pat in pats: + key = key.replace(pat, "_".join(pat.split("."))) + return key + + +##################### +# PyTorch => Flax # +##################### + + +# Adapted from https://github.com/huggingface/transformers/blob/c603c80f46881ae18b2ca50770ef65fa4033eacd/src/transformers/modeling_flax_pytorch_utils.py#L69 +# and https://github.com/patil-suraj/stable-diffusion-jax/blob/main/stable_diffusion_jax/convert_diffusers_to_jax.py +def rename_key_and_reshape_tensor(pt_tuple_key, pt_tensor, random_flax_state_dict): + """Rename PT weight names to corresponding Flax weight names and reshape tensor if necessary""" + # conv norm or layer norm + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("scale",) + + # rename attention layers + if len(pt_tuple_key) > 1: + for rename_from, rename_to in ( + ("to_out_0", "proj_attn"), + ("to_k", "key"), + ("to_v", "value"), + ("to_q", "query"), + ): + if pt_tuple_key[-2] == rename_from: + weight_name = pt_tuple_key[-1] + weight_name = "kernel" if weight_name == "weight" else weight_name + renamed_pt_tuple_key = pt_tuple_key[:-2] + (rename_to, weight_name) + if renamed_pt_tuple_key in random_flax_state_dict: + assert random_flax_state_dict[renamed_pt_tuple_key].shape == pt_tensor.T.shape + return renamed_pt_tuple_key, pt_tensor.T + + if ( + any("norm" in str_ for str_ in pt_tuple_key) + and (pt_tuple_key[-1] == "bias") + and (pt_tuple_key[:-1] + ("bias",) not in random_flax_state_dict) + and (pt_tuple_key[:-1] + ("scale",) in random_flax_state_dict) + ): + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("scale",) + return renamed_pt_tuple_key, pt_tensor + elif pt_tuple_key[-1] in ["weight", "gamma"] and pt_tuple_key[:-1] + ("scale",) in random_flax_state_dict: + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("scale",) + return renamed_pt_tuple_key, pt_tensor + + # embedding + if pt_tuple_key[-1] == "weight" and pt_tuple_key[:-1] + ("embedding",) in random_flax_state_dict: + pt_tuple_key = pt_tuple_key[:-1] + ("embedding",) + return renamed_pt_tuple_key, pt_tensor + + # conv layer + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("kernel",) + if pt_tuple_key[-1] == "weight" and pt_tensor.ndim == 4: + pt_tensor = pt_tensor.transpose(2, 3, 1, 0) + return renamed_pt_tuple_key, pt_tensor + + # linear layer + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("kernel",) + if pt_tuple_key[-1] == "weight": + pt_tensor = pt_tensor.T + return renamed_pt_tuple_key, pt_tensor + + # old PyTorch layer norm weight + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("weight",) + if pt_tuple_key[-1] == "gamma": + return renamed_pt_tuple_key, pt_tensor + + # old PyTorch layer norm bias + renamed_pt_tuple_key = pt_tuple_key[:-1] + ("bias",) + if pt_tuple_key[-1] == "beta": + return renamed_pt_tuple_key, pt_tensor + + return pt_tuple_key, pt_tensor + + +def convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model, init_key=42): + # Step 1: Convert pytorch tensor to numpy + pt_state_dict = {k: v.numpy() for k, v in pt_state_dict.items()} + + # Step 2: Since the model is stateless, get random Flax params + random_flax_params = flax_model.init_weights(PRNGKey(init_key)) + + random_flax_state_dict = flatten_dict(random_flax_params) + flax_state_dict = {} + + # Need to change some parameters name to match Flax names + for pt_key, pt_tensor in pt_state_dict.items(): + renamed_pt_key = rename_key(pt_key) + pt_tuple_key = tuple(renamed_pt_key.split(".")) + + # Correctly rename weight parameters + flax_key, flax_tensor = rename_key_and_reshape_tensor(pt_tuple_key, pt_tensor, random_flax_state_dict) + + if flax_key in random_flax_state_dict: + if flax_tensor.shape != random_flax_state_dict[flax_key].shape: + raise ValueError( + f"PyTorch checkpoint seems to be incorrect. Weight {pt_key} was expected to be of shape " + f"{random_flax_state_dict[flax_key].shape}, but is {flax_tensor.shape}." + ) + + # also add unexpected weight so that warning is thrown + flax_state_dict[flax_key] = jnp.asarray(flax_tensor) + + return unflatten_dict(flax_state_dict) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_flax_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_flax_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9f62bd7199e08e68114319615c949be34e25dd7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_flax_utils.py @@ -0,0 +1,558 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pickle import UnpicklingError +from typing import Any + +import jax +import jax.numpy as jnp +import msgpack.exceptions +from flax.core.frozen_dict import FrozenDict, unfreeze +from flax.serialization import from_bytes, to_bytes +from flax.traverse_util import flatten_dict, unflatten_dict +from huggingface_hub import create_repo, hf_hub_download +from huggingface_hub.utils import ( + EntryNotFoundError, + HfHubHTTPError, + RepositoryNotFoundError, + RevisionNotFoundError, + validate_hf_hub_args, +) + +from .. import __version__, is_torch_available +from ..utils import ( + CONFIG_NAME, + FLAX_WEIGHTS_NAME, + HUGGINGFACE_CO_RESOLVE_ENDPOINT, + WEIGHTS_NAME, + PushToHubMixin, + logging, +) +from .modeling_flax_pytorch_utils import convert_pytorch_state_dict_to_flax + + +logger = logging.get_logger(__name__) + + +class FlaxModelMixin(PushToHubMixin): + r""" + Base class for all Flax models. + + [`FlaxModelMixin`] takes care of storing the model configuration and provides methods for loading, downloading and + saving models. + + - **config_name** ([`str`]) -- Filename to save a model to when calling [`~FlaxModelMixin.save_pretrained`]. + """ + + config_name = CONFIG_NAME + _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"] + _flax_internal_args = ["name", "parent", "dtype"] + + @classmethod + def _from_config(cls, config, **kwargs): + """ + All context managers that the model should be initialized under go here. + """ + return cls(config, **kwargs) + + def _cast_floating_to(self, params: dict | FrozenDict, dtype: jnp.dtype, mask: Any = None) -> Any: + """ + Helper method to cast floating-point values of given parameter `PyTree` to given `dtype`. + """ + + # taken from https://github.com/deepmind/jmp/blob/3a8318abc3292be38582794dbf7b094e6583b192/jmp/_src/policy.py#L27 + def conditional_cast(param): + if isinstance(param, jnp.ndarray) and jnp.issubdtype(param.dtype, jnp.floating): + param = param.astype(dtype) + return param + + if mask is None: + return jax.tree_map(conditional_cast, params) + + flat_params = flatten_dict(params) + flat_mask, _ = jax.tree_flatten(mask) + + for masked, key in zip(flat_mask, flat_params.keys()): + if masked: + param = flat_params[key] + flat_params[key] = conditional_cast(param) + + return unflatten_dict(flat_params) + + def to_bf16(self, params: dict | FrozenDict, mask: Any = None): + r""" + Cast the floating-point `params` to `jax.numpy.bfloat16`. This returns a new `params` tree and does not cast + the `params` in place. + + This method can be used on a TPU to explicitly convert the model parameters to bfloat16 precision to do full + half-precision training or to save weights in bfloat16 for inference in order to save memory and improve speed. + + Arguments: + params (`dict | FrozenDict`): + A `PyTree` of model parameters. + mask (`dict | FrozenDict`): + A `PyTree` with same structure as the `params` tree. The leaves should be booleans. It should be `True` + for params you want to cast, and `False` for those you want to skip. + + Examples: + + ```python + >>> from diffusers import FlaxUNet2DConditionModel + + >>> # load model + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> # By default, the model parameters will be in fp32 precision, to cast these to bfloat16 precision + >>> params = model.to_bf16(params) + >>> # If you don't want to cast certain parameters (for example layer norm bias and scale) + >>> # then pass the mask as follows + >>> from flax import traverse_util + + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> flat_params = traverse_util.flatten_dict(params) + >>> mask = { + ... path: (path[-2] != ("LayerNorm", "bias") and path[-2:] != ("LayerNorm", "scale")) + ... for path in flat_params + ... } + >>> mask = traverse_util.unflatten_dict(mask) + >>> params = model.to_bf16(params, mask) + ```""" + return self._cast_floating_to(params, jnp.bfloat16, mask) + + def to_fp32(self, params: dict | FrozenDict, mask: Any = None): + r""" + Cast the floating-point `params` to `jax.numpy.float32`. This method can be used to explicitly convert the + model parameters to fp32 precision. This returns a new `params` tree and does not cast the `params` in place. + + Arguments: + params (`dict | FrozenDict`): + A `PyTree` of model parameters. + mask (`dict | FrozenDict`): + A `PyTree` with same structure as the `params` tree. The leaves should be booleans. It should be `True` + for params you want to cast, and `False` for those you want to skip. + + Examples: + + ```python + >>> from diffusers import FlaxUNet2DConditionModel + + >>> # Download model and configuration from huggingface.co + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> # By default, the model params will be in fp32, to illustrate the use of this method, + >>> # we'll first cast to fp16 and back to fp32 + >>> params = model.to_f16(params) + >>> # now cast back to fp32 + >>> params = model.to_fp32(params) + ```""" + return self._cast_floating_to(params, jnp.float32, mask) + + def to_fp16(self, params: dict | FrozenDict, mask: Any = None): + r""" + Cast the floating-point `params` to `jax.numpy.float16`. This returns a new `params` tree and does not cast the + `params` in place. + + This method can be used on a GPU to explicitly convert the model parameters to float16 precision to do full + half-precision training or to save weights in float16 for inference in order to save memory and improve speed. + + Arguments: + params (`dict | FrozenDict`): + A `PyTree` of model parameters. + mask (`dict | FrozenDict`): + A `PyTree` with same structure as the `params` tree. The leaves should be booleans. It should be `True` + for params you want to cast, and `False` for those you want to skip. + + Examples: + + ```python + >>> from diffusers import FlaxUNet2DConditionModel + + >>> # load model + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> # By default, the model params will be in fp32, to cast these to float16 + >>> params = model.to_fp16(params) + >>> # If you want don't want to cast certain parameters (for example layer norm bias and scale) + >>> # then pass the mask as follows + >>> from flax import traverse_util + + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> flat_params = traverse_util.flatten_dict(params) + >>> mask = { + ... path: (path[-2] != ("LayerNorm", "bias") and path[-2:] != ("LayerNorm", "scale")) + ... for path in flat_params + ... } + >>> mask = traverse_util.unflatten_dict(mask) + >>> params = model.to_fp16(params, mask) + ```""" + return self._cast_floating_to(params, jnp.float16, mask) + + def init_weights(self, rng: jax.Array) -> dict: + raise NotImplementedError(f"init_weights method has to be implemented for {self}") + + @classmethod + @validate_hf_hub_args + def from_pretrained( + cls, + pretrained_model_name_or_path: str | os.PathLike, + dtype: jnp.dtype = jnp.float32, + *model_args, + **kwargs, + ): + r""" + Instantiate a pretrained Flax model from a pretrained model configuration. + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`): + Can be either: + + - A string, the *model id* (for example `stable-diffusion-v1-5/stable-diffusion-v1-5`) of a + pretrained model hosted on the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + using [`~FlaxModelMixin.save_pretrained`]. + dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`): + The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and + `jax.numpy.bfloat16` (on TPUs). + + This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If + specified, all the computation will be performed with the given `dtype`. + + > [!TIP] > This only specifies the dtype of the *computation* and does not influence the dtype of model + > parameters. > > If you wish to change the dtype of the model parameters, see + [`~FlaxModelMixin.to_fp16`] and > [`~FlaxModelMixin.to_bf16`]. + + model_args (sequence of positional arguments, *optional*): + All remaining positional arguments are passed to the underlying model's `__init__` method. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + from_pt (`bool`, *optional*, defaults to `False`): + Load the model weights from a PyTorch checkpoint save file. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to update the configuration object (after it is loaded) and initiate the model (for + example, `output_attentions=True`). Behaves differently depending on whether a `config` is provided or + automatically loaded: + + - If a configuration is provided with `config`, `kwargs` are directly passed to the underlying + model's `__init__` method (we assume all relevant updates to the configuration have already been + done). + - If a configuration is not provided, `kwargs` are first passed to the configuration class + initialization function [`~ConfigMixin.from_config`]. Each key of the `kwargs` that corresponds + to a configuration attribute is used to override said attribute with the supplied `kwargs` value. + Remaining keys that do not correspond to any configuration attribute are passed to the underlying + model's `__init__` function. + + Examples: + + ```python + >>> from diffusers import FlaxUNet2DConditionModel + + >>> # Download model and configuration from huggingface.co and cache. + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> # Model was saved using *save_pretrained('./test/saved_model/')* (for example purposes, not runnable). + >>> model, params = FlaxUNet2DConditionModel.from_pretrained("./test/saved_model/") + ``` + + If you get the error message below, you need to finetune the weights for your downstream task: + + ```bash + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + """ + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + config = kwargs.pop("config", None) + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + from_pt = kwargs.pop("from_pt", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", False) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + user_agent = { + "diffusers": __version__, + "file_type": "model", + "framework": "flax", + } + + # Load config if we don't provide one + if config is None: + config, unused_kwargs = cls.load_config( + pretrained_model_name_or_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + **kwargs, + ) + + model, model_kwargs = cls.from_config(config, dtype=dtype, return_unused_kwargs=True, **unused_kwargs) + + # Load model + pretrained_path_with_subfolder = ( + pretrained_model_name_or_path + if subfolder is None + else os.path.join(pretrained_model_name_or_path, subfolder) + ) + if os.path.isdir(pretrained_path_with_subfolder): + if from_pt: + if not os.path.isfile(os.path.join(pretrained_path_with_subfolder, WEIGHTS_NAME)): + raise EnvironmentError( + f"Error no file named {WEIGHTS_NAME} found in directory {pretrained_path_with_subfolder} " + ) + model_file = os.path.join(pretrained_path_with_subfolder, WEIGHTS_NAME) + elif os.path.isfile(os.path.join(pretrained_path_with_subfolder, FLAX_WEIGHTS_NAME)): + # Load from a Flax checkpoint + model_file = os.path.join(pretrained_path_with_subfolder, FLAX_WEIGHTS_NAME) + # Check if pytorch weights exist instead + elif os.path.isfile(os.path.join(pretrained_path_with_subfolder, WEIGHTS_NAME)): + raise EnvironmentError( + f"{WEIGHTS_NAME} file found in directory {pretrained_path_with_subfolder}. Please load the model" + " using `from_pt=True`." + ) + else: + raise EnvironmentError( + f"Error no file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME} found in directory " + f"{pretrained_path_with_subfolder}." + ) + else: + try: + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=FLAX_WEIGHTS_NAME if not from_pt else WEIGHTS_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision, + ) + + except RepositoryNotFoundError: + raise EnvironmentError( + f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier " + "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a " + "token having permission to this repo with `token` or log in with `hf auth login`." + ) + except RevisionNotFoundError: + raise EnvironmentError( + f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for " + "this model name. Check the model page at " + f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions." + ) + except EntryNotFoundError: + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named {FLAX_WEIGHTS_NAME}." + ) + except HfHubHTTPError as err: + raise EnvironmentError( + f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n" + f"{err}" + ) + except ValueError: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}.\nCheckout your" + " internet connection or see how to run the library in offline mode at" + " 'https://huggingface.co/docs/transformers/installation#offline-mode'." + ) + except EnvironmentError: + raise EnvironmentError( + f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}." + ) + + if from_pt: + if is_torch_available(): + from .modeling_utils import load_state_dict + else: + raise EnvironmentError( + "Can't load the model in PyTorch format because PyTorch is not installed. " + "Please, install PyTorch or use native Flax weights." + ) + + # Step 1: Get the pytorch file + pytorch_model_file = load_state_dict(model_file) + + # Step 2: Convert the weights + state = convert_pytorch_state_dict_to_flax(pytorch_model_file, model) + else: + try: + with open(model_file, "rb") as state_f: + state = from_bytes(cls, state_f.read()) + except (UnpicklingError, msgpack.exceptions.ExtraData) as e: + try: + with open(model_file) as f: + if f.read().startswith("version"): + raise OSError( + "You seem to have cloned a repository without having git-lfs installed. Please" + " install git-lfs and run `git lfs install` followed by `git lfs pull` in the" + " folder you cloned." + ) + else: + raise ValueError from e + except (UnicodeDecodeError, ValueError): + raise EnvironmentError(f"Unable to convert {model_file} to Flax deserializable object. ") + # make sure all arrays are stored as jnp.ndarray + # NOTE: This is to prevent a bug this will be fixed in Flax >= v0.3.4: + # https://github.com/google/flax/issues/1261 + state = jax.tree_util.tree_map(lambda x: jax.device_put(x, jax.local_devices(backend="cpu")[0]), state) + + # flatten dicts + state = flatten_dict(state) + + params_shape_tree = jax.eval_shape(model.init_weights, rng=jax.random.PRNGKey(0)) + required_params = set(flatten_dict(unfreeze(params_shape_tree)).keys()) + + shape_state = flatten_dict(unfreeze(params_shape_tree)) + + missing_keys = required_params - set(state.keys()) + unexpected_keys = set(state.keys()) - required_params + + if missing_keys: + logger.warning( + f"The checkpoint {pretrained_model_name_or_path} is missing required keys: {missing_keys}. " + "Make sure to call model.init_weights to initialize the missing weights." + ) + cls._missing_keys = missing_keys + + for key in state.keys(): + if key in shape_state and state[key].shape != shape_state[key].shape: + raise ValueError( + f"Trying to load the pretrained weight for {key} failed: checkpoint has shape " + f"{state[key].shape} which is incompatible with the model shape {shape_state[key].shape}. " + ) + + # remove unexpected keys to not be saved again + for unexpected_key in unexpected_keys: + del state[unexpected_key] + + if len(unexpected_keys) > 0: + logger.warning( + f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when" + f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are" + f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or" + " with another architecture." + ) + else: + logger.info(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n") + + if len(missing_keys) > 0: + logger.warning( + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." + ) + else: + logger.info( + f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint" + f" was trained on, you can already use {model.__class__.__name__} for predictions without further" + " training." + ) + + return model, unflatten_dict(state) + + def save_pretrained( + self, + save_directory: str | os.PathLike, + params: dict | FrozenDict, + is_main_process: bool = True, + push_to_hub: bool = False, + **kwargs, + ): + """ + Save a model and its configuration file to a directory so that it can be reloaded using the + [`~FlaxModelMixin.from_pretrained`] class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save a model and its configuration file to. Will be created if it doesn't exist. + params (`dict | FrozenDict`): + A `PyTree` of model parameters. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful during distributed training and you + need to call this function on all processes. In this case, set `is_main_process=True` only on the main + process to avoid race conditions. + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + kwargs (`dict[str, Any]`, *optional*): + Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + if os.path.isfile(save_directory): + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") + return + + os.makedirs(save_directory, exist_ok=True) + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + private = kwargs.pop("private", None) + create_pr = kwargs.pop("create_pr", False) + token = kwargs.pop("token", None) + repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id + + model_to_save = self + + # Attach architecture to the config + # Save the config + if is_main_process: + model_to_save.save_config(save_directory) + + # save model + output_model_file = os.path.join(save_directory, FLAX_WEIGHTS_NAME) + with open(output_model_file, "wb") as f: + model_bytes = to_bytes(params) + f.write(model_bytes) + + logger.info(f"Model weights saved in {output_model_file}") + + if push_to_hub: + self._upload_folder( + save_directory, + repo_id, + token=token, + commit_message=commit_message, + create_pr=create_pr, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_outputs.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_outputs.py new file mode 100644 index 0000000000000000000000000000000000000000..0120a34d9052fe6b499b519ba4366e7a088f7910 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_outputs.py @@ -0,0 +1,31 @@ +from dataclasses import dataclass + +from ..utils import BaseOutput + + +@dataclass +class AutoencoderKLOutput(BaseOutput): + """ + Output of AutoencoderKL encoding method. + + Args: + latent_dist (`DiagonalGaussianDistribution`): + Encoded outputs of `Encoder` represented as the mean and logvar of `DiagonalGaussianDistribution`. + `DiagonalGaussianDistribution` allows for sampling latents from the distribution. + """ + + latent_dist: "DiagonalGaussianDistribution" # noqa: F821 + + +@dataclass +class Transformer2DModelOutput(BaseOutput): + """ + The output of [`Transformer2DModel`]. + + Args: + sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` or `(batch size, num_vector_embeds - 1, num_latent_pixels)` if [`Transformer2DModel`] is discrete): + The hidden states output conditioned on the `encoder_hidden_states` input. If discrete, returns probability + distributions for the unnoised latent pixels. + """ + + sample: "torch.Tensor" # noqa: F821 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_pytorch_flax_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_pytorch_flax_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ada55073dd5555084913b1bffbe747329509a182 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_pytorch_flax_utils.py @@ -0,0 +1,161 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch - Flax general utilities.""" + +from pickle import UnpicklingError + +import jax +import jax.numpy as jnp +import numpy as np +from flax.serialization import from_bytes +from flax.traverse_util import flatten_dict + +from ..utils import logging + + +logger = logging.get_logger(__name__) + + +##################### +# Flax => PyTorch # +##################### + + +# from https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_flax_pytorch_utils.py#L224-L352 +def load_flax_checkpoint_in_pytorch_model(pt_model, model_file): + try: + with open(model_file, "rb") as flax_state_f: + flax_state = from_bytes(None, flax_state_f.read()) + except UnpicklingError as e: + try: + with open(model_file) as f: + if f.read().startswith("version"): + raise OSError( + "You seem to have cloned a repository without having git-lfs installed. Please" + " install git-lfs and run `git lfs install` followed by `git lfs pull` in the" + " folder you cloned." + ) + else: + raise ValueError from e + except (UnicodeDecodeError, ValueError): + raise EnvironmentError(f"Unable to convert {model_file} to Flax deserializable object. ") + + return load_flax_weights_in_pytorch_model(pt_model, flax_state) + + +def load_flax_weights_in_pytorch_model(pt_model, flax_state): + """Load flax checkpoints in a PyTorch model""" + + try: + import torch # noqa: F401 + except ImportError: + logger.error( + "Loading Flax weights in PyTorch requires both PyTorch and Flax to be installed. Please see" + " https://pytorch.org/ and https://flax.readthedocs.io/en/latest/installation.html for installation" + " instructions." + ) + raise + + # check if we have bf16 weights + is_type_bf16 = flatten_dict(jax.tree_util.tree_map(lambda x: x.dtype == jnp.bfloat16, flax_state)).values() + if any(is_type_bf16): + # convert all weights to fp32 if they are bf16 since torch.from_numpy can-not handle bf16 + + # and bf16 is not fully supported in PT yet. + logger.warning( + "Found ``bfloat16`` weights in Flax model. Casting all ``bfloat16`` weights to ``float32`` " + "before loading those in PyTorch model." + ) + flax_state = jax.tree_util.tree_map( + lambda params: params.astype(np.float32) if params.dtype == jnp.bfloat16 else params, flax_state + ) + + pt_model.base_model_prefix = "" + + flax_state_dict = flatten_dict(flax_state, sep=".") + pt_model_dict = pt_model.state_dict() + + # keep track of unexpected & missing keys + unexpected_keys = [] + missing_keys = set(pt_model_dict.keys()) + + for flax_key_tuple, flax_tensor in flax_state_dict.items(): + flax_key_tuple_array = flax_key_tuple.split(".") + + if flax_key_tuple_array[-1] == "kernel" and flax_tensor.ndim == 4: + flax_key_tuple_array = flax_key_tuple_array[:-1] + ["weight"] + flax_tensor = jnp.transpose(flax_tensor, (3, 2, 0, 1)) + elif flax_key_tuple_array[-1] == "kernel": + flax_key_tuple_array = flax_key_tuple_array[:-1] + ["weight"] + flax_tensor = flax_tensor.T + elif flax_key_tuple_array[-1] == "scale": + flax_key_tuple_array = flax_key_tuple_array[:-1] + ["weight"] + + if "time_embedding" not in flax_key_tuple_array: + for i, flax_key_tuple_string in enumerate(flax_key_tuple_array): + flax_key_tuple_array[i] = ( + flax_key_tuple_string.replace("_0", ".0") + .replace("_1", ".1") + .replace("_2", ".2") + .replace("_3", ".3") + .replace("_4", ".4") + .replace("_5", ".5") + .replace("_6", ".6") + .replace("_7", ".7") + .replace("_8", ".8") + .replace("_9", ".9") + ) + + flax_key = ".".join(flax_key_tuple_array) + + if flax_key in pt_model_dict: + if flax_tensor.shape != pt_model_dict[flax_key].shape: + raise ValueError( + f"Flax checkpoint seems to be incorrect. Weight {flax_key_tuple} was expected " + f"to be of shape {pt_model_dict[flax_key].shape}, but is {flax_tensor.shape}." + ) + else: + # add weight to pytorch dict + flax_tensor = np.asarray(flax_tensor) if not isinstance(flax_tensor, np.ndarray) else flax_tensor + pt_model_dict[flax_key] = torch.from_numpy(flax_tensor) + # remove from missing keys + missing_keys.remove(flax_key) + else: + # weight is not expected by PyTorch model + unexpected_keys.append(flax_key) + + pt_model.load_state_dict(pt_model_dict) + + # re-transform missing_keys to list + missing_keys = list(missing_keys) + + if len(unexpected_keys) > 0: + logger.warning( + "Some weights of the Flax model were not used when initializing the PyTorch model" + f" {pt_model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are initializing" + f" {pt_model.__class__.__name__} from a Flax model trained on another task or with another architecture" + " (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).\n- This" + f" IS NOT expected if you are initializing {pt_model.__class__.__name__} from a Flax model that you expect" + " to be exactly identical (e.g. initializing a BertForSequenceClassification model from a" + " FlaxBertForSequenceClassification model)." + ) + if len(missing_keys) > 0: + logger.warning( + f"Some weights of {pt_model.__class__.__name__} were not initialized from the Flax model and are newly" + f" initialized: {missing_keys}\nYou should probably TRAIN this model on a down-stream task to be able to" + " use it for predictions and inference." + ) + + return pt_model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0901840679e303ee2f9a738d421e42f0e89db504 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/modeling_utils.py @@ -0,0 +1,2045 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import functools +import inspect +import itertools +import json +import os +import re +import shutil +import tempfile +from collections import OrderedDict +from contextlib import ExitStack, contextmanager +from functools import wraps +from pathlib import Path +from typing import Any, Callable, ContextManager, Type + +import safetensors +import torch +import torch.utils.checkpoint +from huggingface_hub import DDUFEntry, create_repo, split_torch_state_dict_into_shards +from huggingface_hub.utils import validate_hf_hub_args +from torch import Tensor, nn +from typing_extensions import Self + +from .. import __version__ +from ..quantizers import DiffusersAutoQuantizer, DiffusersQuantizer +from ..quantizers.quantization_config import QuantizationMethod +from ..utils import ( + CONFIG_NAME, + FLAX_WEIGHTS_NAME, + HF_ENABLE_PARALLEL_LOADING, + SAFE_WEIGHTS_INDEX_NAME, + SAFETENSORS_WEIGHTS_NAME, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, + _add_variant, + _get_checkpoint_shard_files, + _get_model_file, + deprecate, + is_accelerate_available, + is_bitsandbytes_available, + is_bitsandbytes_version, + is_peft_available, + is_torch_version, + logging, +) +from ..utils.distributed_utils import is_torch_dist_rank_zero +from ..utils.hub_utils import PushToHubMixin, load_or_create_model_card, populate_model_card +from ..utils.torch_utils import empty_device_cache +from ._modeling_parallel import ContextParallelConfig, ContextParallelModelPlan, ParallelConfig +from .model_loading_utils import ( + _caching_allocator_warmup, + _determine_device_map, + _expand_device_map, + _fetch_index_file, + _fetch_index_file_legacy, + _load_shard_file, + _load_shard_files_with_threadpool, + load_state_dict, +) + + +class ContextManagers: + """ + Wrapper for `contextlib.ExitStack` which enters a collection of context managers. Adaptation of `ContextManagers` + in the `fastcore` library. + """ + + def __init__(self, context_managers: list[ContextManager]): + self.context_managers = context_managers + self.stack = ExitStack() + + def __enter__(self): + for context_manager in self.context_managers: + self.stack.enter_context(context_manager) + + def __exit__(self, *args, **kwargs): + self.stack.__exit__(*args, **kwargs) + + +logger = logging.get_logger(__name__) + +_REGEX_SHARD = re.compile(r"(.*?)-\d{5}-of-\d{5}") + +TORCH_INIT_FUNCTIONS = { + "uniform_": nn.init.uniform_, + "normal_": nn.init.normal_, + "trunc_normal_": nn.init.trunc_normal_, + "constant_": nn.init.constant_, + "xavier_uniform_": nn.init.xavier_uniform_, + "xavier_normal_": nn.init.xavier_normal_, + "kaiming_uniform_": nn.init.kaiming_uniform_, + "kaiming_normal_": nn.init.kaiming_normal_, + "uniform": nn.init.uniform, + "normal": nn.init.normal, + "xavier_uniform": nn.init.xavier_uniform, + "xavier_normal": nn.init.xavier_normal, + "kaiming_uniform": nn.init.kaiming_uniform, + "kaiming_normal": nn.init.kaiming_normal, +} + +if is_torch_version(">=", "1.9.0"): + _LOW_CPU_MEM_USAGE_DEFAULT = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT = False + + +if is_accelerate_available(): + import accelerate + from accelerate import dispatch_model + from accelerate.utils import load_offloaded_weights, save_offload_index + + +def get_parameter_device(parameter: torch.nn.Module) -> torch.device: + from ..hooks.group_offloading import _get_group_onload_device + + try: + # Try to get the onload device from the group offloading hook + return _get_group_onload_device(parameter) + except ValueError: + pass + + try: + # If the onload device is not available due to no group offloading hooks, try to get the device + # from the first parameter or buffer + parameters_and_buffers = itertools.chain(parameter.parameters(), parameter.buffers()) + return next(parameters_and_buffers).device + except StopIteration: + # For torch.nn.DataParallel compatibility in PyTorch 1.5 + + def find_tensor_attributes(module: torch.nn.Module) -> list[tuple[str, Tensor]]: + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = parameter._named_members(get_members_fn=find_tensor_attributes) + first_tuple = next(gen) + return first_tuple[1].device + + +def get_parameter_dtype(parameter: torch.nn.Module) -> torch.dtype: + """ + Returns the first found floating dtype in parameters if there is one, otherwise returns the last dtype it found. + """ + # 1. Check if we have attached any dtype modifying hooks (eg. layerwise casting) + if isinstance(parameter, nn.Module): + for name, submodule in parameter.named_modules(): + if not hasattr(submodule, "_diffusers_hook"): + continue + registry = submodule._diffusers_hook + hook = registry.get_hook("layerwise_casting") + if hook is not None: + return hook.compute_dtype + + # 2. If no dtype modifying hooks are attached, return the dtype of the first floating point parameter/buffer + last_dtype = None + + for name, param in parameter.named_parameters(): + last_dtype = param.dtype + if ( + hasattr(parameter, "_keep_in_fp32_modules") + and parameter._keep_in_fp32_modules + and any(m in name for m in parameter._keep_in_fp32_modules) + ): + continue + + if param.is_floating_point(): + return param.dtype + + for buffer in parameter.buffers(): + last_dtype = buffer.dtype + if buffer.is_floating_point(): + return buffer.dtype + + if last_dtype is not None: + # if no floating dtype was found return whatever the first dtype is + return last_dtype + + # For nn.DataParallel compatibility in PyTorch > 1.5 + def find_tensor_attributes(module: nn.Module) -> list[tuple[str, Tensor]]: + tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)] + return tuples + + gen = parameter._named_members(get_members_fn=find_tensor_attributes) + last_tuple = None + for tuple in gen: + last_tuple = tuple + if tuple[1].is_floating_point(): + return tuple[1].dtype + + if last_tuple is not None: + # fallback to the last dtype + return last_tuple[1].dtype + + +@contextmanager +def no_init_weights(): + """ + Context manager to globally disable weight initialization to speed up loading large models. To do that, all the + torch.nn.init function are all replaced with skip. + """ + + def _skip_init(*args, **kwargs): + pass + + for name, init_func in TORCH_INIT_FUNCTIONS.items(): + setattr(torch.nn.init, name, _skip_init) + try: + yield + finally: + # Restore the original initialization functions + for name, init_func in TORCH_INIT_FUNCTIONS.items(): + setattr(torch.nn.init, name, init_func) + + +class ModelMixin(torch.nn.Module, PushToHubMixin): + r""" + Base class for all models. + + [`ModelMixin`] takes care of storing the model configuration and provides methods for loading, downloading and + saving models. + + - **config_name** ([`str`]) -- Filename to save a model to when calling [`~models.ModelMixin.save_pretrained`]. + """ + + config_name = CONFIG_NAME + _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"] + _supports_gradient_checkpointing = False + _keys_to_ignore_on_load_unexpected = None + _no_split_modules = None + _keep_in_fp32_modules = None + _skip_layerwise_casting_patterns = None + _supports_group_offloading = True + _repeated_blocks = [] + _parallel_config = None + _cp_plan = None + _skip_keys = None + + def __init__(self): + super().__init__() + + self._gradient_checkpointing_func = None + + def __getattr__(self, name: str) -> Any: + """The only reason we overwrite `getattr` here is to gracefully deprecate accessing + config attributes directly. See https://github.com/huggingface/diffusers/pull/3129 We need to overwrite + __getattr__ here in addition so that we don't trigger `torch.nn.Module`'s __getattr__': + https://pytorch.org/docs/stable/_modules/torch/nn/modules/module.html#Module + """ + + is_in_config = "_internal_dict" in self.__dict__ and hasattr(self.__dict__["_internal_dict"], name) + is_attribute = name in self.__dict__ + + if is_in_config and not is_attribute: + deprecation_message = f"Accessing config attribute `{name}` directly via '{type(self).__name__}' object attribute is deprecated. Please access '{name}' over '{type(self).__name__}'s config object instead, e.g. 'unet.config.{name}'." + deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False, stacklevel=3) + return self._internal_dict[name] + + # call PyTorch's https://pytorch.org/docs/stable/_modules/torch/nn/modules/module.html#Module + return super().__getattr__(name) + + @property + def is_gradient_checkpointing(self) -> bool: + """ + Whether gradient checkpointing is activated for this model or not. + """ + return any(hasattr(m, "gradient_checkpointing") and m.gradient_checkpointing for m in self.modules()) + + def enable_gradient_checkpointing(self, gradient_checkpointing_func: Callable | None = None) -> None: + """ + Activates gradient checkpointing for the current model (may be referred to as *activation checkpointing* or + *checkpoint activations* in other frameworks). + + Args: + gradient_checkpointing_func (`Callable`, *optional*): + The function to use for gradient checkpointing. If `None`, the default PyTorch checkpointing function + is used (`torch.utils.checkpoint.checkpoint`). + """ + if not self._supports_gradient_checkpointing: + raise ValueError( + f"{self.__class__.__name__} does not support gradient checkpointing. Please make sure to set the boolean attribute " + f"`_supports_gradient_checkpointing` to `True` in the class definition." + ) + + if gradient_checkpointing_func is None: + + def _gradient_checkpointing_func(module, *args): + ckpt_kwargs = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + return torch.utils.checkpoint.checkpoint( + module.__call__, + *args, + **ckpt_kwargs, + ) + + gradient_checkpointing_func = _gradient_checkpointing_func + + self._set_gradient_checkpointing(enable=True, gradient_checkpointing_func=gradient_checkpointing_func) + + def disable_gradient_checkpointing(self) -> None: + """ + Deactivates gradient checkpointing for the current model (may be referred to as *activation checkpointing* or + *checkpoint activations* in other frameworks). + """ + if self._supports_gradient_checkpointing: + self._set_gradient_checkpointing(enable=False) + + def set_use_npu_flash_attention(self, valid: bool) -> None: + r""" + Set the switch for the npu flash attention. + """ + + def fn_recursive_set_npu_flash_attention(module: torch.nn.Module): + if hasattr(module, "set_use_npu_flash_attention"): + module.set_use_npu_flash_attention(valid) + + for child in module.children(): + fn_recursive_set_npu_flash_attention(child) + + for module in self.children(): + if isinstance(module, torch.nn.Module): + fn_recursive_set_npu_flash_attention(module) + + def enable_npu_flash_attention(self) -> None: + r""" + Enable npu flash attention from torch_npu + + """ + self.set_use_npu_flash_attention(True) + + def disable_npu_flash_attention(self) -> None: + r""" + disable npu flash attention from torch_npu + + """ + self.set_use_npu_flash_attention(False) + + def set_use_xla_flash_attention( + self, use_xla_flash_attention: bool, partition_spec: Callable | None = None, **kwargs + ) -> None: + # Recursively walk through all the children. + # Any children which exposes the set_use_xla_flash_attention method + # gets the message + def fn_recursive_set_flash_attention(module: torch.nn.Module): + if hasattr(module, "set_use_xla_flash_attention"): + module.set_use_xla_flash_attention(use_xla_flash_attention, partition_spec, **kwargs) + + for child in module.children(): + fn_recursive_set_flash_attention(child) + + for module in self.children(): + if isinstance(module, torch.nn.Module): + fn_recursive_set_flash_attention(module) + + def enable_xla_flash_attention(self, partition_spec: Callable | None = None, **kwargs): + r""" + Enable the flash attention pallals kernel for torch_xla. + """ + self.set_use_xla_flash_attention(True, partition_spec, **kwargs) + + def disable_xla_flash_attention(self): + r""" + Disable the flash attention pallals kernel for torch_xla. + """ + self.set_use_xla_flash_attention(False) + + def set_use_memory_efficient_attention_xformers(self, valid: bool, attention_op: Callable | None = None) -> None: + # Recursively walk through all the children. + # Any children which exposes the set_use_memory_efficient_attention_xformers method + # gets the message + def fn_recursive_set_mem_eff(module: torch.nn.Module): + if hasattr(module, "set_use_memory_efficient_attention_xformers"): + module.set_use_memory_efficient_attention_xformers(valid, attention_op) + + for child in module.children(): + fn_recursive_set_mem_eff(child) + + for module in self.children(): + if isinstance(module, torch.nn.Module): + fn_recursive_set_mem_eff(module) + + def enable_xformers_memory_efficient_attention(self, attention_op: Callable | None = None) -> None: + r""" + Enable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/). + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up during + inference. Speed up during training is not guaranteed. + + > [!WARNING] > ⚠️ When memory efficient attention and sliced attention are both enabled, memory efficient + attention takes > precedent. + + Parameters: + attention_op (`Callable`, *optional*): + Override the default `None` operator for use as `op` argument to the + [`memory_efficient_attention()`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention) + function of xFormers. + + Examples: + + ```py + >>> import torch + >>> from diffusers import UNet2DConditionModel + >>> from xformers.ops import MemoryEfficientAttentionFlashAttentionOp + + >>> model = UNet2DConditionModel.from_pretrained( + ... "stabilityai/stable-diffusion-2-1", subfolder="unet", torch_dtype=torch.float16 + ... ) + >>> model = model.to("cuda") + >>> model.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp) + ``` + """ + self.set_use_memory_efficient_attention_xformers(True, attention_op) + + def disable_xformers_memory_efficient_attention(self) -> None: + r""" + Disable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/). + """ + self.set_use_memory_efficient_attention_xformers(False) + + def enable_layerwise_casting( + self, + storage_dtype: torch.dtype = torch.float8_e4m3fn, + compute_dtype: torch.dtype | None = None, + skip_modules_pattern: tuple[str, ...] | None = None, + skip_modules_classes: tuple[Type[torch.nn.Module], ...] | None = None, + non_blocking: bool = False, + ) -> None: + r""" + Activates layerwise casting for the current model. + + Layerwise casting is a technique that casts the model weights to a lower precision dtype for storage but + upcasts them on-the-fly to a higher precision dtype for computation. This process can significantly reduce the + memory footprint from model weights, but may lead to some quality degradation in the outputs. Most degradations + are negligible, mostly stemming from weight casting in normalization and modulation layers. + + By default, most models in diffusers set the `_skip_layerwise_casting_patterns` attribute to ignore patch + embedding, positional embedding and normalization layers. This is because these layers are most likely + precision-critical for quality. If you wish to change this behavior, you can set the + `_skip_layerwise_casting_patterns` attribute to `None`, or call + [`~hooks.layerwise_casting.apply_layerwise_casting`] with custom arguments. + + Example: + Using [`~models.ModelMixin.enable_layerwise_casting`]: + + ```python + >>> from diffusers import CogVideoXTransformer3DModel + + >>> transformer = CogVideoXTransformer3DModel.from_pretrained( + ... "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16 + ... ) + + >>> # Enable layerwise casting via the model, which ignores certain modules by default + >>> transformer.enable_layerwise_casting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16) + ``` + + Args: + storage_dtype (`torch.dtype`): + The dtype to which the model should be cast for storage. + compute_dtype (`torch.dtype`): + The dtype to which the model weights should be cast during the forward pass. + skip_modules_pattern (`tuple[str, ...]`, *optional*): + A list of patterns to match the names of the modules to skip during the layerwise casting process. If + set to `None`, default skip patterns are used to ignore certain internal layers of modules and PEFT + layers. + skip_modules_classes (`tuple[Type[torch.nn.Module], ...]`, *optional*): + A list of module classes to skip during the layerwise casting process. + non_blocking (`bool`, *optional*, defaults to `False`): + If `True`, the weight casting operations are non-blocking. + """ + from ..hooks import apply_layerwise_casting + + user_provided_patterns = True + if skip_modules_pattern is None: + from ..hooks.layerwise_casting import DEFAULT_SKIP_MODULES_PATTERN + + skip_modules_pattern = DEFAULT_SKIP_MODULES_PATTERN + user_provided_patterns = False + if self._keep_in_fp32_modules is not None: + skip_modules_pattern += tuple(self._keep_in_fp32_modules) + if self._skip_layerwise_casting_patterns is not None: + skip_modules_pattern += tuple(self._skip_layerwise_casting_patterns) + skip_modules_pattern = tuple(set(skip_modules_pattern)) + + if is_peft_available() and not user_provided_patterns: + # By default, we want to skip all peft layers because they have a very low memory footprint. + # If users want to apply layerwise casting on peft layers as well, they can utilize the + # `~diffusers.hooks.layerwise_casting.apply_layerwise_casting` function which provides + # them with more flexibility and control. + + from peft.tuners.loha.layer import LoHaLayer + from peft.tuners.lokr.layer import LoKrLayer + from peft.tuners.lora.layer import LoraLayer + + for layer in (LoHaLayer, LoKrLayer, LoraLayer): + skip_modules_pattern += tuple(layer.adapter_layer_names) + + if compute_dtype is None: + logger.info("`compute_dtype` not provided when enabling layerwise casting. Using dtype of the model.") + compute_dtype = self.dtype + + apply_layerwise_casting( + self, storage_dtype, compute_dtype, skip_modules_pattern, skip_modules_classes, non_blocking + ) + + def enable_group_offload( + self, + onload_device: torch.device, + offload_device: torch.device = torch.device("cpu"), + offload_type: str = "block_level", + num_blocks_per_group: int | None = None, + non_blocking: bool = False, + use_stream: bool = False, + record_stream: bool = False, + low_cpu_mem_usage=False, + offload_to_disk_path: str | None = None, + block_modules: str | None = None, + exclude_kwargs: str | None = None, + ) -> None: + r""" + Activates group offloading for the current model. + + See [`~hooks.group_offloading.apply_group_offloading`] for more information. + + Example: + + ```python + >>> from diffusers import CogVideoXTransformer3DModel + + >>> transformer = CogVideoXTransformer3DModel.from_pretrained( + ... "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16 + ... ) + + >>> transformer.enable_group_offload( + ... onload_device=torch.device("cuda"), + ... offload_device=torch.device("cpu"), + ... offload_type="leaf_level", + ... use_stream=True, + ... ) + ``` + """ + from ..hooks import apply_group_offloading + + if getattr(self, "enable_tiling", None) is not None and getattr(self, "use_tiling", False) and use_stream: + msg = ( + "Applying group offloading on autoencoders, with CUDA streams, may not work as expected if the first " + "forward pass is executed with tiling enabled. Please make sure to either:\n" + "1. Run a forward pass with small input shapes.\n" + "2. Or, run a forward pass with tiling disabled (can still use small dummy inputs)." + ) + logger.warning(msg) + if not self._supports_group_offloading: + raise ValueError( + f"{self.__class__.__name__} does not support group offloading. Please make sure to set the boolean attribute " + f"`_supports_group_offloading` to `True` in the class definition. If you believe this is a mistake, please " + f"open an issue at https://github.com/huggingface/diffusers/issues." + ) + + apply_group_offloading( + module=self, + onload_device=onload_device, + offload_device=offload_device, + offload_type=offload_type, + num_blocks_per_group=num_blocks_per_group, + non_blocking=non_blocking, + use_stream=use_stream, + record_stream=record_stream, + low_cpu_mem_usage=low_cpu_mem_usage, + offload_to_disk_path=offload_to_disk_path, + block_modules=block_modules, + exclude_kwargs=exclude_kwargs, + ) + + def set_attention_backend(self, backend: str) -> None: + """ + Set the attention backend for the model. + + Args: + backend (`str`): + The name of the backend to set. Must be one of the available backends defined in + `AttentionBackendName`. Available backends can be found in + `diffusers.attention_dispatch.AttentionBackendName`. Defaults to torch native scaled dot product + attention as backend. + """ + from .attention import AttentionModuleMixin + from .attention_dispatch import ( + AttentionBackendName, + _AttentionBackendRegistry, + _check_attention_backend_requirements, + _maybe_download_kernel_for_backend, + ) + + # TODO: the following will not be required when everything is refactored to AttentionModuleMixin + from .attention_processor import Attention, MochiAttention + + logger.warning("Attention backends are an experimental feature and the API may be subject to change.") + attention_classes = (Attention, MochiAttention, AttentionModuleMixin) + + parallel_config_set = False + for module in self.modules(): + if not isinstance(module, attention_classes): + continue + processor = module.processor + if getattr(processor, "_parallel_config", None) is not None: + parallel_config_set = True + break + + backend = backend.lower() + available_backends = {x.value for x in AttentionBackendName.__members__.values()} + if backend not in available_backends: + raise ValueError(f"`{backend=}` must be one of the following: " + ", ".join(available_backends)) + + backend = AttentionBackendName(backend) + if parallel_config_set and not _AttentionBackendRegistry._is_context_parallel_available(backend): + compatible_backends = sorted(_AttentionBackendRegistry._supports_context_parallel) + raise ValueError( + f"Context parallelism is enabled but current attention backend '{backend.value}' " + f"does not support context parallelism. " + f"Please set a compatible attention backend: {compatible_backends} using `model.set_attention_backend()`." + ) + + _check_attention_backend_requirements(backend) + _maybe_download_kernel_for_backend(backend) + + for module in self.modules(): + if not isinstance(module, attention_classes): + continue + processor = module.processor + if processor is None or not hasattr(processor, "_attention_backend"): + continue + processor._attention_backend = backend + + # Important to set the active backend so that it propagates gracefully throughout. + _AttentionBackendRegistry.set_active_backend(backend) + + def reset_attention_backend(self) -> None: + """ + Resets the attention backend for the model. Following calls to `forward` will use the environment default, if + set, or the torch native scaled dot product attention. + """ + from .attention import AttentionModuleMixin + from .attention_processor import Attention, MochiAttention + + logger.warning("Attention backends are an experimental feature and the API may be subject to change.") + + attention_classes = (Attention, MochiAttention, AttentionModuleMixin) + for module in self.modules(): + if not isinstance(module, attention_classes): + continue + processor = module.processor + if processor is None or not hasattr(processor, "_attention_backend"): + continue + processor._attention_backend = None + + def save_pretrained( + self, + save_directory: str | os.PathLike, + is_main_process: bool = True, + save_function: Callable | None = None, + safe_serialization: bool = True, + variant: str | None = None, + max_shard_size: int | str = "10GB", + push_to_hub: bool = False, + **kwargs, + ): + """ + Save a model and its configuration file to a directory so that it can be reloaded using the + [`~models.ModelMixin.from_pretrained`] class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save a model and its configuration file to. Will be created if it doesn't exist. + is_main_process (`bool`, *optional*, defaults to `True`): + Whether the process calling this is the main process or not. Useful during distributed training and you + need to call this function on all processes. In this case, set `is_main_process=True` only on the main + process to avoid race conditions. + save_function (`Callable`): + The function to use to save the state dictionary. Useful during distributed training when you need to + replace `torch.save` with another method. Can be configured with the environment variable + `DIFFUSERS_SAVE_MODE`. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + variant (`str`, *optional*): + If specified, weights are saved in the format `pytorch_model..bin`. + max_shard_size (`int` or `str`, defaults to `"10GB"`): + The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size + lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5GB"`). + If expressed as an integer, the unit is bytes. Note that this limit will be decreased after a certain + period of time (starting from Oct 2024) to allow users to upgrade to the latest version of `diffusers`. + This is to establish a common default size for this argument across different libraries in the Hugging + Face ecosystem (`transformers`, and `accelerate`, for example). + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + if os.path.isfile(save_directory): + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") + return + + hf_quantizer = getattr(self, "hf_quantizer", None) + if hf_quantizer is not None: + quantization_serializable = ( + hf_quantizer is not None + and isinstance(hf_quantizer, DiffusersQuantizer) + and hf_quantizer.is_serializable + ) + if not quantization_serializable: + raise ValueError( + f"The model is quantized with {hf_quantizer.quantization_config.quant_method} and is not serializable - check out the warnings from" + " the logger on the traceback to understand the reason why the quantized model is not serializable." + ) + + weights_name = SAFETENSORS_WEIGHTS_NAME if safe_serialization else WEIGHTS_NAME + weights_name = _add_variant(weights_name, variant) + weights_name_pattern = weights_name.replace(".bin", "{suffix}.bin").replace( + ".safetensors", "{suffix}.safetensors" + ) + + os.makedirs(save_directory, exist_ok=True) + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + private = kwargs.pop("private", None) + create_pr = kwargs.pop("create_pr", False) + token = kwargs.pop("token", None) + repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id + + # Only save the model itself if we are using distributed training + model_to_save = self + + # Attach architecture to the config + # Save the config + if is_main_process: + model_to_save.save_config(save_directory) + + # Save the model + state_dict = model_to_save.state_dict() + + # Save the model + state_dict_split = split_torch_state_dict_into_shards( + state_dict, max_shard_size=max_shard_size, filename_pattern=weights_name_pattern + ) + + # Clean the folder from a previous save + if is_main_process: + for filename in os.listdir(save_directory): + if filename in state_dict_split.filename_to_tensors.keys(): + continue + full_filename = os.path.join(save_directory, filename) + if not os.path.isfile(full_filename): + continue + weights_without_ext = weights_name_pattern.replace(".bin", "").replace(".safetensors", "") + weights_without_ext = weights_without_ext.replace("{suffix}", "") + filename_without_ext = filename.replace(".bin", "").replace(".safetensors", "") + # make sure that file to be deleted matches format of sharded file, e.g. pytorch_model-00001-of-00005 + if ( + filename.startswith(weights_without_ext) + and _REGEX_SHARD.fullmatch(filename_without_ext) is not None + ): + os.remove(full_filename) + + for filename, tensors in state_dict_split.filename_to_tensors.items(): + shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors} + filepath = os.path.join(save_directory, filename) + if safe_serialization: + # At some point we will need to deal better with save_function (used for TPU and other distributed + # joyfulness), but for now this enough. + safetensors.torch.save_file(shard, filepath, metadata={"format": "pt"}) + else: + torch.save(shard, filepath) + + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = SAFE_WEIGHTS_INDEX_NAME if safe_serialization else WEIGHTS_INDEX_NAME + save_index_file = os.path.join(save_directory, _add_variant(save_index_file, variant)) + # Save the index as well + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + logger.info( + f"The model is bigger than the maximum size per checkpoint ({max_shard_size}) and is going to be " + f"split in {len(state_dict_split.filename_to_tensors)} checkpoint shards. You can find where each parameters has been saved in the " + f"index located at {save_index_file}." + ) + else: + path_to_weights = os.path.join(save_directory, weights_name) + logger.info(f"Model weights saved in {path_to_weights}") + + if push_to_hub: + # Create a new empty model card and eventually tag it + model_card = load_or_create_model_card(repo_id, token=token) + model_card = populate_model_card(model_card) + model_card.save(Path(save_directory, "README.md").as_posix()) + + self._upload_folder( + save_directory, + repo_id, + token=token, + commit_message=commit_message, + create_pr=create_pr, + ) + + def dequantize(self): + """ + Potentially dequantize the model in case it has been quantized by a quantization method that support + dequantization. + """ + hf_quantizer = getattr(self, "hf_quantizer", None) + + if hf_quantizer is None: + raise ValueError("You need to first quantize your model in order to dequantize it") + + return hf_quantizer.dequantize(self) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, **kwargs) -> Self: + r""" + Instantiate a pretrained PyTorch model from a pretrained model configuration. + + The model is set in evaluation mode - `model.eval()` - by default, and dropout modules are deactivated. To + train the model, set it back in training mode with `model.train()`. + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + with [`~ModelMixin.save_pretrained`]. + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info (`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + from_flax (`bool`, *optional*, defaults to `False`): + Load the model weights from a Flax checkpoint save file. + subfolder (`str`, *optional*, defaults to `""`): + The subfolder location of a model file within a larger model repository on the Hub or locally. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you're downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + device_map (`int | str | torch.device` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be defined for each + parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the + same device. Defaults to `None`, meaning that the model will be loaded on CPU. + + Examples: + + ```py + >>> from diffusers import AutoModel + >>> import torch + + >>> # This works. + >>> model = AutoModel.from_pretrained( + ... "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map="cuda" + ... ) + >>> # This also works (integer accelerator device ID). + >>> model = AutoModel.from_pretrained( + ... "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map=0 + ... ) + >>> # Specifying a supported offloading strategy like "auto" also works. + >>> model = AutoModel.from_pretrained( + ... "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", device_map="auto" + ... ) + >>> # Specifying a dictionary as `device_map` also works. + >>> model = AutoModel.from_pretrained( + ... "stabilityai/stable-diffusion-xl-base-1.0", + ... subfolder="unet", + ... device_map={"": torch.device("cuda")}, + ... ) + ``` + + Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For + more information about each option see [designing a device + map](https://huggingface.co/docs/accelerate/en/concept_guides/big_model_inference#the-devicemap). You + can also refer to the [Diffusers-specific + documentation](https://huggingface.co/docs/diffusers/main/en/training/distributed_inference#model-sharding) + for more concrete examples. + max_memory (`Dict`, *optional*): + A dictionary device identifier for the maximum memory. Will default to the maximum memory available for + each GPU and the available CPU RAM if unset. + offload_folder (`str` or `os.PathLike`, *optional*): + The path to offload weights if `device_map` contains the value `"disk"`. + offload_state_dict (`bool`, *optional*): + If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if + the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True` + when there is some disk offload. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + variant (`str`, *optional*): + Load weights from a specified `variant` filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the `safetensors` weights are downloaded if they're available **and** if the + `safetensors` library is installed. If set to `True`, the model is forcibly loaded from `safetensors` + weights. If set to `False`, `safetensors` weights are not loaded. + disable_mmap ('bool', *optional*, defaults to 'False'): + Whether to disable mmap when loading a Safetensors model. This option can perform better when the model + is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well. + + > [!TIP] > To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in + with `hf > auth login`. You can also activate the special > + ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use this method in a > + firewalled environment. + + Example: + + ```py + from diffusers import UNet2DConditionModel + + unet = UNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet") + ``` + + If you get the error message below, you need to finetune the weights for your downstream task: + + ```bash + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + """ + cache_dir = kwargs.pop("cache_dir", None) + ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False) + force_download = kwargs.pop("force_download", False) + from_flax = kwargs.pop("from_flax", False) + proxies = kwargs.pop("proxies", None) + output_loading_info = kwargs.pop("output_loading_info", False) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + torch_dtype = kwargs.pop("torch_dtype", None) + subfolder = kwargs.pop("subfolder", None) + device_map = kwargs.pop("device_map", None) + max_memory = kwargs.pop("max_memory", None) + offload_folder = kwargs.pop("offload_folder", None) + offload_state_dict = kwargs.pop("offload_state_dict", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + variant = kwargs.pop("variant", None) + use_safetensors = kwargs.pop("use_safetensors", None) + quantization_config = kwargs.pop("quantization_config", None) + dduf_entries: dict[str, DDUFEntry] | None = kwargs.pop("dduf_entries", None) + disable_mmap = kwargs.pop("disable_mmap", False) + parallel_config: ParallelConfig | ContextParallelConfig | None = kwargs.pop("parallel_config", None) + + is_parallel_loading_enabled = HF_ENABLE_PARALLEL_LOADING + if is_parallel_loading_enabled and not low_cpu_mem_usage: + raise NotImplementedError("Parallel loading is not supported when not using `low_cpu_mem_usage`.") + + if torch_dtype is not None and not isinstance(torch_dtype, torch.dtype): + torch_dtype = torch.float32 + logger.warning( + f"Passed `torch_dtype` {torch_dtype} is not a `torch.dtype`. Defaulting to `torch.float32`." + ) + + allow_pickle = False + if use_safetensors is None: + use_safetensors = True + allow_pickle = True + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if device_map is not None and not is_accelerate_available(): + raise NotImplementedError( + "Loading and dispatching requires `accelerate`. Please make sure to install accelerate or set" + " `device_map=None`. You can install accelerate with `pip install accelerate`." + ) + + # Check if we can handle device_map and dispatching the weights + if device_map is not None and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `device_map=None`." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + if low_cpu_mem_usage is False and device_map is not None: + raise ValueError( + f"You cannot set `low_cpu_mem_usage` to `False` while using device_map={device_map} for loading and" + " dispatching. Please make sure to set `low_cpu_mem_usage=True`." + ) + + # change device_map into a map if we passed an int, a str or a torch.device + if isinstance(device_map, torch.device): + device_map = {"": device_map} + elif isinstance(device_map, str) and device_map not in ["auto", "balanced", "balanced_low_0", "sequential"]: + try: + device_map = {"": torch.device(device_map)} + except RuntimeError: + raise ValueError( + "When passing device_map as a string, the value needs to be a device name (e.g. cpu, cuda:0) or " + f"'auto', 'balanced', 'balanced_low_0', 'sequential' but found {device_map}." + ) + elif isinstance(device_map, int): + if device_map < 0: + raise ValueError( + "You can't pass device_map as a negative int. If you want to put the model on the cpu, pass device_map = 'cpu' " + ) + else: + device_map = {"": device_map} + + if device_map is not None: + if low_cpu_mem_usage is None: + low_cpu_mem_usage = True + elif not low_cpu_mem_usage: + raise ValueError("Passing along a `device_map` requires `low_cpu_mem_usage=True`") + + if low_cpu_mem_usage: + if device_map is not None and not is_torch_version(">=", "1.10"): + # The max memory utils require PyTorch >= 1.10 to have torch.cuda.mem_get_info. + raise ValueError("`low_cpu_mem_usage` and `device_map` require PyTorch >= 1.10.") + + user_agent = { + "diffusers": __version__, + "file_type": "model", + "framework": "pytorch", + } + unused_kwargs = {} + + # Load config if we don't provide a configuration + config_path = pretrained_model_name_or_path + + # load config + config, unused_kwargs, commit_hash = cls.load_config( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + return_commit_hash=True, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + dduf_entries=dduf_entries, + **kwargs, + ) + # no in-place modification of the original config. + config = copy.deepcopy(config) + + # determine initial quantization config. + ####################################### + pre_quantized = "quantization_config" in config and config["quantization_config"] is not None + if pre_quantized or quantization_config is not None: + if pre_quantized: + config["quantization_config"] = DiffusersAutoQuantizer.merge_quantization_configs( + config["quantization_config"], quantization_config + ) + else: + config["quantization_config"] = quantization_config + hf_quantizer = DiffusersAutoQuantizer.from_config( + config["quantization_config"], pre_quantized=pre_quantized + ) + else: + hf_quantizer = None + + if hf_quantizer is not None: + hf_quantizer.validate_environment(torch_dtype=torch_dtype, from_flax=from_flax, device_map=device_map) + torch_dtype = hf_quantizer.update_torch_dtype(torch_dtype) + device_map = hf_quantizer.update_device_map(device_map) + + # In order to ensure popular quantization methods are supported. Can be disable with `disable_telemetry` + user_agent["quant"] = hf_quantizer.quantization_config.quant_method.value + + # Force-set to `True` for more mem efficiency + if low_cpu_mem_usage is None: + low_cpu_mem_usage = True + logger.info("Set `low_cpu_mem_usage` to True as `hf_quantizer` is not None.") + elif not low_cpu_mem_usage: + raise ValueError("`low_cpu_mem_usage` cannot be False or None when using quantization.") + + # Check if `_keep_in_fp32_modules` is not None + use_keep_in_fp32_modules = cls._keep_in_fp32_modules is not None and ( + hf_quantizer is None or getattr(hf_quantizer, "use_keep_in_fp32_modules", False) + ) + + if use_keep_in_fp32_modules: + keep_in_fp32_modules = cls._keep_in_fp32_modules + if not isinstance(keep_in_fp32_modules, list): + keep_in_fp32_modules = [keep_in_fp32_modules] + + if low_cpu_mem_usage is None: + low_cpu_mem_usage = True + logger.info("Set `low_cpu_mem_usage` to True as `_keep_in_fp32_modules` is not None.") + elif not low_cpu_mem_usage: + raise ValueError("`low_cpu_mem_usage` cannot be False when `keep_in_fp32_modules` is True.") + else: + keep_in_fp32_modules = [] + + is_sharded = False + resolved_model_file = None + + # Determine if we're loading from a directory of sharded checkpoints. + sharded_metadata = None + index_file = None + is_local = os.path.isdir(pretrained_model_name_or_path) + index_file_kwargs = { + "is_local": is_local, + "pretrained_model_name_or_path": pretrained_model_name_or_path, + "subfolder": subfolder or "", + "use_safetensors": use_safetensors, + "cache_dir": cache_dir, + "variant": variant, + "force_download": force_download, + "proxies": proxies, + "local_files_only": local_files_only, + "token": token, + "revision": revision, + "user_agent": user_agent, + "commit_hash": commit_hash, + "dduf_entries": dduf_entries, + } + index_file = _fetch_index_file(**index_file_kwargs) + # In case the index file was not found we still have to consider the legacy format. + # this becomes applicable when the variant is not None. + if variant is not None and (index_file is None or not os.path.exists(index_file)): + index_file = _fetch_index_file_legacy(**index_file_kwargs) + if index_file is not None and (dduf_entries or index_file.is_file()): + is_sharded = True + + if is_sharded and from_flax: + raise ValueError("Loading of sharded checkpoints is not supported when `from_flax=True`.") + + # load model + if from_flax: + resolved_model_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=FLAX_WEIGHTS_NAME, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + commit_hash=commit_hash, + ) + model = cls.from_config(config, **unused_kwargs) + + # Convert the weights + from .modeling_pytorch_flax_utils import load_flax_checkpoint_in_pytorch_model + + model = load_flax_checkpoint_in_pytorch_model(model, resolved_model_file) + else: + # in the case it is sharded, we have already the index + if is_sharded: + resolved_model_file, sharded_metadata = _get_checkpoint_shard_files( + pretrained_model_name_or_path, + index_file, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + revision=revision, + subfolder=subfolder or "", + dduf_entries=dduf_entries, + ) + elif use_safetensors: + try: + resolved_model_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=_add_variant(SAFETENSORS_WEIGHTS_NAME, variant), + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + commit_hash=commit_hash, + dduf_entries=dduf_entries, + ) + + except IOError as e: + logger.error(f"An error occurred while trying to fetch {pretrained_model_name_or_path}: {e}") + if not allow_pickle: + raise + logger.warning( + "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead." + ) + + if resolved_model_file is None and not is_sharded: + resolved_model_file = _get_model_file( + pretrained_model_name_or_path, + weights_name=_add_variant(WEIGHTS_NAME, variant), + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + commit_hash=commit_hash, + dduf_entries=dduf_entries, + ) + + if not isinstance(resolved_model_file, list): + resolved_model_file = [resolved_model_file] + + # set dtype to instantiate the model under: + # 1. If torch_dtype is not None, we use that dtype + # 2. If torch_dtype is float8, we don't use _set_default_torch_dtype and we downcast after loading the model + dtype_orig = None + if torch_dtype is not None and not torch_dtype == getattr(torch, "float8_e4m3fn", None): + if not isinstance(torch_dtype, torch.dtype): + raise ValueError( + f"{torch_dtype} needs to be of type `torch.dtype`, e.g. `torch.float16`, but is {type(torch_dtype)}." + ) + dtype_orig = cls._set_default_torch_dtype(torch_dtype) + + init_contexts = [no_init_weights()] + + if low_cpu_mem_usage: + init_contexts.append(accelerate.init_empty_weights()) + + with ContextManagers(init_contexts): + model = cls.from_config(config, **unused_kwargs) + + if dtype_orig is not None: + torch.set_default_dtype(dtype_orig) + + state_dict = None + if not is_sharded: + # Time to load the checkpoint + state_dict = load_state_dict(resolved_model_file[0], disable_mmap=disable_mmap, dduf_entries=dduf_entries) + # We only fix it for non sharded checkpoints as we don't need it yet for sharded one. + model._fix_state_dict_keys_on_load(state_dict) + + if is_sharded: + loaded_keys = sharded_metadata["all_checkpoint_keys"] + else: + loaded_keys = list(state_dict.keys()) + + if hf_quantizer is not None: + hf_quantizer.preprocess_model( + model=model, device_map=device_map, keep_in_fp32_modules=keep_in_fp32_modules + ) + + # Now that the model is loaded, we can determine the device_map + device_map = _determine_device_map( + model, device_map, max_memory, torch_dtype, keep_in_fp32_modules, hf_quantizer + ) + if hf_quantizer is not None: + hf_quantizer.validate_environment(device_map=device_map) + + ( + model, + missing_keys, + unexpected_keys, + mismatched_keys, + offload_index, + error_msgs, + ) = cls._load_pretrained_model( + model, + state_dict, + resolved_model_file, + pretrained_model_name_or_path, + loaded_keys, + ignore_mismatched_sizes=ignore_mismatched_sizes, + low_cpu_mem_usage=low_cpu_mem_usage, + device_map=device_map, + offload_folder=offload_folder, + offload_state_dict=offload_state_dict, + dtype=torch_dtype, + hf_quantizer=hf_quantizer, + keep_in_fp32_modules=keep_in_fp32_modules, + dduf_entries=dduf_entries, + is_parallel_loading_enabled=is_parallel_loading_enabled, + disable_mmap=disable_mmap, + ) + loading_info = { + "missing_keys": missing_keys, + "unexpected_keys": unexpected_keys, + "mismatched_keys": mismatched_keys, + "error_msgs": error_msgs, + } + + # Dispatch model with hooks on all devices if necessary + if device_map is not None: + device_map_kwargs = { + "device_map": device_map, + "offload_dir": offload_folder, + "offload_index": offload_index, + } + dispatch_model(model, **device_map_kwargs) + + if hf_quantizer is not None: + hf_quantizer.postprocess_model(model) + model.hf_quantizer = hf_quantizer + + if ( + torch_dtype is not None + and torch_dtype == getattr(torch, "float8_e4m3fn", None) + and hf_quantizer is None + and not use_keep_in_fp32_modules + ): + model = model.to(torch_dtype) + + if hf_quantizer is not None: + # We also make sure to purge `_pre_quantization_dtype` when we serialize + # the model config because `_pre_quantization_dtype` is `torch.dtype`, not JSON serializable. + model.register_to_config(_name_or_path=pretrained_model_name_or_path, _pre_quantization_dtype=torch_dtype) + else: + model.register_to_config(_name_or_path=pretrained_model_name_or_path) + + # Set model in evaluation mode to deactivate DropOut modules by default + model.eval() + + if parallel_config is not None: + model.enable_parallelism(config=parallel_config) + + if output_loading_info: + return model, loading_info + + return model + + # Adapted from `transformers`. + @wraps(torch.nn.Module.cuda) + def cuda(self, *args, **kwargs): + from ..hooks.group_offloading import _is_group_offload_enabled + + # Checks if the model has been loaded in 4-bit or 8-bit with BNB + if getattr(self, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES: + if getattr(self, "is_loaded_in_8bit", False) and is_bitsandbytes_version("<", "0.48.0"): + raise ValueError( + "Calling `cuda()` is not supported for `8-bit` quantized models with the installed version of bitsandbytes. " + f"The current device is `{self.device}`. If you intended to move the model, please install bitsandbytes >= 0.48.0." + ) + elif getattr(self, "is_loaded_in_4bit", False) and is_bitsandbytes_version("<", "0.43.2"): + raise ValueError( + "Calling `cuda()` is not supported for `4-bit` quantized models with the installed version of bitsandbytes. " + f"The current device is `{self.device}`. If you intended to move the model, please install bitsandbytes >= 0.43.2." + ) + + # Checks if group offloading is enabled + if _is_group_offload_enabled(self): + logger.warning( + f"The module '{self.__class__.__name__}' is group offloaded and moving it using `.cuda()` is not supported." + ) + return self + + return super().cuda(*args, **kwargs) + + # Adapted from `transformers`. + @wraps(torch.nn.Module.to) + def to(self, *args, **kwargs): + from ..hooks.group_offloading import _is_group_offload_enabled + + device_arg_or_kwarg_present = any(isinstance(arg, torch.device) for arg in args) or "device" in kwargs + dtype_present_in_args = "dtype" in kwargs + + # Try converting arguments to torch.device in case they are passed as strings + for arg in args: + if not isinstance(arg, str): + continue + try: + torch.device(arg) + device_arg_or_kwarg_present = True + except RuntimeError: + pass + + if not dtype_present_in_args: + for arg in args: + if isinstance(arg, torch.dtype): + dtype_present_in_args = True + break + + if getattr(self, "is_quantized", False): + if dtype_present_in_args: + raise ValueError( + "Casting a quantized model to a new `dtype` is unsupported. To set the dtype of unquantized layers, please " + "use the `torch_dtype` argument when loading the model using `from_pretrained` or `from_single_file`" + ) + + if getattr(self, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES: + if getattr(self, "is_loaded_in_8bit", False) and is_bitsandbytes_version("<", "0.48.0"): + raise ValueError( + "Calling `to()` is not supported for `8-bit` quantized models with the installed version of bitsandbytes. " + f"The current device is `{self.device}`. If you intended to move the model, please install bitsandbytes >= 0.48.0." + ) + elif getattr(self, "is_loaded_in_4bit", False) and is_bitsandbytes_version("<", "0.43.2"): + raise ValueError( + "Calling `to()` is not supported for `4-bit` quantized models with the installed version of bitsandbytes. " + f"The current device is `{self.device}`. If you intended to move the model, please install bitsandbytes >= 0.43.2." + ) + if _is_group_offload_enabled(self) and device_arg_or_kwarg_present: + logger.warning( + f"The module '{self.__class__.__name__}' is group offloaded and moving it using `.to()` is not supported." + ) + return self + + return super().to(*args, **kwargs) + + # Taken from `transformers`. + def half(self, *args): + # Checks if the model is quantized + if getattr(self, "is_quantized", False): + raise ValueError( + "`.half()` is not supported for quantized model. Please use the model as it is, since the" + " model has already been cast to the correct `dtype`." + ) + else: + return super().half(*args) + + # Taken from `transformers`. + def float(self, *args): + # Checks if the model is quantized + if getattr(self, "is_quantized", False): + raise ValueError( + "`.float()` is not supported for quantized model. Please use the model as it is, since the" + " model has already been cast to the correct `dtype`." + ) + else: + return super().float(*args) + + def compile_repeated_blocks(self, *args, **kwargs): + """ + Compiles *only* the frequently repeated sub-modules of a model (e.g. the Transformer layers) instead of + compiling the entire model. This technique—often called **regional compilation** (see the PyTorch recipe + https://docs.pytorch.org/tutorials/recipes/regional_compilation.html) can reduce end-to-end compile time + substantially, while preserving the runtime speed-ups you would expect from a full `torch.compile`. + + The set of sub-modules to compile is discovered by the presence of **`_repeated_blocks`** attribute in the + model definition. Define this attribute on your model subclass as a list/tuple of class names (strings). Every + module whose class name matches will be compiled. + + Once discovered, each matching sub-module is compiled by calling `submodule.compile(*args, **kwargs)`. Any + positional or keyword arguments you supply to `compile_repeated_blocks` are forwarded verbatim to + `torch.compile`. + """ + repeated_blocks = getattr(self, "_repeated_blocks", None) + + if not repeated_blocks: + raise ValueError( + "`_repeated_blocks` attribute is empty. " + f"Set `_repeated_blocks` for the class `{self.__class__.__name__}` to benefit from faster compilation. " + ) + has_compiled_region = False + for submod in self.modules(): + if submod.__class__.__name__ in repeated_blocks: + submod.compile(*args, **kwargs) + has_compiled_region = True + + if not has_compiled_region: + raise ValueError( + f"Regional compilation failed because {repeated_blocks} classes are not found in the model. " + ) + + def enable_parallelism( + self, + *, + config: ParallelConfig | ContextParallelConfig, + cp_plan: dict[str, ContextParallelModelPlan] | None = None, + ): + logger.warning( + "`enable_parallelism` is an experimental feature. The API may change in the future and breaking changes may be introduced at any time without warning." + ) + + if not torch.distributed.is_available() and not torch.distributed.is_initialized(): + raise RuntimeError( + "torch.distributed must be available and initialized before calling `enable_parallelism`." + ) + + from ..hooks.context_parallel import apply_context_parallel + from .attention import AttentionModuleMixin + from .attention_dispatch import AttentionBackendName, _AttentionBackendRegistry + from .attention_processor import Attention, MochiAttention + + if isinstance(config, ContextParallelConfig): + config = ParallelConfig(context_parallel_config=config) + + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + device_type = torch._C._get_accelerator().type + device_module = torch.get_device_module(device_type) + device = torch.device(device_type, rank % device_module.device_count()) + + attention_classes = (Attention, MochiAttention, AttentionModuleMixin) + + if config.context_parallel_config is not None: + for module in self.modules(): + if not isinstance(module, attention_classes): + continue + + processor = module.processor + if processor is None or not hasattr(processor, "_attention_backend"): + continue + + attention_backend = processor._attention_backend + if attention_backend is None: + attention_backend, _ = _AttentionBackendRegistry.get_active_backend() + else: + attention_backend = AttentionBackendName(attention_backend) + + if not _AttentionBackendRegistry._is_context_parallel_available(attention_backend): + compatible_backends = sorted(_AttentionBackendRegistry._supports_context_parallel) + raise ValueError( + f"Context parallelism is enabled but the attention processor '{processor.__class__.__name__}' " + f"is using backend '{attention_backend.value}' which does not support context parallelism. " + f"Please set a compatible attention backend: {compatible_backends} using `model.set_attention_backend()` before " + f"calling `model.enable_parallelism()`." + ) + + # All modules use the same attention processor and backend. We don't need to + # iterate over all modules after checking the first processor + break + + mesh = None + if config.context_parallel_config is not None: + cp_config = config.context_parallel_config + mesh = torch.distributed.device_mesh.init_device_mesh( + device_type=device_type, + mesh_shape=cp_config.mesh_shape, + mesh_dim_names=cp_config.mesh_dim_names, + ) + + config.setup(rank, world_size, device, mesh=mesh) + self._parallel_config = config + + for module in self.modules(): + if not isinstance(module, attention_classes): + continue + processor = module.processor + if processor is None or not hasattr(processor, "_parallel_config"): + continue + processor._parallel_config = config + + if config.context_parallel_config is not None: + if cp_plan is None and self._cp_plan is None: + raise ValueError( + "`cp_plan` must be provided either as an argument or set in the model's `_cp_plan` attribute." + ) + cp_plan = cp_plan if cp_plan is not None else self._cp_plan + apply_context_parallel(self, config.context_parallel_config, cp_plan) + + @classmethod + def _load_pretrained_model( + cls, + model, + state_dict: OrderedDict, + resolved_model_file: list[str], + pretrained_model_name_or_path: str | os.PathLike, + loaded_keys: list[str], + ignore_mismatched_sizes: bool = False, + assign_to_params_buffers: bool = False, + hf_quantizer: DiffusersQuantizer | None = None, + low_cpu_mem_usage: bool = True, + dtype: str | torch.dtype | None = None, + keep_in_fp32_modules: list[str] | None = None, + device_map: str | int | torch.device | dict[str, str | int | torch.device] = None, + offload_state_dict: bool | None = None, + offload_folder: str | os.PathLike | None = None, + dduf_entries: dict[str, DDUFEntry] | None = None, + is_parallel_loading_enabled: bool | None = False, + disable_mmap: bool = False, + ): + model_state_dict = model.state_dict() + expected_keys = list(model_state_dict.keys()) + missing_keys = list(set(expected_keys) - set(loaded_keys)) + if hf_quantizer is not None: + missing_keys = hf_quantizer.update_missing_keys(model, missing_keys, prefix="") + unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + # Some models may have keys that are not in the state by design, removing them before needlessly warning + # the user. + if cls._keys_to_ignore_on_load_unexpected is not None: + for pat in cls._keys_to_ignore_on_load_unexpected: + unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None] + + mismatched_keys = [] + error_msgs = [] + + # Deal with offload + if device_map is not None and "disk" in device_map.values(): + if offload_folder is None: + raise ValueError( + "The current `device_map` had weights offloaded to the disk. Please provide an `offload_folder`" + " for them. Alternatively, make sure you have `safetensors` installed if the model you are using" + " offers the weights in this format." + ) + else: + os.makedirs(offload_folder, exist_ok=True) + if offload_state_dict is None: + offload_state_dict = True + + # If a device map has been used, we can speedup the load time by warming up the device caching allocator. + # If we don't warmup, each tensor allocation on device calls to the allocator for memory (effectively, a + # lot of individual calls to device malloc). We can, however, preallocate the memory required by the + # tensors using their expected shape and not performing any initialization of the memory (empty data). + # When the actual device allocations happen, the allocator already has a pool of unused device memory + # that it can re-use for faster loading of the model. + if device_map is not None: + expanded_device_map = _expand_device_map(device_map, expected_keys) + _caching_allocator_warmup(model, expanded_device_map, dtype, hf_quantizer) + + offload_index = {} if device_map is not None and "disk" in device_map.values() else None + state_dict_folder, state_dict_index = None, None + if offload_state_dict: + state_dict_folder = tempfile.mkdtemp() + state_dict_index = {} + + if state_dict is not None: + # load_state_dict will manage the case where we pass a dict instead of a file + # if state dict is not None, it means that we don't need to read the files from resolved_model_file also + resolved_model_file = [state_dict] + + # Prepare the loading function sharing the attributes shared between them. + load_fn = functools.partial( + _load_shard_files_with_threadpool if is_parallel_loading_enabled else _load_shard_file, + model=model, + model_state_dict=model_state_dict, + device_map=device_map, + dtype=dtype, + hf_quantizer=hf_quantizer, + keep_in_fp32_modules=keep_in_fp32_modules, + dduf_entries=dduf_entries, + loaded_keys=loaded_keys, + unexpected_keys=unexpected_keys, + offload_index=offload_index, + offload_folder=offload_folder, + state_dict_index=state_dict_index, + state_dict_folder=state_dict_folder, + ignore_mismatched_sizes=ignore_mismatched_sizes, + low_cpu_mem_usage=low_cpu_mem_usage, + disable_mmap=disable_mmap, + ) + + if is_parallel_loading_enabled: + offload_index, state_dict_index, _mismatched_keys, _error_msgs = load_fn(resolved_model_file) + error_msgs += _error_msgs + mismatched_keys += _mismatched_keys + else: + shard_files = resolved_model_file + if len(resolved_model_file) > 1: + shard_tqdm_kwargs = {"desc": "Loading checkpoint shards"} + if not is_torch_dist_rank_zero(): + shard_tqdm_kwargs["disable"] = True + shard_files = logging.tqdm(resolved_model_file, **shard_tqdm_kwargs) + + for shard_file in shard_files: + offload_index, state_dict_index, _mismatched_keys, _error_msgs = load_fn(shard_file) + error_msgs += _error_msgs + mismatched_keys += _mismatched_keys + + empty_device_cache() + + if offload_index is not None and len(offload_index) > 0: + save_offload_index(offload_index, offload_folder) + offload_index = None + + if offload_state_dict: + load_offloaded_weights(model, state_dict_index, state_dict_folder) + shutil.rmtree(state_dict_folder) + + if len(error_msgs) > 0: + error_msg = "\n\t".join(error_msgs) + if "size mismatch" in error_msg: + error_msg += ( + "\n\tYou may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method." + ) + raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}") + + if len(unexpected_keys) > 0: + logger.warning( + f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when initializing {cls.__name__}: \n {[', '.join(unexpected_keys)]}" + ) + else: + logger.info(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n") + + if len(missing_keys) > 0: + logger.warning( + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably" + " TRAIN this model on a down-stream task to be able to use it for predictions and inference." + ) + elif len(mismatched_keys) == 0: + logger.info( + f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at" + f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the" + f" checkpoint was trained on, you can already use {model.__class__.__name__} for predictions" + " without further training." + ) + if len(mismatched_keys) > 0: + mismatched_warning = "\n".join( + [ + f"- {key}: found shape {shape1} in the checkpoint and {shape2} in the model instantiated" + for key, shape1, shape2 in mismatched_keys + ] + ) + logger.warning( + f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at" + f" {pretrained_model_name_or_path} and are newly initialized because the shapes did not" + f" match:\n{mismatched_warning}\nYou should probably TRAIN this model on a down-stream task to be" + " able to use it for predictions and inference." + ) + + return model, missing_keys, unexpected_keys, mismatched_keys, offload_index, error_msgs + + @classmethod + def _get_signature_keys(cls, obj): + parameters = inspect.signature(obj.__init__).parameters + required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} + optional_parameters = set({k for k, v in parameters.items() if v.default != inspect._empty}) + expected_modules = set(required_parameters.keys()) - {"self"} + + return expected_modules, optional_parameters + + # Adapted from `transformers` modeling_utils.py + def _get_no_split_modules(self, device_map: str): + """ + Get the modules of the model that should not be split when using device_map. We iterate through the modules to + get the underlying `_no_split_modules`. + + Args: + device_map (`str`): + The device map value. Options are ["auto", "balanced", "balanced_low_0", "sequential"] + + Returns: + `list[str]`: list of modules that should not be split + """ + _no_split_modules = set() + modules_to_check = [self] + while len(modules_to_check) > 0: + module = modules_to_check.pop(-1) + # if the module does not appear in _no_split_modules, we also check the children + if module.__class__.__name__ not in _no_split_modules: + if isinstance(module, ModelMixin): + if module._no_split_modules is None: + raise ValueError( + f"{module.__class__.__name__} does not support `device_map='{device_map}'`. To implement support, the model " + "class needs to implement the `_no_split_modules` attribute." + ) + else: + _no_split_modules = _no_split_modules | set(module._no_split_modules) + modules_to_check += list(module.children()) + return list(_no_split_modules) + + @classmethod + def _set_default_torch_dtype(cls, dtype: torch.dtype) -> torch.dtype: + """ + Change the default dtype and return the previous one. This is needed when wanting to instantiate the model + under specific dtype. + + Args: + dtype (`torch.dtype`): + a floating dtype to set to. + + Returns: + `torch.dtype`: the original `dtype` that can be used to restore `torch.set_default_dtype(dtype)` if it was + modified. If it wasn't, returns `None`. + + Note `set_default_dtype` currently only works with floating-point types and asserts if for example, + `torch.int64` is passed. So if a non-float `dtype` is passed this functions will throw an exception. + """ + if not dtype.is_floating_point: + raise ValueError( + f"Can't instantiate {cls.__name__} model under dtype={dtype} since it is not a floating point dtype" + ) + + logger.info(f"Instantiating {cls.__name__} model under default dtype {dtype}.") + dtype_orig = torch.get_default_dtype() + torch.set_default_dtype(dtype) + return dtype_orig + + @property + def device(self) -> torch.device: + """ + `torch.device`: The device on which the module is (assuming that all the module parameters are on the same + device). + """ + return get_parameter_device(self) + + @property + def dtype(self) -> torch.dtype: + """ + `torch.dtype`: The dtype of the module (assuming that all the module parameters have the same dtype). + """ + return get_parameter_dtype(self) + + def num_parameters(self, only_trainable: bool = False, exclude_embeddings: bool = False) -> int: + """ + Get number of (trainable or non-embedding) parameters in the module. + + Args: + only_trainable (`bool`, *optional*, defaults to `False`): + Whether or not to return only the number of trainable parameters. + exclude_embeddings (`bool`, *optional*, defaults to `False`): + Whether or not to return only the number of non-embedding parameters. + + Returns: + `int`: The number of parameters. + + Example: + + ```py + from diffusers import UNet2DConditionModel + + model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet") + unet.num_parameters(only_trainable=True) + 859520964 + ``` + """ + is_loaded_in_4bit = getattr(self, "is_loaded_in_4bit", False) + + if is_loaded_in_4bit: + if is_bitsandbytes_available(): + import bitsandbytes as bnb + else: + raise ValueError( + "bitsandbytes is not installed but it seems that the model has been loaded in 4bit precision, something went wrong" + " make sure to install bitsandbytes with `pip install bitsandbytes`. You also need a GPU. " + ) + + if exclude_embeddings: + embedding_param_names = [ + f"{name}.weight" for name, module_type in self.named_modules() if isinstance(module_type, nn.Embedding) + ] + total_parameters = [ + parameter for name, parameter in self.named_parameters() if name not in embedding_param_names + ] + else: + total_parameters = list(self.parameters()) + + total_numel = [] + + for param in total_parameters: + if param.requires_grad or not only_trainable: + # For 4bit models, we need to multiply the number of parameters by 2 as half of the parameters are + # used for the 4bit quantization (uint8 tensors are stored) + if is_loaded_in_4bit and isinstance(param, bnb.nn.Params4bit): + if hasattr(param, "element_size"): + num_bytes = param.element_size() + elif hasattr(param, "quant_storage"): + num_bytes = param.quant_storage.itemsize + else: + num_bytes = 1 + total_numel.append(param.numel() * 2 * num_bytes) + else: + total_numel.append(param.numel()) + + return sum(total_numel) + + def get_memory_footprint(self, return_buffers=True): + r""" + Get the memory footprint of a model. This will return the memory footprint of the current model in bytes. + Useful to benchmark the memory footprint of the current model and design some tests. Solution inspired from the + PyTorch discussions: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822/2 + + Arguments: + return_buffers (`bool`, *optional*, defaults to `True`): + Whether to return the size of the buffer tensors in the computation of the memory footprint. Buffers + are tensors that do not require gradients and not registered as parameters. E.g. mean and std in batch + norm layers. Please see: https://discuss.pytorch.org/t/what-pytorch-means-by-buffers/120266/2 + """ + mem = sum([param.nelement() * param.element_size() for param in self.parameters()]) + if return_buffers: + mem_bufs = sum([buf.nelement() * buf.element_size() for buf in self.buffers()]) + mem = mem + mem_bufs + return mem + + def _set_gradient_checkpointing( + self, enable: bool = True, gradient_checkpointing_func: Callable = torch.utils.checkpoint.checkpoint + ) -> None: + is_gradient_checkpointing_set = False + + for name, module in self.named_modules(): + if hasattr(module, "gradient_checkpointing"): + logger.debug(f"Setting `gradient_checkpointing={enable}` for '{name}'") + module._gradient_checkpointing_func = gradient_checkpointing_func + module.gradient_checkpointing = enable + is_gradient_checkpointing_set = True + + if not is_gradient_checkpointing_set: + raise ValueError( + f"The module {self.__class__.__name__} does not support gradient checkpointing. Please make sure to " + f"use a module that supports gradient checkpointing by creating a boolean attribute `gradient_checkpointing`." + ) + + def _fix_state_dict_keys_on_load(self, state_dict: OrderedDict) -> None: + """ + This function fix the state dict of the model to take into account some changes that were made in the model + architecture: + - deprecated attention blocks (happened before we introduced sharded checkpoint, + so this is why we apply this method only when loading non sharded checkpoints for now) + """ + deprecated_attention_block_paths = [] + + def recursive_find_attn_block(name, module): + if hasattr(module, "_from_deprecated_attn_block") and module._from_deprecated_attn_block: + deprecated_attention_block_paths.append(name) + + for sub_name, sub_module in module.named_children(): + sub_name = sub_name if name == "" else f"{name}.{sub_name}" + recursive_find_attn_block(sub_name, sub_module) + + recursive_find_attn_block("", self) + + # NOTE: we have to check if the deprecated parameters are in the state dict + # because it is possible we are loading from a state dict that was already + # converted + + for path in deprecated_attention_block_paths: + # group_norm path stays the same + + # query -> to_q + if f"{path}.query.weight" in state_dict: + state_dict[f"{path}.to_q.weight"] = state_dict.pop(f"{path}.query.weight") + if f"{path}.query.bias" in state_dict: + state_dict[f"{path}.to_q.bias"] = state_dict.pop(f"{path}.query.bias") + + # key -> to_k + if f"{path}.key.weight" in state_dict: + state_dict[f"{path}.to_k.weight"] = state_dict.pop(f"{path}.key.weight") + if f"{path}.key.bias" in state_dict: + state_dict[f"{path}.to_k.bias"] = state_dict.pop(f"{path}.key.bias") + + # value -> to_v + if f"{path}.value.weight" in state_dict: + state_dict[f"{path}.to_v.weight"] = state_dict.pop(f"{path}.value.weight") + if f"{path}.value.bias" in state_dict: + state_dict[f"{path}.to_v.bias"] = state_dict.pop(f"{path}.value.bias") + + # proj_attn -> to_out.0 + if f"{path}.proj_attn.weight" in state_dict: + state_dict[f"{path}.to_out.0.weight"] = state_dict.pop(f"{path}.proj_attn.weight") + if f"{path}.proj_attn.bias" in state_dict: + state_dict[f"{path}.to_out.0.bias"] = state_dict.pop(f"{path}.proj_attn.bias") + return state_dict + + +class LegacyModelMixin(ModelMixin): + r""" + A subclass of `ModelMixin` to resolve class mapping from legacy classes (like `Transformer2DModel`) to more + pipeline-specific classes (like `DiTTransformer2DModel`). + """ + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, **kwargs): + # To prevent dependency import problem. + from .model_loading_utils import _fetch_remapped_cls_from_config + + # Create a copy of the kwargs so that we don't mess with the keyword arguments in the downstream calls. + kwargs_copy = kwargs.copy() + + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + # Load config if we don't provide a configuration + config_path = pretrained_model_name_or_path + + user_agent = { + "diffusers": __version__, + "file_type": "model", + "framework": "pytorch", + } + + # load config + config, _, _ = cls.load_config( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + return_commit_hash=True, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + user_agent=user_agent, + **kwargs, + ) + # resolve remapping + remapped_class = _fetch_remapped_cls_from_config(config, cls) + + if remapped_class is cls: + return super(LegacyModelMixin, remapped_class).from_pretrained( + pretrained_model_name_or_path, **kwargs_copy + ) + else: + return remapped_class.from_pretrained(pretrained_model_name_or_path, **kwargs_copy) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/normalization.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..84ffb67bfd6ac23147e3a7f08416374e5089b1d6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/normalization.py @@ -0,0 +1,647 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numbers + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import is_torch_npu_available, is_torch_version +from .activations import get_activation +from .embeddings import CombinedTimestepLabelEmbeddings, PixArtAlphaCombinedTimestepSizeEmbeddings + + +class AdaLayerNorm(nn.Module): + r""" + Norm layer modified to incorporate timestep embeddings. + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`, *optional*): The size of the embeddings dictionary. + output_dim (`int`, *optional*): + norm_elementwise_affine (`bool`, defaults to `False): + norm_eps (`bool`, defaults to `False`): + chunk_dim (`int`, defaults to `0`): + """ + + def __init__( + self, + embedding_dim: int, + num_embeddings: int | None = None, + output_dim: int | None = None, + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-5, + chunk_dim: int = 0, + ): + super().__init__() + + self.chunk_dim = chunk_dim + output_dim = output_dim or embedding_dim * 2 + + if num_embeddings is not None: + self.emb = nn.Embedding(num_embeddings, embedding_dim) + else: + self.emb = None + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, output_dim) + self.norm = nn.LayerNorm(output_dim // 2, norm_eps, norm_elementwise_affine) + + def forward( + self, x: torch.Tensor, timestep: torch.Tensor | None = None, temb: torch.Tensor | None = None + ) -> torch.Tensor: + if self.emb is not None: + temb = self.emb(timestep) + + temb = self.linear(self.silu(temb)) + + if self.chunk_dim == 1: + # This is a bit weird why we have the order of "shift, scale" here and "scale, shift" in the + # other if-branch. This branch is specific to CogVideoX and OmniGen for now. + shift, scale = temb.chunk(2, dim=1) + shift = shift[:, None, :] + scale = scale[:, None, :] + else: + scale, shift = temb.chunk(2, dim=0) + + x = self.norm(x) * (1 + scale) + shift + return x + + +class FP32LayerNorm(nn.LayerNorm): + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + origin_dtype = inputs.dtype + return F.layer_norm( + inputs.float(), + self.normalized_shape, + self.weight.float() if self.weight is not None else None, + self.bias.float() if self.bias is not None else None, + self.eps, + ).to(origin_dtype) + + +class SD35AdaLayerNormZeroX(nn.Module): + r""" + Norm layer adaptive layer norm zero (AdaLN-Zero). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, norm_type: str = "layer_norm", bias: bool = True) -> None: + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 9 * embedding_dim, bias=bias) + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + else: + raise ValueError(f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm'.") + + def forward( + self, + hidden_states: torch.Tensor, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, ...]: + emb = self.linear(self.silu(emb)) + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp, shift_msa2, scale_msa2, gate_msa2 = emb.chunk( + 9, dim=1 + ) + norm_hidden_states = self.norm(hidden_states) + hidden_states = norm_hidden_states * (1 + scale_msa[:, None]) + shift_msa[:, None] + norm_hidden_states2 = norm_hidden_states * (1 + scale_msa2[:, None]) + shift_msa2[:, None] + return hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp, norm_hidden_states2, gate_msa2 + + +class AdaLayerNormZero(nn.Module): + r""" + Norm layer adaptive layer norm zero (adaLN-Zero). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, num_embeddings: int | None = None, norm_type="layer_norm", bias=True): + super().__init__() + if num_embeddings is not None: + self.emb = CombinedTimestepLabelEmbeddings(num_embeddings, embedding_dim) + else: + self.emb = None + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 6 * embedding_dim, bias=bias) + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + elif norm_type == "fp32_layer_norm": + self.norm = FP32LayerNorm(embedding_dim, elementwise_affine=False, bias=False) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, + x: torch.Tensor, + timestep: torch.Tensor | None = None, + class_labels: torch.LongTensor | None = None, + hidden_dtype: torch.dtype | None = None, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + if self.emb is not None: + emb = self.emb(timestep, class_labels, hidden_dtype=hidden_dtype) + emb = self.linear(self.silu(emb)) + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = emb.chunk(6, dim=1) + x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None] + return x, gate_msa, shift_mlp, scale_mlp, gate_mlp + + +class AdaLayerNormZeroSingle(nn.Module): + r""" + Norm layer adaptive layer norm zero (adaLN-Zero). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, norm_type="layer_norm", bias=True): + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 3 * embedding_dim, bias=bias) + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, + x: torch.Tensor, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + emb = self.linear(self.silu(emb)) + shift_msa, scale_msa, gate_msa = emb.chunk(3, dim=1) + x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None] + return x, gate_msa + + +class LuminaRMSNormZero(nn.Module): + """ + Norm layer adaptive RMS normalization zero. + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + """ + + def __init__(self, embedding_dim: int, norm_eps: float, norm_elementwise_affine: bool): + super().__init__() + self.silu = nn.SiLU() + self.linear = nn.Linear( + min(embedding_dim, 1024), + 4 * embedding_dim, + bias=True, + ) + self.norm = RMSNorm(embedding_dim, eps=norm_eps) + + def forward( + self, + x: torch.Tensor, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + emb = self.linear(self.silu(emb)) + scale_msa, gate_msa, scale_mlp, gate_mlp = emb.chunk(4, dim=1) + x = self.norm(x) * (1 + scale_msa[:, None]) + + return x, gate_msa, scale_mlp, gate_mlp + + +class AdaLayerNormSingle(nn.Module): + r""" + Norm layer adaptive layer norm single (adaLN-single). + + As proposed in PixArt-Alpha (see: https://huggingface.co/papers/2310.00426; Section 2.3). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + use_additional_conditions (`bool`): To use additional conditions for normalization or not. + """ + + def __init__(self, embedding_dim: int, use_additional_conditions: bool = False): + super().__init__() + + self.emb = PixArtAlphaCombinedTimestepSizeEmbeddings( + embedding_dim, size_emb_dim=embedding_dim // 3, use_additional_conditions=use_additional_conditions + ) + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 6 * embedding_dim, bias=True) + + def forward( + self, + timestep: torch.Tensor, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + batch_size: int | None = None, + hidden_dtype: torch.dtype | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + # No modulation happening here. + added_cond_kwargs = added_cond_kwargs or {"resolution": None, "aspect_ratio": None} + embedded_timestep = self.emb(timestep, **added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_dtype) + return self.linear(self.silu(embedded_timestep)), embedded_timestep + + +class AdaGroupNorm(nn.Module): + r""" + GroupNorm layer modified to incorporate timestep embeddings. + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + num_groups (`int`): The number of groups to separate the channels into. + act_fn (`str`, *optional*, defaults to `None`): The activation function to use. + eps (`float`, *optional*, defaults to `1e-5`): The epsilon value to use for numerical stability. + """ + + def __init__( + self, embedding_dim: int, out_dim: int, num_groups: int, act_fn: str | None = None, eps: float = 1e-5 + ): + super().__init__() + self.num_groups = num_groups + self.eps = eps + + if act_fn is None: + self.act = None + else: + self.act = get_activation(act_fn) + + self.linear = nn.Linear(embedding_dim, out_dim * 2) + + def forward(self, x: torch.Tensor, emb: torch.Tensor) -> torch.Tensor: + if self.act: + emb = self.act(emb) + emb = self.linear(emb) + emb = emb[:, :, None, None] + scale, shift = emb.chunk(2, dim=1) + + x = F.group_norm(x, self.num_groups, eps=self.eps) + x = x * (1 + scale) + shift + return x + + +class AdaLayerNormContinuous(nn.Module): + r""" + Adaptive normalization layer with a norm layer (layer_norm or rms_norm). + + Args: + embedding_dim (`int`): Embedding dimension to use during projection. + conditioning_embedding_dim (`int`): Dimension of the input condition. + elementwise_affine (`bool`, defaults to `True`): + Boolean flag to denote if affine transformation should be applied. + eps (`float`, defaults to 1e-5): Epsilon factor. + bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use. + norm_type (`str`, defaults to `"layer_norm"`): + Normalization layer to use. Values supported: "layer_norm", "rms_norm". + """ + + def __init__( + self, + embedding_dim: int, + conditioning_embedding_dim: int, + # NOTE: It is a bit weird that the norm layer can be configured to have scale and shift parameters + # because the output is immediately scaled and shifted by the projected conditioning embeddings. + # Note that AdaLayerNorm does not let the norm layer have scale and shift parameters. + # However, this is how it was implemented in the original code, and it's rather likely you should + # set `elementwise_affine` to False. + elementwise_affine=True, + eps=1e-5, + bias=True, + norm_type="layer_norm", + ): + super().__init__() + self.silu = nn.SiLU() + self.linear = nn.Linear(conditioning_embedding_dim, embedding_dim * 2, bias=bias) + if norm_type == "layer_norm": + self.norm = LayerNorm(embedding_dim, eps, elementwise_affine, bias) + elif norm_type == "rms_norm": + self.norm = RMSNorm(embedding_dim, eps, elementwise_affine) + else: + raise ValueError(f"unknown norm_type {norm_type}") + + def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor: + # convert back to the original dtype in case `conditioning_embedding`` is upcasted to float32 (needed for hunyuanDiT) + emb = self.linear(self.silu(conditioning_embedding).to(x.dtype)) + scale, shift = torch.chunk(emb, 2, dim=1) + x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +class LuminaLayerNormContinuous(nn.Module): + def __init__( + self, + embedding_dim: int, + conditioning_embedding_dim: int, + # NOTE: It is a bit weird that the norm layer can be configured to have scale and shift parameters + # because the output is immediately scaled and shifted by the projected conditioning embeddings. + # Note that AdaLayerNorm does not let the norm layer have scale and shift parameters. + # However, this is how it was implemented in the original code, and it's rather likely you should + # set `elementwise_affine` to False. + elementwise_affine=True, + eps=1e-5, + bias=True, + norm_type="layer_norm", + out_dim: int | None = None, + ): + super().__init__() + + # AdaLN + self.silu = nn.SiLU() + self.linear_1 = nn.Linear(conditioning_embedding_dim, embedding_dim, bias=bias) + + if norm_type == "layer_norm": + self.norm = LayerNorm(embedding_dim, eps, elementwise_affine, bias) + elif norm_type == "rms_norm": + self.norm = RMSNorm(embedding_dim, eps=eps, elementwise_affine=elementwise_affine) + else: + raise ValueError(f"unknown norm_type {norm_type}") + + self.linear_2 = None + if out_dim is not None: + self.linear_2 = nn.Linear(embedding_dim, out_dim, bias=bias) + + def forward( + self, + x: torch.Tensor, + conditioning_embedding: torch.Tensor, + ) -> torch.Tensor: + # convert back to the original dtype in case `conditioning_embedding`` is upcasted to float32 (needed for hunyuanDiT) + emb = self.linear_1(self.silu(conditioning_embedding).to(x.dtype)) + scale = emb + x = self.norm(x) * (1 + scale)[:, None, :] + + if self.linear_2 is not None: + x = self.linear_2(x) + + return x + + +class CogView3PlusAdaLayerNormZeroTextImage(nn.Module): + r""" + Norm layer adaptive layer norm zero (adaLN-Zero). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, dim: int): + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 12 * dim, bias=True) + self.norm_x = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.norm_c = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + + def forward( + self, + x: torch.Tensor, + context: torch.Tensor, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + emb = self.linear(self.silu(emb)) + ( + shift_msa, + scale_msa, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + c_shift_msa, + c_scale_msa, + c_gate_msa, + c_shift_mlp, + c_scale_mlp, + c_gate_mlp, + ) = emb.chunk(12, dim=1) + normed_x = self.norm_x(x) + normed_context = self.norm_c(context) + x = normed_x * (1 + scale_msa[:, None]) + shift_msa[:, None] + context = normed_context * (1 + c_scale_msa[:, None]) + c_shift_msa[:, None] + return x, gate_msa, shift_mlp, scale_mlp, gate_mlp, context, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp + + +class CogVideoXLayerNormZero(nn.Module): + def __init__( + self, + conditioning_dim: int, + embedding_dim: int, + elementwise_affine: bool = True, + eps: float = 1e-5, + bias: bool = True, + ) -> None: + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(conditioning_dim, 6 * embedding_dim, bias=bias) + self.norm = nn.LayerNorm(embedding_dim, eps=eps, elementwise_affine=elementwise_affine) + + def forward( + self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor]: + shift, scale, gate, enc_shift, enc_scale, enc_gate = self.linear(self.silu(temb)).chunk(6, dim=1) + hidden_states = self.norm(hidden_states) * (1 + scale)[:, None, :] + shift[:, None, :] + encoder_hidden_states = self.norm(encoder_hidden_states) * (1 + enc_scale)[:, None, :] + enc_shift[:, None, :] + return hidden_states, encoder_hidden_states, gate[:, None, :], enc_gate[:, None, :] + + +if is_torch_version(">=", "2.1.0"): + LayerNorm = nn.LayerNorm +else: + # Has optional bias parameter compared to torch layer norm + # TODO: replace with torch layernorm once min required torch version >= 2.1 + class LayerNorm(nn.Module): + r""" + LayerNorm with the bias parameter. + + Args: + dim (`int`): Dimensionality to use for the parameters. + eps (`float`, defaults to 1e-5): Epsilon factor. + elementwise_affine (`bool`, defaults to `True`): + Boolean flag to denote if affine transformation should be applied. + bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use. + """ + + def __init__(self, dim, eps: float = 1e-5, elementwise_affine: bool = True, bias: bool = True): + super().__init__() + + self.eps = eps + + if isinstance(dim, numbers.Integral): + dim = (dim,) + + self.dim = torch.Size(dim) + + if elementwise_affine: + self.weight = nn.Parameter(torch.ones(dim)) + self.bias = nn.Parameter(torch.zeros(dim)) if bias else None + else: + self.weight = None + self.bias = None + + def forward(self, input): + return F.layer_norm(input, self.dim, self.weight, self.bias, self.eps) + + +class RMSNorm(nn.Module): + r""" + RMS Norm as introduced in https://huggingface.co/papers/1910.07467 by Zhang et al. + + Args: + dim (`int`): Number of dimensions to use for `weights`. Only effective when `elementwise_affine` is True. + eps (`float`): Small value to use when calculating the reciprocal of the square-root. + elementwise_affine (`bool`, defaults to `True`): + Boolean flag to denote if affine transformation should be applied. + bias (`bool`, defaults to False): If also training the `bias` param. + """ + + def __init__(self, dim, eps: float, elementwise_affine: bool = True, bias: bool = False): + super().__init__() + + self.eps = eps + self.elementwise_affine = elementwise_affine + + if isinstance(dim, numbers.Integral): + dim = (dim,) + + self.dim = torch.Size(dim) + + self.weight = None + self.bias = None + + if elementwise_affine: + self.weight = nn.Parameter(torch.ones(dim)) + if bias: + self.bias = nn.Parameter(torch.zeros(dim)) + + def forward(self, hidden_states): + if is_torch_npu_available(): + import torch_npu + + if self.weight is not None: + # convert into half-precision if necessary + if self.weight.dtype in [torch.float16, torch.bfloat16]: + hidden_states = hidden_states.to(self.weight.dtype) + hidden_states = torch_npu.npu_rms_norm(hidden_states, self.weight, epsilon=self.eps)[0] + if self.bias is not None: + hidden_states = hidden_states + self.bias + else: + input_dtype = hidden_states.dtype + variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.eps) + + if self.weight is not None: + # convert into half-precision if necessary + if self.weight.dtype in [torch.float16, torch.bfloat16]: + hidden_states = hidden_states.to(self.weight.dtype) + hidden_states = hidden_states * self.weight + if self.bias is not None: + hidden_states = hidden_states + self.bias + else: + hidden_states = hidden_states.to(input_dtype) + + return hidden_states + + +# TODO: (Dhruv) This can be replaced with regular RMSNorm in Mochi once `_keep_in_fp32_modules` is supported +# for sharded checkpoints, see: https://github.com/huggingface/diffusers/issues/10013 +class MochiRMSNorm(nn.Module): + def __init__(self, dim, eps: float, elementwise_affine: bool = True): + super().__init__() + + self.eps = eps + + if isinstance(dim, numbers.Integral): + dim = (dim,) + + self.dim = torch.Size(dim) + + if elementwise_affine: + self.weight = nn.Parameter(torch.ones(dim)) + else: + self.weight = None + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.eps) + + if self.weight is not None: + hidden_states = hidden_states * self.weight + hidden_states = hidden_states.to(input_dtype) + + return hidden_states + + +class GlobalResponseNorm(nn.Module): + r""" + Global response normalization as introduced in ConvNeXt-v2 (https://huggingface.co/papers/2301.00808). + + Args: + dim (`int`): Number of dimensions to use for the `gamma` and `beta`. + """ + + # Taken from https://github.com/facebookresearch/ConvNeXt-V2/blob/3608f67cc1dae164790c5d0aead7bf2d73d9719b/models/utils.py#L105 + def __init__(self, dim): + super().__init__() + self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim)) + self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim)) + + def forward(self, x): + gx = torch.norm(x, p=2, dim=(1, 2), keepdim=True) + nx = gx / (gx.mean(dim=-1, keepdim=True) + 1e-6) + return self.gamma * (x * nx) + self.beta + x + + +class LpNorm(nn.Module): + def __init__(self, p: int = 2, dim: int = -1, eps: float = 1e-12): + super().__init__() + + self.p = p + self.dim = dim + self.eps = eps + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + return F.normalize(hidden_states, p=self.p, dim=self.dim, eps=self.eps) + + +def get_normalization( + norm_type: str = "batch_norm", + num_features: int | None = None, + eps: float = 1e-5, + elementwise_affine: bool = True, + bias: bool = True, +) -> nn.Module: + if norm_type == "rms_norm": + norm = RMSNorm(num_features, eps=eps, elementwise_affine=elementwise_affine, bias=bias) + elif norm_type == "layer_norm": + norm = nn.LayerNorm(num_features, eps=eps, elementwise_affine=elementwise_affine, bias=bias) + elif norm_type == "batch_norm": + norm = nn.BatchNorm2d(num_features, eps=eps, affine=elementwise_affine) + else: + raise ValueError(f"{norm_type=} is not supported.") + return norm diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/resnet.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..df793b534ebb7fbed9eb40d553f971bdec0a8a80 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/resnet.py @@ -0,0 +1,801 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# `TemporalConvLayer` Copyright 2025 Alibaba DAMO-VILAB, The ModelScope Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import deprecate +from .activations import get_activation +from .attention_processor import SpatialNorm +from .downsampling import ( # noqa + Downsample1D, + Downsample2D, + FirDownsample2D, + KDownsample2D, + downsample_2d, +) +from .normalization import AdaGroupNorm +from .upsampling import ( # noqa + FirUpsample2D, + KUpsample2D, + Upsample1D, + Upsample2D, + upfirdn2d_native, + upsample_2d, +) + + +class ResnetBlockCondNorm2D(nn.Module): + r""" + A Resnet block that use normalization layer that incorporate conditioning information. + + Parameters: + in_channels (`int`): The number of channels in the input. + out_channels (`int`, *optional*, default to be `None`): + The number of output channels for the first conv2d layer. If None, same as `in_channels`. + dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use. + temb_channels (`int`, *optional*, default to `512`): the number of channels in timestep embedding. + groups (`int`, *optional*, default to `32`): The number of groups to use for the first normalization layer. + groups_out (`int`, *optional*, default to None): + The number of groups to use for the second normalization layer. if set to None, same as `groups`. + eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization. + non_linearity (`str`, *optional*, default to `"swish"`): the activation function to use. + time_embedding_norm (`str`, *optional*, default to `"ada_group"` ): + The normalization layer for time embedding `temb`. Currently only support "ada_group" or "spatial". + kernel (`torch.Tensor`, optional, default to None): FIR filter, see + [`~models.resnet.FirUpsample2D`] and [`~models.resnet.FirDownsample2D`]. + output_scale_factor (`float`, *optional*, default to be `1.0`): the scale factor to use for the output. + use_in_shortcut (`bool`, *optional*, default to `True`): + If `True`, add a 1x1 nn.conv2d layer for skip-connection. + up (`bool`, *optional*, default to `False`): If `True`, add an upsample layer. + down (`bool`, *optional*, default to `False`): If `True`, add a downsample layer. + conv_shortcut_bias (`bool`, *optional*, default to `True`): If `True`, adds a learnable bias to the + `conv_shortcut` output. + conv_2d_out_channels (`int`, *optional*, default to `None`): the number of channels in the output. + If None, same as `out_channels`. + """ + + def __init__( + self, + *, + in_channels: int, + out_channels: int | None = None, + conv_shortcut: bool = False, + dropout: float = 0.0, + temb_channels: int = 512, + groups: int = 32, + groups_out: int | None = None, + eps: float = 1e-6, + non_linearity: str = "swish", + time_embedding_norm: str = "ada_group", # ada_group, spatial + output_scale_factor: float = 1.0, + use_in_shortcut: bool | None = None, + up: bool = False, + down: bool = False, + conv_shortcut_bias: bool = True, + conv_2d_out_channels: int | None = None, + ): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + self.time_embedding_norm = time_embedding_norm + + if groups_out is None: + groups_out = groups + + if self.time_embedding_norm == "ada_group": # ada_group + self.norm1 = AdaGroupNorm(temb_channels, in_channels, groups, eps=eps) + elif self.time_embedding_norm == "spatial": + self.norm1 = SpatialNorm(in_channels, temb_channels) + else: + raise ValueError(f" unsupported time_embedding_norm: {self.time_embedding_norm}") + + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if self.time_embedding_norm == "ada_group": # ada_group + self.norm2 = AdaGroupNorm(temb_channels, out_channels, groups_out, eps=eps) + elif self.time_embedding_norm == "spatial": # spatial + self.norm2 = SpatialNorm(out_channels, temb_channels) + else: + raise ValueError(f" unsupported time_embedding_norm: {self.time_embedding_norm}") + + self.dropout = torch.nn.Dropout(dropout) + + conv_2d_out_channels = conv_2d_out_channels or out_channels + self.conv2 = nn.Conv2d(out_channels, conv_2d_out_channels, kernel_size=3, stride=1, padding=1) + + self.nonlinearity = get_activation(non_linearity) + + self.upsample = self.downsample = None + if self.up: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + self.downsample = Downsample2D(in_channels, use_conv=False, padding=1, name="op") + + self.use_in_shortcut = self.in_channels != conv_2d_out_channels if use_in_shortcut is None else use_in_shortcut + + self.conv_shortcut = None + if self.use_in_shortcut: + self.conv_shortcut = nn.Conv2d( + in_channels, + conv_2d_out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=conv_shortcut_bias, + ) + + def forward(self, input_tensor: torch.Tensor, temb: torch.Tensor, *args, **kwargs) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + hidden_states = input_tensor + + hidden_states = self.norm1(hidden_states, temb) + + hidden_states = self.nonlinearity(hidden_states) + + if self.upsample is not None: + # upsample_nearest_nhwc fails with large batch sizes. see https://github.com/huggingface/diffusers/issues/984 + if hidden_states.shape[0] >= 64: + input_tensor = input_tensor.contiguous() + hidden_states = hidden_states.contiguous() + input_tensor = self.upsample(input_tensor) + hidden_states = self.upsample(hidden_states) + + elif self.downsample is not None: + input_tensor = self.downsample(input_tensor) + hidden_states = self.downsample(hidden_states) + + hidden_states = self.conv1(hidden_states) + + hidden_states = self.norm2(hidden_states, temb) + + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + input_tensor = self.conv_shortcut(input_tensor) + + output_tensor = (input_tensor + hidden_states) / self.output_scale_factor + + return output_tensor + + +class ResnetBlock2D(nn.Module): + r""" + A Resnet block. + + Parameters: + in_channels (`int`): The number of channels in the input. + out_channels (`int`, *optional*, default to be `None`): + The number of output channels for the first conv2d layer. If None, same as `in_channels`. + dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use. + temb_channels (`int`, *optional*, default to `512`): the number of channels in timestep embedding. + groups (`int`, *optional*, default to `32`): The number of groups to use for the first normalization layer. + groups_out (`int`, *optional*, default to None): + The number of groups to use for the second normalization layer. if set to None, same as `groups`. + eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization. + non_linearity (`str`, *optional*, default to `"swish"`): the activation function to use. + time_embedding_norm (`str`, *optional*, default to `"default"` ): Time scale shift config. + By default, apply timestep embedding conditioning with a simple shift mechanism. Choose "scale_shift" for a + stronger conditioning with scale and shift. + kernel (`torch.Tensor`, optional, default to None): FIR filter, see + [`~models.resnet.FirUpsample2D`] and [`~models.resnet.FirDownsample2D`]. + output_scale_factor (`float`, *optional*, default to be `1.0`): the scale factor to use for the output. + use_in_shortcut (`bool`, *optional*, default to `True`): + If `True`, add a 1x1 nn.conv2d layer for skip-connection. + up (`bool`, *optional*, default to `False`): If `True`, add an upsample layer. + down (`bool`, *optional*, default to `False`): If `True`, add a downsample layer. + conv_shortcut_bias (`bool`, *optional*, default to `True`): If `True`, adds a learnable bias to the + `conv_shortcut` output. + conv_2d_out_channels (`int`, *optional*, default to `None`): the number of channels in the output. + If None, same as `out_channels`. + """ + + def __init__( + self, + *, + in_channels: int, + out_channels: int | None = None, + conv_shortcut: bool = False, + dropout: float = 0.0, + temb_channels: int = 512, + groups: int = 32, + groups_out: int | None = None, + pre_norm: bool = True, + eps: float = 1e-6, + non_linearity: str = "swish", + skip_time_act: bool = False, + time_embedding_norm: str = "default", # default, scale_shift, + kernel: torch.Tensor | None = None, + output_scale_factor: float = 1.0, + use_in_shortcut: bool | None = None, + up: bool = False, + down: bool = False, + conv_shortcut_bias: bool = True, + conv_2d_out_channels: int | None = None, + ): + super().__init__() + if time_embedding_norm == "ada_group": + raise ValueError( + "This class cannot be used with `time_embedding_norm==ada_group`, please use `ResnetBlockCondNorm2D` instead", + ) + if time_embedding_norm == "spatial": + raise ValueError( + "This class cannot be used with `time_embedding_norm==spatial`, please use `ResnetBlockCondNorm2D` instead", + ) + + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + self.time_embedding_norm = time_embedding_norm + self.skip_time_act = skip_time_act + + if groups_out is None: + groups_out = groups + + self.norm1 = torch.nn.GroupNorm(num_groups=groups, num_channels=in_channels, eps=eps, affine=True) + + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + if self.time_embedding_norm == "default": + self.time_emb_proj = nn.Linear(temb_channels, out_channels) + elif self.time_embedding_norm == "scale_shift": + self.time_emb_proj = nn.Linear(temb_channels, 2 * out_channels) + else: + raise ValueError(f"unknown time_embedding_norm : {self.time_embedding_norm} ") + else: + self.time_emb_proj = None + + self.norm2 = torch.nn.GroupNorm(num_groups=groups_out, num_channels=out_channels, eps=eps, affine=True) + + self.dropout = torch.nn.Dropout(dropout) + conv_2d_out_channels = conv_2d_out_channels or out_channels + self.conv2 = nn.Conv2d(out_channels, conv_2d_out_channels, kernel_size=3, stride=1, padding=1) + + self.nonlinearity = get_activation(non_linearity) + + self.upsample = self.downsample = None + if self.up: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.upsample = lambda x: upsample_2d(x, kernel=fir_kernel) + elif kernel == "sde_vp": + self.upsample = partial(F.interpolate, scale_factor=2.0, mode="nearest") + else: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.downsample = lambda x: downsample_2d(x, kernel=fir_kernel) + elif kernel == "sde_vp": + self.downsample = partial(F.avg_pool2d, kernel_size=2, stride=2) + else: + self.downsample = Downsample2D(in_channels, use_conv=False, padding=1, name="op") + + self.use_in_shortcut = self.in_channels != conv_2d_out_channels if use_in_shortcut is None else use_in_shortcut + + self.conv_shortcut = None + if self.use_in_shortcut: + self.conv_shortcut = nn.Conv2d( + in_channels, + conv_2d_out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=conv_shortcut_bias, + ) + + def forward(self, input_tensor: torch.Tensor, temb: torch.Tensor, *args, **kwargs) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + hidden_states = input_tensor + + hidden_states = self.norm1(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + if self.upsample is not None: + # upsample_nearest_nhwc fails with large batch sizes. see https://github.com/huggingface/diffusers/issues/984 + if hidden_states.shape[0] >= 64: + input_tensor = input_tensor.contiguous() + hidden_states = hidden_states.contiguous() + input_tensor = self.upsample(input_tensor) + hidden_states = self.upsample(hidden_states) + elif self.downsample is not None: + input_tensor = self.downsample(input_tensor) + hidden_states = self.downsample(hidden_states) + + hidden_states = self.conv1(hidden_states) + + if self.time_emb_proj is not None: + if not self.skip_time_act: + temb = self.nonlinearity(temb) + temb = self.time_emb_proj(temb)[:, :, None, None] + + if self.time_embedding_norm == "default": + if temb is not None: + hidden_states = hidden_states + temb + hidden_states = self.norm2(hidden_states) + elif self.time_embedding_norm == "scale_shift": + if temb is None: + raise ValueError( + f" `temb` should not be None when `time_embedding_norm` is {self.time_embedding_norm}" + ) + time_scale, time_shift = torch.chunk(temb, 2, dim=1) + hidden_states = self.norm2(hidden_states) + hidden_states = hidden_states * (1 + time_scale) + time_shift + else: + hidden_states = self.norm2(hidden_states) + + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + # Only use contiguous() during training to avoid DDP gradient stride mismatch warning. + # In inference mode (eval or no_grad), skip contiguous() for better performance, especially on CPU. + # Issue: https://github.com/huggingface/diffusers/issues/12975 + if self.training: + input_tensor = input_tensor.contiguous() + input_tensor = self.conv_shortcut(input_tensor) + + output_tensor = (input_tensor + hidden_states) / self.output_scale_factor + + return output_tensor + + +# unet_rl.py +def rearrange_dims(tensor: torch.Tensor) -> torch.Tensor: + if len(tensor.shape) == 2: + return tensor[:, :, None] + if len(tensor.shape) == 3: + return tensor[:, :, None, :] + elif len(tensor.shape) == 4: + return tensor[:, :, 0, :] + else: + raise ValueError(f"`len(tensor)`: {len(tensor)} has to be 2, 3 or 4.") + + +class Conv1dBlock(nn.Module): + """ + Conv1d --> GroupNorm --> Mish + + Parameters: + inp_channels (`int`): Number of input channels. + out_channels (`int`): Number of output channels. + kernel_size (`int` or `tuple`): Size of the convolving kernel. + n_groups (`int`, default `8`): Number of groups to separate the channels into. + activation (`str`, defaults to `mish`): Name of the activation function. + """ + + def __init__( + self, + inp_channels: int, + out_channels: int, + kernel_size: int | tuple[int, int], + n_groups: int = 8, + activation: str = "mish", + ): + super().__init__() + + self.conv1d = nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2) + self.group_norm = nn.GroupNorm(n_groups, out_channels) + self.mish = get_activation(activation) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + intermediate_repr = self.conv1d(inputs) + intermediate_repr = rearrange_dims(intermediate_repr) + intermediate_repr = self.group_norm(intermediate_repr) + intermediate_repr = rearrange_dims(intermediate_repr) + output = self.mish(intermediate_repr) + return output + + +# unet_rl.py +class ResidualTemporalBlock1D(nn.Module): + """ + Residual 1D block with temporal convolutions. + + Parameters: + inp_channels (`int`): Number of input channels. + out_channels (`int`): Number of output channels. + embed_dim (`int`): Embedding dimension. + kernel_size (`int` or `tuple`): Size of the convolving kernel. + activation (`str`, defaults `mish`): It is possible to choose the right activation function. + """ + + def __init__( + self, + inp_channels: int, + out_channels: int, + embed_dim: int, + kernel_size: int | tuple[int, int] = 5, + activation: str = "mish", + ): + super().__init__() + self.conv_in = Conv1dBlock(inp_channels, out_channels, kernel_size) + self.conv_out = Conv1dBlock(out_channels, out_channels, kernel_size) + + self.time_emb_act = get_activation(activation) + self.time_emb = nn.Linear(embed_dim, out_channels) + + self.residual_conv = ( + nn.Conv1d(inp_channels, out_channels, 1) if inp_channels != out_channels else nn.Identity() + ) + + def forward(self, inputs: torch.Tensor, t: torch.Tensor) -> torch.Tensor: + """ + Args: + inputs : [ batch_size x inp_channels x horizon ] + t : [ batch_size x embed_dim ] + + returns: + out : [ batch_size x out_channels x horizon ] + """ + t = self.time_emb_act(t) + t = self.time_emb(t) + out = self.conv_in(inputs) + rearrange_dims(t) + out = self.conv_out(out) + return out + self.residual_conv(inputs) + + +class TemporalConvLayer(nn.Module): + """ + Temporal convolutional layer that can be used for video (sequence of images) input Code mostly copied from: + https://github.com/modelscope/modelscope/blob/1509fdb973e5871f37148a4b5e5964cafd43e64d/modelscope/models/multi_modal/video_synthesis/unet_sd.py#L1016 + + Parameters: + in_dim (`int`): Number of input channels. + out_dim (`int`): Number of output channels. + dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use. + """ + + def __init__( + self, + in_dim: int, + out_dim: int | None = None, + dropout: float = 0.0, + norm_num_groups: int = 32, + ): + super().__init__() + out_dim = out_dim or in_dim + self.in_dim = in_dim + self.out_dim = out_dim + + # conv layers + self.conv1 = nn.Sequential( + nn.GroupNorm(norm_num_groups, in_dim), + nn.SiLU(), + nn.Conv3d(in_dim, out_dim, (3, 1, 1), padding=(1, 0, 0)), + ) + self.conv2 = nn.Sequential( + nn.GroupNorm(norm_num_groups, out_dim), + nn.SiLU(), + nn.Dropout(dropout), + nn.Conv3d(out_dim, in_dim, (3, 1, 1), padding=(1, 0, 0)), + ) + self.conv3 = nn.Sequential( + nn.GroupNorm(norm_num_groups, out_dim), + nn.SiLU(), + nn.Dropout(dropout), + nn.Conv3d(out_dim, in_dim, (3, 1, 1), padding=(1, 0, 0)), + ) + self.conv4 = nn.Sequential( + nn.GroupNorm(norm_num_groups, out_dim), + nn.SiLU(), + nn.Dropout(dropout), + nn.Conv3d(out_dim, in_dim, (3, 1, 1), padding=(1, 0, 0)), + ) + + # zero out the last layer params,so the conv block is identity + nn.init.zeros_(self.conv4[-1].weight) + nn.init.zeros_(self.conv4[-1].bias) + + def forward(self, hidden_states: torch.Tensor, num_frames: int = 1) -> torch.Tensor: + hidden_states = ( + hidden_states[None, :].reshape((-1, num_frames) + hidden_states.shape[1:]).permute(0, 2, 1, 3, 4) + ) + + identity = hidden_states + hidden_states = self.conv1(hidden_states) + hidden_states = self.conv2(hidden_states) + hidden_states = self.conv3(hidden_states) + hidden_states = self.conv4(hidden_states) + + hidden_states = identity + hidden_states + + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).reshape( + (hidden_states.shape[0] * hidden_states.shape[2], -1) + hidden_states.shape[3:] + ) + return hidden_states + + +class TemporalResnetBlock(nn.Module): + r""" + A Resnet block. + + Parameters: + in_channels (`int`): The number of channels in the input. + out_channels (`int`, *optional*, default to be `None`): + The number of output channels for the first conv2d layer. If None, same as `in_channels`. + temb_channels (`int`, *optional*, default to `512`): the number of channels in timestep embedding. + eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization. + """ + + def __init__( + self, + in_channels: int, + out_channels: int | None = None, + temb_channels: int = 512, + eps: float = 1e-6, + ): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + + kernel_size = (3, 1, 1) + padding = [k // 2 for k in kernel_size] + + self.norm1 = torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=eps, affine=True) + self.conv1 = nn.Conv3d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=1, + padding=padding, + ) + + if temb_channels is not None: + self.time_emb_proj = nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = torch.nn.GroupNorm(num_groups=32, num_channels=out_channels, eps=eps, affine=True) + + self.dropout = torch.nn.Dropout(0.0) + self.conv2 = nn.Conv3d( + out_channels, + out_channels, + kernel_size=kernel_size, + stride=1, + padding=padding, + ) + + self.nonlinearity = get_activation("silu") + + self.use_in_shortcut = self.in_channels != out_channels + + self.conv_shortcut = None + if self.use_in_shortcut: + self.conv_shortcut = nn.Conv3d( + in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + def forward(self, input_tensor: torch.Tensor, temb: torch.Tensor) -> torch.Tensor: + hidden_states = input_tensor + + hidden_states = self.norm1(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + hidden_states = self.conv1(hidden_states) + + if self.time_emb_proj is not None: + temb = self.nonlinearity(temb) + temb = self.time_emb_proj(temb)[:, :, :, None, None] + temb = temb.permute(0, 2, 1, 3, 4) + hidden_states = hidden_states + temb + + hidden_states = self.norm2(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + input_tensor = self.conv_shortcut(input_tensor) + + output_tensor = input_tensor + hidden_states + + return output_tensor + + +# VideoResBlock +class SpatioTemporalResBlock(nn.Module): + r""" + A SpatioTemporal Resnet block. + + Parameters: + in_channels (`int`): The number of channels in the input. + out_channels (`int`, *optional*, default to be `None`): + The number of output channels for the first conv2d layer. If None, same as `in_channels`. + temb_channels (`int`, *optional*, default to `512`): the number of channels in timestep embedding. + eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the spatial resenet. + temporal_eps (`float`, *optional*, defaults to `eps`): The epsilon to use for the temporal resnet. + merge_factor (`float`, *optional*, defaults to `0.5`): The merge factor to use for the temporal mixing. + merge_strategy (`str`, *optional*, defaults to `learned_with_images`): + The merge strategy to use for the temporal mixing. + switch_spatial_to_temporal_mix (`bool`, *optional*, defaults to `False`): + If `True`, switch the spatial and temporal mixing. + """ + + def __init__( + self, + in_channels: int, + out_channels: int | None = None, + temb_channels: int = 512, + eps: float = 1e-6, + temporal_eps: float | None = None, + merge_factor: float = 0.5, + merge_strategy="learned_with_images", + switch_spatial_to_temporal_mix: bool = False, + ): + super().__init__() + + self.spatial_res_block = ResnetBlock2D( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=eps, + ) + + self.temporal_res_block = TemporalResnetBlock( + in_channels=out_channels if out_channels is not None else in_channels, + out_channels=out_channels if out_channels is not None else in_channels, + temb_channels=temb_channels, + eps=temporal_eps if temporal_eps is not None else eps, + ) + + self.time_mixer = AlphaBlender( + alpha=merge_factor, + merge_strategy=merge_strategy, + switch_spatial_to_temporal_mix=switch_spatial_to_temporal_mix, + ) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor | None = None, + image_only_indicator: torch.Tensor | None = None, + ): + num_frames = image_only_indicator.shape[-1] + hidden_states = self.spatial_res_block(hidden_states, temb) + + batch_frames, channels, height, width = hidden_states.shape + batch_size = batch_frames // num_frames + + hidden_states_mix = ( + hidden_states[None, :].reshape(batch_size, num_frames, channels, height, width).permute(0, 2, 1, 3, 4) + ) + hidden_states = ( + hidden_states[None, :].reshape(batch_size, num_frames, channels, height, width).permute(0, 2, 1, 3, 4) + ) + + if temb is not None: + temb = temb.reshape(batch_size, num_frames, -1) + + hidden_states = self.temporal_res_block(hidden_states, temb) + hidden_states = self.time_mixer( + x_spatial=hidden_states_mix, + x_temporal=hidden_states, + image_only_indicator=image_only_indicator, + ) + + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).reshape(batch_frames, channels, height, width) + return hidden_states + + +class AlphaBlender(nn.Module): + r""" + A module to blend spatial and temporal features. + + Parameters: + alpha (`float`): The initial value of the blending factor. + merge_strategy (`str`, *optional*, defaults to `learned_with_images`): + The merge strategy to use for the temporal mixing. + switch_spatial_to_temporal_mix (`bool`, *optional*, defaults to `False`): + If `True`, switch the spatial and temporal mixing. + """ + + strategies = ["learned", "fixed", "learned_with_images"] + + def __init__( + self, + alpha: float, + merge_strategy: str = "learned_with_images", + switch_spatial_to_temporal_mix: bool = False, + ): + super().__init__() + self.merge_strategy = merge_strategy + self.switch_spatial_to_temporal_mix = switch_spatial_to_temporal_mix # For TemporalVAE + + if merge_strategy not in self.strategies: + raise ValueError(f"merge_strategy needs to be in {self.strategies}") + + if self.merge_strategy == "fixed": + self.register_buffer("mix_factor", torch.Tensor([alpha])) + elif self.merge_strategy == "learned" or self.merge_strategy == "learned_with_images": + self.register_parameter("mix_factor", torch.nn.Parameter(torch.Tensor([alpha]))) + else: + raise ValueError(f"Unknown merge strategy {self.merge_strategy}") + + def get_alpha(self, image_only_indicator: torch.Tensor, ndims: int) -> torch.Tensor: + if self.merge_strategy == "fixed": + alpha = self.mix_factor + + elif self.merge_strategy == "learned": + alpha = torch.sigmoid(self.mix_factor) + + elif self.merge_strategy == "learned_with_images": + if image_only_indicator is None: + raise ValueError("Please provide image_only_indicator to use learned_with_images merge strategy") + + alpha = torch.where( + image_only_indicator.bool(), + torch.ones(1, 1, device=image_only_indicator.device), + torch.sigmoid(self.mix_factor)[..., None], + ) + + # (batch, channel, frames, height, width) + if ndims == 5: + alpha = alpha[:, None, :, None, None] + # (batch*frames, height*width, channels) + elif ndims == 3: + alpha = alpha.reshape(-1)[:, None, None] + else: + raise ValueError(f"Unexpected ndims {ndims}. Dimensions should be 3 or 5") + + else: + raise NotImplementedError + + return alpha + + def forward( + self, + x_spatial: torch.Tensor, + x_temporal: torch.Tensor, + image_only_indicator: torch.Tensor | None = None, + ) -> torch.Tensor: + alpha = self.get_alpha(image_only_indicator, x_spatial.ndim) + alpha = alpha.to(x_spatial.dtype) + + if self.switch_spatial_to_temporal_mix: + alpha = 1.0 - alpha + + x = alpha * x_spatial + (1.0 - alpha) * x_temporal + return x diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/resnet_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/resnet_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..9bedaa9a36b656ed0922d6dd14d515be80d14d51 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/resnet_flax.py @@ -0,0 +1,144 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import flax.linen as nn +import jax +import jax.numpy as jnp + +from ..utils import logging + + +logger = logging.get_logger(__name__) + + +class FlaxUpsample2D(nn.Module): + out_channels: int + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.conv = nn.Conv( + self.out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + def __call__(self, hidden_states): + batch, height, width, channels = hidden_states.shape + hidden_states = jax.image.resize( + hidden_states, + shape=(batch, height * 2, width * 2, channels), + method="nearest", + ) + hidden_states = self.conv(hidden_states) + return hidden_states + + +class FlaxDownsample2D(nn.Module): + out_channels: int + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.conv = nn.Conv( + self.out_channels, + kernel_size=(3, 3), + strides=(2, 2), + padding=((1, 1), (1, 1)), # padding="VALID", + dtype=self.dtype, + ) + + def __call__(self, hidden_states): + # pad = ((0, 0), (0, 1), (0, 1), (0, 0)) # pad height and width dim + # hidden_states = jnp.pad(hidden_states, pad_width=pad) + hidden_states = self.conv(hidden_states) + return hidden_states + + +class FlaxResnetBlock2D(nn.Module): + in_channels: int + out_channels: int = None + dropout_prob: float = 0.0 + use_nin_shortcut: bool = None + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + out_channels = self.in_channels if self.out_channels is None else self.out_channels + + self.norm1 = nn.GroupNorm(num_groups=32, epsilon=1e-5) + self.conv1 = nn.Conv( + out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + self.time_emb_proj = nn.Dense(out_channels, dtype=self.dtype) + + self.norm2 = nn.GroupNorm(num_groups=32, epsilon=1e-5) + self.dropout = nn.Dropout(self.dropout_prob) + self.conv2 = nn.Conv( + out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + use_nin_shortcut = self.in_channels != out_channels if self.use_nin_shortcut is None else self.use_nin_shortcut + + self.conv_shortcut = None + if use_nin_shortcut: + self.conv_shortcut = nn.Conv( + out_channels, + kernel_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype=self.dtype, + ) + + def __call__(self, hidden_states, temb, deterministic=True): + residual = hidden_states + hidden_states = self.norm1(hidden_states) + hidden_states = nn.swish(hidden_states) + hidden_states = self.conv1(hidden_states) + + temb = self.time_emb_proj(nn.swish(temb)) + temb = jnp.expand_dims(jnp.expand_dims(temb, 1), 1) + hidden_states = hidden_states + temb + + hidden_states = self.norm2(hidden_states) + hidden_states = nn.swish(hidden_states) + hidden_states = self.dropout(hidden_states, deterministic) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + residual = self.conv_shortcut(residual) + + return hidden_states + residual diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..45157ee91808b13e18b3bc9de033d91d313c8b26 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__init__.py @@ -0,0 +1,54 @@ +from ...utils import is_torch_available + + +if is_torch_available(): + from .auraflow_transformer_2d import AuraFlowTransformer2DModel + from .cogvideox_transformer_3d import CogVideoXTransformer3DModel + from .consisid_transformer_3d import ConsisIDTransformer3DModel + from .dit_transformer_2d import DiTTransformer2DModel + from .dual_transformer_2d import DualTransformer2DModel + from .hunyuan_transformer_2d import HunyuanDiT2DModel + from .latte_transformer_3d import LatteTransformer3DModel + from .lumina_nextdit2d import LuminaNextDiT2DModel + from .pixart_transformer_2d import PixArtTransformer2DModel + from .prior_transformer import PriorTransformer + from .sana_transformer import SanaTransformer2DModel + from .stable_audio_transformer import StableAudioDiTModel + from .t5_film_transformer import T5FilmDecoder + from .transformer_2d import Transformer2DModel + from .transformer_allegro import AllegroTransformer3DModel + from .transformer_bria import BriaTransformer2DModel + from .transformer_bria_fibo import BriaFiboTransformer2DModel + from .transformer_chroma import ChromaTransformer2DModel + from .transformer_chronoedit import ChronoEditTransformer3DModel + from .transformer_cogview3plus import CogView3PlusTransformer2DModel + from .transformer_cogview4 import CogView4Transformer2DModel + from .transformer_cosmos import CosmosTransformer3DModel + from .transformer_easyanimate import EasyAnimateTransformer3DModel + from .transformer_flux import FluxTransformer2DModel + from .transformer_flux2 import Flux2Transformer2DModel + from .transformer_glm_image import GlmImageTransformer2DModel + from .transformer_helios import HeliosTransformer3DModel + from .transformer_hidream_image import HiDreamImageTransformer2DModel + from .transformer_hunyuan_video import HunyuanVideoTransformer3DModel + from .transformer_hunyuan_video15 import HunyuanVideo15Transformer3DModel + from .transformer_hunyuan_video_framepack import HunyuanVideoFramepackTransformer3DModel + from .transformer_hunyuanimage import HunyuanImageTransformer2DModel + from .transformer_kandinsky import Kandinsky5Transformer3DModel + from .transformer_longcat_image import LongCatImageTransformer2DModel + from .transformer_ltx import LTXVideoTransformer3DModel + from .transformer_ltx2 import LTX2VideoTransformer3DModel + from .transformer_lumina2 import Lumina2Transformer2DModel + from .transformer_mochi import MochiTransformer3DModel + from .transformer_omnigen import OmniGenTransformer2DModel + from .transformer_ovis_image import OvisImageTransformer2DModel + from .transformer_prx import PRXTransformer2DModel + from .transformer_qwenimage import QwenImageTransformer2DModel + from .transformer_sana_video import SanaVideoTransformer3DModel + from .transformer_sd3 import SD3Transformer2DModel + from .transformer_skyreels_v2 import SkyReelsV2Transformer3DModel + from .transformer_temporal import TransformerTemporalModel + from .transformer_wan import WanTransformer3DModel + from .transformer_wan_animate import WanAnimateTransformer3DModel + from .transformer_wan_vace import WanVACETransformer3DModel + from .transformer_z_image import ZImageTransformer2DModel diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d5ed5d18179d0a111e431d4b14b768c3d46f98e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/auraflow_transformer_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/auraflow_transformer_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc71b421b090c3f57ba24e406b80b77b1c3588b8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/auraflow_transformer_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/cogvideox_transformer_3d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/cogvideox_transformer_3d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d86f4ed40f6c3fd1ad667d0c2c4fbf6b336aabf8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/cogvideox_transformer_3d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/consisid_transformer_3d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/consisid_transformer_3d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d99baad5b4425da5a398ebae91fa5947eb25083c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/consisid_transformer_3d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/dit_transformer_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/dit_transformer_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e21edc254befcac0c5d81c013870fb89a3fd333d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/dit_transformer_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/dual_transformer_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/dual_transformer_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af8f5a80803ae103cfbd7e2969a13498c98bc916 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/dual_transformer_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/hunyuan_transformer_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/hunyuan_transformer_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a5342aa68d74441faa5f1798760de5969cc959e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/hunyuan_transformer_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/latte_transformer_3d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/latte_transformer_3d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba80efed09e6b9c3a5c87505317b4887e196e681 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/latte_transformer_3d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/lumina_nextdit2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/lumina_nextdit2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd99732a01bfee9cef1713d7e471242fee5a8fbe Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/lumina_nextdit2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/pixart_transformer_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/pixart_transformer_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1509ec791add86d5f526e1dea162b8711279043c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/pixart_transformer_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/prior_transformer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/prior_transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c5ea0f1f556e89f1667e1dc576b1f30734ed210 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/prior_transformer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/sana_transformer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/sana_transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62f73c29d3de1a38a282a1cf008885afefa22f6c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/sana_transformer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/stable_audio_transformer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/stable_audio_transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4decd5925ed0edc29800f3dc7c3decdc14452186 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/stable_audio_transformer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/t5_film_transformer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/t5_film_transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..adbf0c900fabbecf80296f86c13f39dc1af25a79 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/t5_film_transformer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84c19881e5b06fa72651ded8edb2ebea856d0087 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_allegro.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_allegro.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f28fee35c3b78a4413c3b790cd8ae62f28acbcb9 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_allegro.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_bria.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_bria.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7bc9ed5ced7c43fd1618729886875995cb424498 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_bria.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_bria_fibo.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_bria_fibo.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87ae7481ced1dbf78d787d36b5ee5c5c87029151 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_bria_fibo.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_chroma.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_chroma.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50e54e26a9522e0f76efa99a77894ec0e280f073 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_chroma.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_chronoedit.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_chronoedit.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af6f06b1802f97ee84dee8ccc32161b6bf376a6d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_chronoedit.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cogview3plus.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cogview3plus.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4cad8586e92e8105e21fb1a324cdf01104817d0f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cogview3plus.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cogview4.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cogview4.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8e5a9601e2603c6793db5e264a629d10a09cf50 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cogview4.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cosmos.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cosmos.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b915b2b82958a0e98b455429c40becaf9fb3a76 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_cosmos.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_easyanimate.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_easyanimate.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ebc725a057a0ef885458f924ba18dea608eebb35 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_easyanimate.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_flux.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_flux.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6076a5c67283197fe6e4daf4d9a147fe633e71b5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_flux.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_flux2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_flux2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e485593aa0293823e6ab522a9a07f6ec21aa572a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_flux2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_glm_image.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_glm_image.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d3315161d1c7564a2eb6b4cd1a834aa22d30763 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_glm_image.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_helios.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_helios.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ead0ad4620115419759ba4ca06c86fd4767c8537 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_helios.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hidream_image.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hidream_image.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56c54fda6f29287988b995c2cb85dfc20705d03a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hidream_image.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c57e0101950ad008e3798fbe3b2b6d21f34e3694 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video15.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video15.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33c46617b5beb958aa25c624da3df11a7d53f34e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video15.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video_framepack.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video_framepack.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab9d912e52593778ec1eb2bfcb6c7de3f9ae1944 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuan_video_framepack.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuanimage.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuanimage.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e7cae73988c83b0c37b2e9cf9de1076fc4ace4e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_hunyuanimage.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_kandinsky.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_kandinsky.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4ac7a394060843b8f8a9221af8b07a33375a2a2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_kandinsky.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_longcat_image.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_longcat_image.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..660704c44bd845133a0840af9fb429534dba9a2c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_longcat_image.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ltx.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ltx.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f939440e1ae42199619df6525b0242cb443db93 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ltx.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ltx2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ltx2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45abf52c0757cd695e3fa675ff9fd293891bdf16 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ltx2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_lumina2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_lumina2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c60563dd3044691819390f8378da3f9d96bc677a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_lumina2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_mochi.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_mochi.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c958fde1530159a1971c75a85272629b1d100f66 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_mochi.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_omnigen.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_omnigen.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94c1bb3b1734d4e14abe8eaf5ca12d9f4f8fefc7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_omnigen.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ovis_image.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ovis_image.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ca8c5305f9441962d3d96267eb1a883f62af2d5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_ovis_image.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_prx.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_prx.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d991f6f31a5db256e57e00d7f79ec1cb97028ee7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_prx.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_qwenimage.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_qwenimage.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b2fb1b0cfc73a20589539a8e8dae374fd0c060d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_qwenimage.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_sana_video.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_sana_video.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6538f16d49c770623f3f5f04e3dca0a6f105a62d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_sana_video.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_sd3.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_sd3.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63bce6461b8fe6260d5a2aa3130881ccc7c9f4cb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_sd3.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_skyreels_v2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_skyreels_v2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea57dbe76b1624b0a7488566a57acc49ce287f97 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_skyreels_v2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_temporal.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_temporal.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f0ee4cadbb91e08a792f0f27cb6e24e9f7a84c5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_temporal.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e92d386e4642ee6338fec5431065225a8e7ea09 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan_animate.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan_animate.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c974819e2b26088fbe33af8d0a8b0a3ce58388cb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan_animate.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan_vace.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan_vace.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86574c5e13b877701f5d47fbbb68601cae629775 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_wan_vace.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_z_image.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_z_image.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..420dea31589e3751e83cd2257a99c00411027e1c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/__pycache__/transformer_z_image.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/auraflow_transformer_2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/auraflow_transformer_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..3fa4df7387849e164a16f8794cbfe4c868818766 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/auraflow_transformer_2d.py @@ -0,0 +1,478 @@ +# Copyright 2025 AuraFlow Authors, The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin +from ..attention_processor import ( + Attention, + AuraFlowAttnProcessor2_0, + FusedAuraFlowAttnProcessor2_0, +) +from ..embeddings import TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormZero, FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +# Taken from the original aura flow inference code. +def find_multiple(n: int, k: int) -> int: + if n % k == 0: + return n + return n + k - (n % k) + + +# Aura Flow patch embed doesn't use convs for projections. +# Additionally, it uses learned positional embeddings. +class AuraFlowPatchEmbed(nn.Module): + def __init__( + self, + height=224, + width=224, + patch_size=16, + in_channels=3, + embed_dim=768, + pos_embed_max_size=None, + ): + super().__init__() + + self.num_patches = (height // patch_size) * (width // patch_size) + self.pos_embed_max_size = pos_embed_max_size + + self.proj = nn.Linear(patch_size * patch_size * in_channels, embed_dim) + self.pos_embed = nn.Parameter(torch.randn(1, pos_embed_max_size, embed_dim) * 0.1) + + self.patch_size = patch_size + self.height, self.width = height // patch_size, width // patch_size + self.base_size = height // patch_size + + def pe_selection_index_based_on_dim(self, h, w): + # select subset of positional embedding based on H, W, where H, W is size of latent + # PE will be viewed as 2d-grid, and H/p x W/p of the PE will be selected + # because original input are in flattened format, we have to flatten this 2d grid as well. + h_p, w_p = h // self.patch_size, w // self.patch_size + h_max, w_max = int(self.pos_embed_max_size**0.5), int(self.pos_embed_max_size**0.5) + + # Calculate the top-left corner indices for the centered patch grid + starth = h_max // 2 - h_p // 2 + startw = w_max // 2 - w_p // 2 + + # Generate the row and column indices for the desired patch grid + rows = torch.arange(starth, starth + h_p, device=self.pos_embed.device) + cols = torch.arange(startw, startw + w_p, device=self.pos_embed.device) + + # Create a 2D grid of indices + row_indices, col_indices = torch.meshgrid(rows, cols, indexing="ij") + + # Convert the 2D grid indices to flattened 1D indices + selected_indices = (row_indices * w_max + col_indices).flatten() + + return selected_indices + + def forward(self, latent) -> torch.Tensor: + batch_size, num_channels, height, width = latent.size() + latent = latent.view( + batch_size, + num_channels, + height // self.patch_size, + self.patch_size, + width // self.patch_size, + self.patch_size, + ) + latent = latent.permute(0, 2, 4, 1, 3, 5).flatten(-3).flatten(1, 2) + latent = self.proj(latent) + pe_index = self.pe_selection_index_based_on_dim(height, width) + return latent + self.pos_embed[:, pe_index] + + +# Taken from the original Aura flow inference code. +# Our feedforward only has GELU but Aura uses SiLU. +class AuraFlowFeedForward(nn.Module): + def __init__(self, dim, hidden_dim=None) -> None: + super().__init__() + if hidden_dim is None: + hidden_dim = 4 * dim + + final_hidden_dim = int(2 * hidden_dim / 3) + final_hidden_dim = find_multiple(final_hidden_dim, 256) + + self.linear_1 = nn.Linear(dim, final_hidden_dim, bias=False) + self.linear_2 = nn.Linear(dim, final_hidden_dim, bias=False) + self.out_projection = nn.Linear(final_hidden_dim, dim, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = F.silu(self.linear_1(x)) * self.linear_2(x) + x = self.out_projection(x) + return x + + +class AuraFlowPreFinalBlock(nn.Module): + def __init__(self, embedding_dim: int, conditioning_embedding_dim: int): + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(conditioning_embedding_dim, embedding_dim * 2, bias=False) + + def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor: + emb = self.linear(self.silu(conditioning_embedding).to(x.dtype)) + scale, shift = torch.chunk(emb, 2, dim=1) + x = x * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +@maybe_allow_in_graph +class AuraFlowSingleTransformerBlock(nn.Module): + """Similar to `AuraFlowJointTransformerBlock` with a single DiT instead of an MMDiT.""" + + def __init__(self, dim, num_attention_heads, attention_head_dim): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim, bias=False, norm_type="fp32_layer_norm") + + processor = AuraFlowAttnProcessor2_0() + self.attn = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=attention_head_dim, + heads=num_attention_heads, + qk_norm="fp32_layer_norm", + out_dim=dim, + bias=False, + out_bias=False, + processor=processor, + ) + + self.norm2 = FP32LayerNorm(dim, elementwise_affine=False, bias=False) + self.ff = AuraFlowFeedForward(dim, dim * 4) + + def forward( + self, + hidden_states: torch.FloatTensor, + temb: torch.FloatTensor, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor: + residual = hidden_states + attention_kwargs = attention_kwargs or {} + + # Norm + Projection. + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + # Attention. + attn_output = self.attn(hidden_states=norm_hidden_states, **attention_kwargs) + + # Process attention outputs for the `hidden_states`. + hidden_states = self.norm2(residual + gate_msa.unsqueeze(1) * attn_output) + hidden_states = hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + ff_output = self.ff(hidden_states) + hidden_states = gate_mlp.unsqueeze(1) * ff_output + hidden_states = residual + hidden_states + + return hidden_states + + +@maybe_allow_in_graph +class AuraFlowJointTransformerBlock(nn.Module): + r""" + Transformer block for Aura Flow. Similar to SD3 MMDiT. Differences (non-exhaustive): + + * QK Norm in the attention blocks + * No bias in the attention blocks + * Most LayerNorms are in FP32 + + Parameters: + dim (`int`): The number of channels in the input and output. + num_attention_heads (`int`): The number of heads to use for multi-head attention. + attention_head_dim (`int`): The number of channels in each head. + is_last (`bool`): Boolean to determine if this is the last block in the model. + """ + + def __init__(self, dim, num_attention_heads, attention_head_dim): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim, bias=False, norm_type="fp32_layer_norm") + self.norm1_context = AdaLayerNormZero(dim, bias=False, norm_type="fp32_layer_norm") + + processor = AuraFlowAttnProcessor2_0() + self.attn = Attention( + query_dim=dim, + cross_attention_dim=None, + added_kv_proj_dim=dim, + added_proj_bias=False, + dim_head=attention_head_dim, + heads=num_attention_heads, + qk_norm="fp32_layer_norm", + out_dim=dim, + bias=False, + out_bias=False, + processor=processor, + context_pre_only=False, + ) + + self.norm2 = FP32LayerNorm(dim, elementwise_affine=False, bias=False) + self.ff = AuraFlowFeedForward(dim, dim * 4) + self.norm2_context = FP32LayerNorm(dim, elementwise_affine=False, bias=False) + self.ff_context = AuraFlowFeedForward(dim, dim * 4) + + def forward( + self, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor, + temb: torch.FloatTensor, + attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + residual = hidden_states + residual_context = encoder_hidden_states + attention_kwargs = attention_kwargs or {} + + # Norm + Projection. + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + + # Attention. + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + **attention_kwargs, + ) + + # Process attention outputs for the `hidden_states`. + hidden_states = self.norm2(residual + gate_msa.unsqueeze(1) * attn_output) + hidden_states = hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + hidden_states = gate_mlp.unsqueeze(1) * self.ff(hidden_states) + hidden_states = residual + hidden_states + + # Process attention outputs for the `encoder_hidden_states`. + encoder_hidden_states = self.norm2_context(residual_context + c_gate_msa.unsqueeze(1) * context_attn_output) + encoder_hidden_states = encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + encoder_hidden_states = c_gate_mlp.unsqueeze(1) * self.ff_context(encoder_hidden_states) + encoder_hidden_states = residual_context + encoder_hidden_states + + return encoder_hidden_states, hidden_states + + +class AuraFlowTransformer2DModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + r""" + A 2D Transformer model as introduced in AuraFlow (https://blog.fal.ai/auraflow/). + + Parameters: + sample_size (`int`): The width of the latent images. This is fixed during training since + it is used to learn a number of position embeddings. + patch_size (`int`): Patch size to turn the input data into small patches. + in_channels (`int`, *optional*, defaults to 4): The number of channels in the input. + num_mmdit_layers (`int`, *optional*, defaults to 4): The number of layers of MMDiT Transformer blocks to use. + num_single_dit_layers (`int`, *optional*, defaults to 32): + The number of layers of Transformer blocks to use. These blocks use concatenated image and text + representations. + attention_head_dim (`int`, *optional*, defaults to 256): The number of channels in each head. + num_attention_heads (`int`, *optional*, defaults to 12): The number of heads to use for multi-head attention. + joint_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + caption_projection_dim (`int`): Number of dimensions to use when projecting the `encoder_hidden_states`. + out_channels (`int`, defaults to 4): Number of output channels. + pos_embed_max_size (`int`, defaults to 1024): Maximum positions to embed from the image latents. + """ + + _no_split_modules = ["AuraFlowJointTransformerBlock", "AuraFlowSingleTransformerBlock", "AuraFlowPatchEmbed"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + sample_size: int = 64, + patch_size: int = 2, + in_channels: int = 4, + num_mmdit_layers: int = 4, + num_single_dit_layers: int = 32, + attention_head_dim: int = 256, + num_attention_heads: int = 12, + joint_attention_dim: int = 2048, + caption_projection_dim: int = 3072, + out_channels: int = 4, + pos_embed_max_size: int = 1024, + ): + super().__init__() + default_out_channels = in_channels + self.out_channels = out_channels if out_channels is not None else default_out_channels + self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim + + self.pos_embed = AuraFlowPatchEmbed( + height=self.config.sample_size, + width=self.config.sample_size, + patch_size=self.config.patch_size, + in_channels=self.config.in_channels, + embed_dim=self.inner_dim, + pos_embed_max_size=pos_embed_max_size, + ) + + self.context_embedder = nn.Linear( + self.config.joint_attention_dim, self.config.caption_projection_dim, bias=False + ) + self.time_step_embed = Timesteps(num_channels=256, downscale_freq_shift=0, scale=1000, flip_sin_to_cos=True) + self.time_step_proj = TimestepEmbedding(in_channels=256, time_embed_dim=self.inner_dim) + + self.joint_transformer_blocks = nn.ModuleList( + [ + AuraFlowJointTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + attention_head_dim=self.config.attention_head_dim, + ) + for i in range(self.config.num_mmdit_layers) + ] + ) + self.single_transformer_blocks = nn.ModuleList( + [ + AuraFlowSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + attention_head_dim=self.config.attention_head_dim, + ) + for _ in range(self.config.num_single_dit_layers) + ] + ) + + self.norm_out = AuraFlowPreFinalBlock(self.inner_dim, self.inner_dim) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=False) + + # https://huggingface.co/papers/2309.16588 + # prevents artifacts in the attention maps + self.register_tokens = nn.Parameter(torch.randn(1, 8, self.inner_dim) * 0.02) + + self.gradient_checkpointing = False + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedAuraFlowAttnProcessor2_0 + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedAuraFlowAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + timestep: torch.LongTensor = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + height, width = hidden_states.shape[-2:] + + # Apply patch embedding, timestep embedding, and project the caption embeddings. + hidden_states = self.pos_embed(hidden_states) # takes care of adding positional embeddings too. + temb = self.time_step_embed(timestep).to(dtype=next(self.parameters()).dtype) + temb = self.time_step_proj(temb) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + encoder_hidden_states = torch.cat( + [self.register_tokens.repeat(encoder_hidden_states.size(0), 1, 1), encoder_hidden_states], dim=1 + ) + + # MMDiT blocks. + for index_block, block in enumerate(self.joint_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + attention_kwargs=attention_kwargs, + ) + + # Single DiT blocks that combine the `hidden_states` (image) and `encoder_hidden_states` (text) + if len(self.single_transformer_blocks) > 0: + encoder_seq_len = encoder_hidden_states.size(1) + combined_hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + combined_hidden_states = self._gradient_checkpointing_func( + block, + combined_hidden_states, + temb, + ) + + else: + combined_hidden_states = block( + hidden_states=combined_hidden_states, temb=temb, attention_kwargs=attention_kwargs + ) + + hidden_states = combined_hidden_states[:, encoder_seq_len:] + + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + # unpatchify + patch_size = self.config.patch_size + out_channels = self.config.out_channels + height = height // patch_size + width = width // patch_size + + hidden_states = hidden_states.reshape( + shape=(hidden_states.shape[0], height, width, patch_size, patch_size, out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(hidden_states.shape[0], out_channels, height * patch_size, width * patch_size) + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/cogvideox_transformer_3d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/cogvideox_transformer_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..4b8beeeb6fe333b4d114c404759802604a8cf50a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/cogvideox_transformer_3d.py @@ -0,0 +1,445 @@ +# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import Attention, AttentionMixin, FeedForward +from ..attention_processor import CogVideoXAttnProcessor2_0, FusedCogVideoXAttnProcessor2_0 +from ..cache_utils import CacheMixin +from ..embeddings import CogVideoXPatchEmbed, TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNorm, CogVideoXLayerNormZero + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@maybe_allow_in_graph +class CogVideoXBlock(nn.Module): + r""" + Transformer block used in [CogVideoX](https://github.com/THUDM/CogVideo) model. + + Parameters: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + time_embed_dim (`int`): + The number of channels in timestep embedding. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to be used in feed-forward. + attention_bias (`bool`, defaults to `False`): + Whether or not to use bias in attention projection layers. + qk_norm (`bool`, defaults to `True`): + Whether or not to use normalization after query and key projections in Attention. + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use learnable elementwise affine parameters for normalization. + norm_eps (`float`, defaults to `1e-5`): + Epsilon value for normalization layers. + final_dropout (`bool` defaults to `False`): + Whether to apply a final dropout after the last feed-forward layer. + ff_inner_dim (`int`, *optional*, defaults to `None`): + Custom hidden dimension of Feed-forward layer. If not provided, `4 * dim` is used. + ff_bias (`bool`, defaults to `True`): + Whether or not to use bias in Feed-forward layer. + attention_out_bias (`bool`, defaults to `True`): + Whether or not to use bias in Attention output projection layer. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + time_embed_dim: int, + dropout: float = 0.0, + activation_fn: str = "gelu-approximate", + attention_bias: bool = False, + qk_norm: bool = True, + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + final_dropout: bool = True, + ff_inner_dim: int | None = None, + ff_bias: bool = True, + attention_out_bias: bool = True, + ): + super().__init__() + + # 1. Self Attention + self.norm1 = CogVideoXLayerNormZero(time_embed_dim, dim, norm_elementwise_affine, norm_eps, bias=True) + + self.attn1 = Attention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + qk_norm="layer_norm" if qk_norm else None, + eps=1e-6, + bias=attention_bias, + out_bias=attention_out_bias, + processor=CogVideoXAttnProcessor2_0(), + ) + + # 2. Feed Forward + self.norm2 = CogVideoXLayerNormZero(time_embed_dim, dim, norm_elementwise_affine, norm_eps, bias=True) + + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + final_dropout=final_dropout, + inner_dim=ff_inner_dim, + bias=ff_bias, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_length = encoder_hidden_states.size(1) + attention_kwargs = attention_kwargs or {} + + # norm & modulate + norm_hidden_states, norm_encoder_hidden_states, gate_msa, enc_gate_msa = self.norm1( + hidden_states, encoder_hidden_states, temb + ) + + # attention + attn_hidden_states, attn_encoder_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **attention_kwargs, + ) + + hidden_states = hidden_states + gate_msa * attn_hidden_states + encoder_hidden_states = encoder_hidden_states + enc_gate_msa * attn_encoder_hidden_states + + # norm & modulate + norm_hidden_states, norm_encoder_hidden_states, gate_ff, enc_gate_ff = self.norm2( + hidden_states, encoder_hidden_states, temb + ) + + # feed-forward + norm_hidden_states = torch.cat([norm_encoder_hidden_states, norm_hidden_states], dim=1) + ff_output = self.ff(norm_hidden_states) + + hidden_states = hidden_states + gate_ff * ff_output[:, text_seq_length:] + encoder_hidden_states = encoder_hidden_states + enc_gate_ff * ff_output[:, :text_seq_length] + + return hidden_states, encoder_hidden_states + + +class CogVideoXTransformer3DModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, CacheMixin): + """ + A Transformer model for video-like data in [CogVideoX](https://github.com/THUDM/CogVideo). + + Parameters: + num_attention_heads (`int`, defaults to `30`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `16`): + The number of channels in the output. + flip_sin_to_cos (`bool`, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + time_embed_dim (`int`, defaults to `512`): + Output dimension of timestep embeddings. + ofs_embed_dim (`int`, defaults to `512`): + Output dimension of "ofs" embeddings used in CogVideoX-5b-I2B in version 1.5 + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + num_layers (`int`, defaults to `30`): + The number of layers of Transformer blocks to use. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + attention_bias (`bool`, defaults to `True`): + Whether to use bias in the attention projection layers. + sample_width (`int`, defaults to `90`): + The width of the input latents. + sample_height (`int`, defaults to `60`): + The height of the input latents. + sample_frames (`int`, defaults to `49`): + The number of frames in the input latents. Note that this parameter was incorrectly initialized to 49 + instead of 13 because CogVideoX processed 13 latent frames at once in its default and recommended settings, + but cannot be changed to the correct value to ensure backwards compatibility. To create a transformer with + K latent frames, the correct value to pass here would be: ((K - 1) * temporal_compression_ratio + 1). + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + temporal_compression_ratio (`int`, defaults to `4`): + The compression ratio across the temporal dimension. See documentation for `sample_frames`. + max_text_seq_length (`int`, defaults to `226`): + The maximum sequence length of the input text embeddings. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + timestep_activation_fn (`str`, defaults to `"silu"`): + Activation function to use when generating the timestep embeddings. + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use elementwise affine in normalization layers. + norm_eps (`float`, defaults to `1e-5`): + The epsilon value to use in normalization layers. + spatial_interpolation_scale (`float`, defaults to `1.875`): + Scaling factor to apply in 3D positional embeddings across spatial dimensions. + temporal_interpolation_scale (`float`, defaults to `1.0`): + Scaling factor to apply in 3D positional embeddings across temporal dimensions. + """ + + _skip_layerwise_casting_patterns = ["patch_embed", "norm"] + _supports_gradient_checkpointing = True + _no_split_modules = ["CogVideoXBlock", "CogVideoXPatchEmbed"] + + @register_to_config + def __init__( + self, + num_attention_heads: int = 30, + attention_head_dim: int = 64, + in_channels: int = 16, + out_channels: int | None = 16, + flip_sin_to_cos: bool = True, + freq_shift: int = 0, + time_embed_dim: int = 512, + ofs_embed_dim: int | None = None, + text_embed_dim: int = 4096, + num_layers: int = 30, + dropout: float = 0.0, + attention_bias: bool = True, + sample_width: int = 90, + sample_height: int = 60, + sample_frames: int = 49, + patch_size: int = 2, + patch_size_t: int | None = None, + temporal_compression_ratio: int = 4, + max_text_seq_length: int = 226, + activation_fn: str = "gelu-approximate", + timestep_activation_fn: str = "silu", + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + spatial_interpolation_scale: float = 1.875, + temporal_interpolation_scale: float = 1.0, + use_rotary_positional_embeddings: bool = False, + use_learned_positional_embeddings: bool = False, + patch_bias: bool = True, + ): + super().__init__() + inner_dim = num_attention_heads * attention_head_dim + + if not use_rotary_positional_embeddings and use_learned_positional_embeddings: + raise ValueError( + "There are no CogVideoX checkpoints available with disable rotary embeddings and learned positional " + "embeddings. If you're using a custom model and/or believe this should be supported, please open an " + "issue at https://github.com/huggingface/diffusers/issues." + ) + + # 1. Patch embedding + self.patch_embed = CogVideoXPatchEmbed( + patch_size=patch_size, + patch_size_t=patch_size_t, + in_channels=in_channels, + embed_dim=inner_dim, + text_embed_dim=text_embed_dim, + bias=patch_bias, + sample_width=sample_width, + sample_height=sample_height, + sample_frames=sample_frames, + temporal_compression_ratio=temporal_compression_ratio, + max_text_seq_length=max_text_seq_length, + spatial_interpolation_scale=spatial_interpolation_scale, + temporal_interpolation_scale=temporal_interpolation_scale, + use_positional_embeddings=not use_rotary_positional_embeddings, + use_learned_positional_embeddings=use_learned_positional_embeddings, + ) + self.embedding_dropout = nn.Dropout(dropout) + + # 2. Time embeddings and ofs embedding(Only CogVideoX1.5-5B I2V have) + + self.time_proj = Timesteps(inner_dim, flip_sin_to_cos, freq_shift) + self.time_embedding = TimestepEmbedding(inner_dim, time_embed_dim, timestep_activation_fn) + + self.ofs_proj = None + self.ofs_embedding = None + if ofs_embed_dim: + self.ofs_proj = Timesteps(ofs_embed_dim, flip_sin_to_cos, freq_shift) + self.ofs_embedding = TimestepEmbedding( + ofs_embed_dim, ofs_embed_dim, timestep_activation_fn + ) # same as time embeddings, for ofs + + # 3. Define spatio-temporal transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + CogVideoXBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + time_embed_dim=time_embed_dim, + dropout=dropout, + activation_fn=activation_fn, + attention_bias=attention_bias, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + ) + for _ in range(num_layers) + ] + ) + self.norm_final = nn.LayerNorm(inner_dim, norm_eps, norm_elementwise_affine) + + # 4. Output blocks + self.norm_out = AdaLayerNorm( + embedding_dim=time_embed_dim, + output_dim=2 * inner_dim, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + chunk_dim=1, + ) + + if patch_size_t is None: + # For CogVideox 1.0 + output_dim = patch_size * patch_size * out_channels + else: + # For CogVideoX 1.5 + output_dim = patch_size * patch_size * patch_size_t * out_channels + + self.proj_out = nn.Linear(inner_dim, output_dim) + + self.gradient_checkpointing = False + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedCogVideoXAttnProcessor2_0 + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedCogVideoXAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: int | float | torch.LongTensor, + timestep_cond: torch.Tensor | None = None, + ofs: int | float | torch.LongTensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_frames, channels, height, width = hidden_states.shape + + # 1. Time embedding + timesteps = timestep + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=hidden_states.dtype) + emb = self.time_embedding(t_emb, timestep_cond) + + if self.ofs_embedding is not None: + ofs_emb = self.ofs_proj(ofs) + ofs_emb = ofs_emb.to(dtype=hidden_states.dtype) + ofs_emb = self.ofs_embedding(ofs_emb) + emb = emb + ofs_emb + + # 2. Patch embedding + hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) + hidden_states = self.embedding_dropout(hidden_states) + + text_seq_length = encoder_hidden_states.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + # 3. Transformer blocks + for i, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + emb, + image_rotary_emb, + attention_kwargs, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + attention_kwargs=attention_kwargs, + ) + + hidden_states = self.norm_final(hidden_states) + + # 4. Final block + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + p = self.config.patch_size + p_t = self.config.patch_size_t + + if p_t is None: + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + else: + output = hidden_states.reshape( + batch_size, (num_frames + p_t - 1) // p_t, height // p, width // p, -1, p_t, p, p + ) + output = output.permute(0, 1, 5, 4, 2, 6, 3, 7).flatten(6, 7).flatten(4, 5).flatten(1, 2) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/consisid_transformer_3d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/consisid_transformer_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..64a58e39436632ad87f2b50c644becfe331b063e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/consisid_transformer_3d.py @@ -0,0 +1,711 @@ +# Copyright 2025 ConsisID Authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import Attention, AttentionMixin, FeedForward +from ..attention_processor import CogVideoXAttnProcessor2_0 +from ..embeddings import CogVideoXPatchEmbed, TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNorm, CogVideoXLayerNormZero + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class PerceiverAttention(nn.Module): + def __init__(self, dim: int, dim_head: int = 64, heads: int = 8, kv_dim: int | None = None): + super().__init__() + + self.scale = dim_head**-0.5 + self.dim_head = dim_head + self.heads = heads + inner_dim = dim_head * heads + + self.norm1 = nn.LayerNorm(dim if kv_dim is None else kv_dim) + self.norm2 = nn.LayerNorm(dim) + + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_kv = nn.Linear(dim if kv_dim is None else kv_dim, inner_dim * 2, bias=False) + self.to_out = nn.Linear(inner_dim, dim, bias=False) + + def forward(self, image_embeds: torch.Tensor, latents: torch.Tensor) -> torch.Tensor: + # Apply normalization + image_embeds = self.norm1(image_embeds) + latents = self.norm2(latents) + + batch_size, seq_len, _ = latents.shape # Get batch size and sequence length + + # Compute query, key, and value matrices + query = self.to_q(latents) + kv_input = torch.cat((image_embeds, latents), dim=-2) + key, value = self.to_kv(kv_input).chunk(2, dim=-1) + + # Reshape the tensors for multi-head attention + query = query.reshape(query.size(0), -1, self.heads, self.dim_head).transpose(1, 2) + key = key.reshape(key.size(0), -1, self.heads, self.dim_head).transpose(1, 2) + value = value.reshape(value.size(0), -1, self.heads, self.dim_head).transpose(1, 2) + + # attention + scale = 1 / math.sqrt(math.sqrt(self.dim_head)) + weight = (query * scale) @ (key * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + output = weight @ value + + # Reshape and return the final output + output = output.permute(0, 2, 1, 3).reshape(batch_size, seq_len, -1) + + return self.to_out(output) + + +class LocalFacialExtractor(nn.Module): + def __init__( + self, + id_dim: int = 1280, + vit_dim: int = 1024, + depth: int = 10, + dim_head: int = 64, + heads: int = 16, + num_id_token: int = 5, + num_queries: int = 32, + output_dim: int = 2048, + ff_mult: int = 4, + num_scale: int = 5, + ): + super().__init__() + + # Storing identity token and query information + self.num_id_token = num_id_token + self.vit_dim = vit_dim + self.num_queries = num_queries + assert depth % num_scale == 0 + self.depth = depth // num_scale + self.num_scale = num_scale + scale = vit_dim**-0.5 + + # Learnable latent query embeddings + self.latents = nn.Parameter(torch.randn(1, num_queries, vit_dim) * scale) + # Projection layer to map the latent output to the desired dimension + self.proj_out = nn.Parameter(scale * torch.randn(vit_dim, output_dim)) + + # Attention and ConsisIDFeedForward layer stack + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append( + nn.ModuleList( + [ + PerceiverAttention(dim=vit_dim, dim_head=dim_head, heads=heads), # Perceiver Attention layer + nn.Sequential( + nn.LayerNorm(vit_dim), + nn.Linear(vit_dim, vit_dim * ff_mult, bias=False), + nn.GELU(), + nn.Linear(vit_dim * ff_mult, vit_dim, bias=False), + ), # ConsisIDFeedForward layer + ] + ) + ) + + # Mappings for each of the 5 different ViT features + for i in range(num_scale): + setattr( + self, + f"mapping_{i}", + nn.Sequential( + nn.Linear(vit_dim, vit_dim), + nn.LayerNorm(vit_dim), + nn.LeakyReLU(), + nn.Linear(vit_dim, vit_dim), + nn.LayerNorm(vit_dim), + nn.LeakyReLU(), + nn.Linear(vit_dim, vit_dim), + ), + ) + + # Mapping for identity embedding vectors + self.id_embedding_mapping = nn.Sequential( + nn.Linear(id_dim, vit_dim), + nn.LayerNorm(vit_dim), + nn.LeakyReLU(), + nn.Linear(vit_dim, vit_dim), + nn.LayerNorm(vit_dim), + nn.LeakyReLU(), + nn.Linear(vit_dim, vit_dim * num_id_token), + ) + + def forward(self, id_embeds: torch.Tensor, vit_hidden_states: list[torch.Tensor]) -> torch.Tensor: + # Repeat latent queries for the batch size + latents = self.latents.repeat(id_embeds.size(0), 1, 1) + + # Map the identity embedding to tokens + id_embeds = self.id_embedding_mapping(id_embeds) + id_embeds = id_embeds.reshape(-1, self.num_id_token, self.vit_dim) + + # Concatenate identity tokens with the latent queries + latents = torch.cat((latents, id_embeds), dim=1) + + # Process each of the num_scale visual feature inputs + for i in range(self.num_scale): + vit_feature = getattr(self, f"mapping_{i}")(vit_hidden_states[i]) + ctx_feature = torch.cat((id_embeds, vit_feature), dim=1) + + # Pass through the PerceiverAttention and ConsisIDFeedForward layers + for attn, ff in self.layers[i * self.depth : (i + 1) * self.depth]: + latents = attn(ctx_feature, latents) + latents + latents = ff(latents) + latents + + # Retain only the query latents + latents = latents[:, : self.num_queries] + # Project the latents to the output dimension + latents = latents @ self.proj_out + return latents + + +class PerceiverCrossAttention(nn.Module): + def __init__(self, dim: int = 3072, dim_head: int = 128, heads: int = 16, kv_dim: int = 2048): + super().__init__() + + self.scale = dim_head**-0.5 + self.dim_head = dim_head + self.heads = heads + inner_dim = dim_head * heads + + # Layer normalization to stabilize training + self.norm1 = nn.LayerNorm(dim if kv_dim is None else kv_dim) + self.norm2 = nn.LayerNorm(dim) + + # Linear transformations to produce queries, keys, and values + self.to_q = nn.Linear(dim, inner_dim, bias=False) + self.to_kv = nn.Linear(dim if kv_dim is None else kv_dim, inner_dim * 2, bias=False) + self.to_out = nn.Linear(inner_dim, dim, bias=False) + + def forward(self, image_embeds: torch.Tensor, hidden_states: torch.Tensor) -> torch.Tensor: + # Apply layer normalization to the input image and latent features + image_embeds = self.norm1(image_embeds) + hidden_states = self.norm2(hidden_states) + + batch_size, seq_len, _ = hidden_states.shape + + # Compute queries, keys, and values + query = self.to_q(hidden_states) + key, value = self.to_kv(image_embeds).chunk(2, dim=-1) + + # Reshape tensors to split into attention heads + query = query.reshape(query.size(0), -1, self.heads, self.dim_head).transpose(1, 2) + key = key.reshape(key.size(0), -1, self.heads, self.dim_head).transpose(1, 2) + value = value.reshape(value.size(0), -1, self.heads, self.dim_head).transpose(1, 2) + + # Compute attention weights + scale = 1 / math.sqrt(math.sqrt(self.dim_head)) + weight = (query * scale) @ (key * scale).transpose(-2, -1) # More stable scaling than post-division + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) + + # Compute the output via weighted combination of values + out = weight @ value + + # Reshape and permute to prepare for final linear transformation + out = out.permute(0, 2, 1, 3).reshape(batch_size, seq_len, -1) + + return self.to_out(out) + + +@maybe_allow_in_graph +class ConsisIDBlock(nn.Module): + r""" + Transformer block used in [ConsisID](https://github.com/PKU-YuanGroup/ConsisID) model. + + Parameters: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + time_embed_dim (`int`): + The number of channels in timestep embedding. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to be used in feed-forward. + attention_bias (`bool`, defaults to `False`): + Whether or not to use bias in attention projection layers. + qk_norm (`bool`, defaults to `True`): + Whether or not to use normalization after query and key projections in Attention. + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use learnable elementwise affine parameters for normalization. + norm_eps (`float`, defaults to `1e-5`): + Epsilon value for normalization layers. + final_dropout (`bool` defaults to `False`): + Whether to apply a final dropout after the last feed-forward layer. + ff_inner_dim (`int`, *optional*, defaults to `None`): + Custom hidden dimension of Feed-forward layer. If not provided, `4 * dim` is used. + ff_bias (`bool`, defaults to `True`): + Whether or not to use bias in Feed-forward layer. + attention_out_bias (`bool`, defaults to `True`): + Whether or not to use bias in Attention output projection layer. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + time_embed_dim: int, + dropout: float = 0.0, + activation_fn: str = "gelu-approximate", + attention_bias: bool = False, + qk_norm: bool = True, + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + final_dropout: bool = True, + ff_inner_dim: int | None = None, + ff_bias: bool = True, + attention_out_bias: bool = True, + ): + super().__init__() + + # 1. Self Attention + self.norm1 = CogVideoXLayerNormZero(time_embed_dim, dim, norm_elementwise_affine, norm_eps, bias=True) + + self.attn1 = Attention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + qk_norm="layer_norm" if qk_norm else None, + eps=1e-6, + bias=attention_bias, + out_bias=attention_out_bias, + processor=CogVideoXAttnProcessor2_0(), + ) + + # 2. Feed Forward + self.norm2 = CogVideoXLayerNormZero(time_embed_dim, dim, norm_elementwise_affine, norm_eps, bias=True) + + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + final_dropout=final_dropout, + inner_dim=ff_inner_dim, + bias=ff_bias, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_length = encoder_hidden_states.size(1) + + # norm & modulate + norm_hidden_states, norm_encoder_hidden_states, gate_msa, enc_gate_msa = self.norm1( + hidden_states, encoder_hidden_states, temb + ) + + # attention + attn_hidden_states, attn_encoder_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) + + hidden_states = hidden_states + gate_msa * attn_hidden_states + encoder_hidden_states = encoder_hidden_states + enc_gate_msa * attn_encoder_hidden_states + + # norm & modulate + norm_hidden_states, norm_encoder_hidden_states, gate_ff, enc_gate_ff = self.norm2( + hidden_states, encoder_hidden_states, temb + ) + + # feed-forward + norm_hidden_states = torch.cat([norm_encoder_hidden_states, norm_hidden_states], dim=1) + ff_output = self.ff(norm_hidden_states) + + hidden_states = hidden_states + gate_ff * ff_output[:, text_seq_length:] + encoder_hidden_states = encoder_hidden_states + enc_gate_ff * ff_output[:, :text_seq_length] + + return hidden_states, encoder_hidden_states + + +class ConsisIDTransformer3DModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin): + """ + A Transformer model for video-like data in [ConsisID](https://github.com/PKU-YuanGroup/ConsisID). + + Parameters: + num_attention_heads (`int`, defaults to `30`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `16`): + The number of channels in the output. + flip_sin_to_cos (`bool`, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + time_embed_dim (`int`, defaults to `512`): + Output dimension of timestep embeddings. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + num_layers (`int`, defaults to `30`): + The number of layers of Transformer blocks to use. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + attention_bias (`bool`, defaults to `True`): + Whether to use bias in the attention projection layers. + sample_width (`int`, defaults to `90`): + The width of the input latents. + sample_height (`int`, defaults to `60`): + The height of the input latents. + sample_frames (`int`, defaults to `49`): + The number of frames in the input latents. Note that this parameter was incorrectly initialized to 49 + instead of 13 because ConsisID processed 13 latent frames at once in its default and recommended settings, + but cannot be changed to the correct value to ensure backwards compatibility. To create a transformer with + K latent frames, the correct value to pass here would be: ((K - 1) * temporal_compression_ratio + 1). + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + temporal_compression_ratio (`int`, defaults to `4`): + The compression ratio across the temporal dimension. See documentation for `sample_frames`. + max_text_seq_length (`int`, defaults to `226`): + The maximum sequence length of the input text embeddings. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + timestep_activation_fn (`str`, defaults to `"silu"`): + Activation function to use when generating the timestep embeddings. + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use elementwise affine in normalization layers. + norm_eps (`float`, defaults to `1e-5`): + The epsilon value to use in normalization layers. + spatial_interpolation_scale (`float`, defaults to `1.875`): + Scaling factor to apply in 3D positional embeddings across spatial dimensions. + temporal_interpolation_scale (`float`, defaults to `1.0`): + Scaling factor to apply in 3D positional embeddings across temporal dimensions. + is_train_face (`bool`, defaults to `False`): + Whether to use enable the identity-preserving module during the training process. When set to `True`, the + model will focus on identity-preserving tasks. + is_kps (`bool`, defaults to `False`): + Whether to enable keypoint for global facial extractor. If `True`, keypoints will be in the model. + cross_attn_interval (`int`, defaults to `2`): + The interval between cross-attention layers in the Transformer architecture. A larger value may reduce the + frequency of cross-attention computations, which can help reduce computational overhead. + cross_attn_dim_head (`int`, optional, defaults to `128`): + The dimensionality of each attention head in the cross-attention layers of the Transformer architecture. A + larger value increases the capacity to attend to more complex patterns, but also increases memory and + computation costs. + cross_attn_num_heads (`int`, optional, defaults to `16`): + The number of attention heads in the cross-attention layers. More heads allow for more parallel attention + mechanisms, capturing diverse relationships between different components of the input, but can also + increase computational requirements. + LFE_id_dim (`int`, optional, defaults to `1280`): + The dimensionality of the identity vector used in the Local Facial Extractor (LFE). This vector represents + the identity features of a face, which are important for tasks like face recognition and identity + preservation across different frames. + LFE_vit_dim (`int`, optional, defaults to `1024`): + The dimension of the vision transformer (ViT) output used in the Local Facial Extractor (LFE). This value + dictates the size of the transformer-generated feature vectors that will be processed for facial feature + extraction. + LFE_depth (`int`, optional, defaults to `10`): + The number of layers in the Local Facial Extractor (LFE). Increasing the depth allows the model to capture + more complex representations of facial features, but also increases the computational load. + LFE_dim_head (`int`, optional, defaults to `64`): + The dimensionality of each attention head in the Local Facial Extractor (LFE). This parameter affects how + finely the model can process and focus on different parts of the facial features during the extraction + process. + LFE_num_heads (`int`, optional, defaults to `16`): + The number of attention heads in the Local Facial Extractor (LFE). More heads can improve the model's + ability to capture diverse facial features, but at the cost of increased computational complexity. + LFE_num_id_token (`int`, optional, defaults to `5`): + The number of identity tokens used in the Local Facial Extractor (LFE). This defines how many + identity-related tokens the model will process to ensure face identity preservation during feature + extraction. + LFE_num_querie (`int`, optional, defaults to `32`): + The number of query tokens used in the Local Facial Extractor (LFE). These tokens are used to capture + high-frequency face-related information that aids in accurate facial feature extraction. + LFE_output_dim (`int`, optional, defaults to `2048`): + The output dimension of the Local Facial Extractor (LFE). This dimension determines the size of the feature + vectors produced by the LFE module, which will be used for subsequent tasks such as face recognition or + tracking. + LFE_ff_mult (`int`, optional, defaults to `4`): + The multiplication factor applied to the feed-forward network's hidden layer size in the Local Facial + Extractor (LFE). A higher value increases the model's capacity to learn more complex facial feature + transformations, but also increases the computation and memory requirements. + LFE_num_scale (`int`, optional, defaults to `5`): + The number of different scales visual feature. A higher value increases the model's capacity to learn more + complex facial feature transformations, but also increases the computation and memory requirements. + local_face_scale (`float`, defaults to `1.0`): + A scaling factor used to adjust the importance of local facial features in the model. This can influence + how strongly the model focuses on high frequency face-related content. + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + num_attention_heads: int = 30, + attention_head_dim: int = 64, + in_channels: int = 16, + out_channels: int | None = 16, + flip_sin_to_cos: bool = True, + freq_shift: int = 0, + time_embed_dim: int = 512, + text_embed_dim: int = 4096, + num_layers: int = 30, + dropout: float = 0.0, + attention_bias: bool = True, + sample_width: int = 90, + sample_height: int = 60, + sample_frames: int = 49, + patch_size: int = 2, + temporal_compression_ratio: int = 4, + max_text_seq_length: int = 226, + activation_fn: str = "gelu-approximate", + timestep_activation_fn: str = "silu", + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + spatial_interpolation_scale: float = 1.875, + temporal_interpolation_scale: float = 1.0, + use_rotary_positional_embeddings: bool = False, + use_learned_positional_embeddings: bool = False, + is_train_face: bool = False, + is_kps: bool = False, + cross_attn_interval: int = 2, + cross_attn_dim_head: int = 128, + cross_attn_num_heads: int = 16, + LFE_id_dim: int = 1280, + LFE_vit_dim: int = 1024, + LFE_depth: int = 10, + LFE_dim_head: int = 64, + LFE_num_heads: int = 16, + LFE_num_id_token: int = 5, + LFE_num_querie: int = 32, + LFE_output_dim: int = 2048, + LFE_ff_mult: int = 4, + LFE_num_scale: int = 5, + local_face_scale: float = 1.0, + ): + super().__init__() + inner_dim = num_attention_heads * attention_head_dim + + if not use_rotary_positional_embeddings and use_learned_positional_embeddings: + raise ValueError( + "There are no ConsisID checkpoints available with disable rotary embeddings and learned positional " + "embeddings. If you're using a custom model and/or believe this should be supported, please open an " + "issue at https://github.com/huggingface/diffusers/issues." + ) + + # 1. Patch embedding + self.patch_embed = CogVideoXPatchEmbed( + patch_size=patch_size, + in_channels=in_channels, + embed_dim=inner_dim, + text_embed_dim=text_embed_dim, + bias=True, + sample_width=sample_width, + sample_height=sample_height, + sample_frames=sample_frames, + temporal_compression_ratio=temporal_compression_ratio, + max_text_seq_length=max_text_seq_length, + spatial_interpolation_scale=spatial_interpolation_scale, + temporal_interpolation_scale=temporal_interpolation_scale, + use_positional_embeddings=not use_rotary_positional_embeddings, + use_learned_positional_embeddings=use_learned_positional_embeddings, + ) + self.embedding_dropout = nn.Dropout(dropout) + + # 2. Time embeddings + self.time_proj = Timesteps(inner_dim, flip_sin_to_cos, freq_shift) + self.time_embedding = TimestepEmbedding(inner_dim, time_embed_dim, timestep_activation_fn) + + # 3. Define spatio-temporal transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + ConsisIDBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + time_embed_dim=time_embed_dim, + dropout=dropout, + activation_fn=activation_fn, + attention_bias=attention_bias, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + ) + for _ in range(num_layers) + ] + ) + self.norm_final = nn.LayerNorm(inner_dim, norm_eps, norm_elementwise_affine) + + # 4. Output blocks + self.norm_out = AdaLayerNorm( + embedding_dim=time_embed_dim, + output_dim=2 * inner_dim, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + chunk_dim=1, + ) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * out_channels) + + self.is_train_face = is_train_face + self.is_kps = is_kps + + # 5. Define identity-preserving config + if is_train_face: + # LFE configs + self.LFE_id_dim = LFE_id_dim + self.LFE_vit_dim = LFE_vit_dim + self.LFE_depth = LFE_depth + self.LFE_dim_head = LFE_dim_head + self.LFE_num_heads = LFE_num_heads + self.LFE_num_id_token = LFE_num_id_token + self.LFE_num_querie = LFE_num_querie + self.LFE_output_dim = LFE_output_dim + self.LFE_ff_mult = LFE_ff_mult + self.LFE_num_scale = LFE_num_scale + # cross configs + self.inner_dim = inner_dim + self.cross_attn_interval = cross_attn_interval + self.num_cross_attn = num_layers // cross_attn_interval + self.cross_attn_dim_head = cross_attn_dim_head + self.cross_attn_num_heads = cross_attn_num_heads + self.cross_attn_kv_dim = int(self.inner_dim / 3 * 2) + self.local_face_scale = local_face_scale + # face modules + self._init_face_inputs() + + self.gradient_checkpointing = False + + def _init_face_inputs(self): + self.local_facial_extractor = LocalFacialExtractor( + id_dim=self.LFE_id_dim, + vit_dim=self.LFE_vit_dim, + depth=self.LFE_depth, + dim_head=self.LFE_dim_head, + heads=self.LFE_num_heads, + num_id_token=self.LFE_num_id_token, + num_queries=self.LFE_num_querie, + output_dim=self.LFE_output_dim, + ff_mult=self.LFE_ff_mult, + num_scale=self.LFE_num_scale, + ) + self.perceiver_cross_attention = nn.ModuleList( + [ + PerceiverCrossAttention( + dim=self.inner_dim, + dim_head=self.cross_attn_dim_head, + heads=self.cross_attn_num_heads, + kv_dim=self.cross_attn_kv_dim, + ) + for _ in range(self.num_cross_attn) + ] + ) + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: int | float | torch.LongTensor, + timestep_cond: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + id_cond: torch.Tensor | None = None, + id_vit_hidden: torch.Tensor | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + # fuse clip and insightface + valid_face_emb = None + if self.is_train_face: + id_cond = id_cond.to(device=hidden_states.device, dtype=hidden_states.dtype) + id_vit_hidden = [ + tensor.to(device=hidden_states.device, dtype=hidden_states.dtype) for tensor in id_vit_hidden + ] + valid_face_emb = self.local_facial_extractor( + id_cond, id_vit_hidden + ) # torch.Size([1, 1280]), list[5](torch.Size([1, 577, 1024])) -> torch.Size([1, 32, 2048]) + + batch_size, num_frames, channels, height, width = hidden_states.shape + + # 1. Time embedding + timesteps = timestep + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=hidden_states.dtype) + emb = self.time_embedding(t_emb, timestep_cond) + + # 2. Patch embedding + # torch.Size([1, 226, 4096]) torch.Size([1, 13, 32, 60, 90]) + hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) # torch.Size([1, 17776, 3072]) + hidden_states = self.embedding_dropout(hidden_states) # torch.Size([1, 17776, 3072]) + + text_seq_length = encoder_hidden_states.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] # torch.Size([1, 226, 3072]) + hidden_states = hidden_states[:, text_seq_length:] # torch.Size([1, 17550, 3072]) + + # 3. Transformer blocks + ca_idx = 0 + for i, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + emb, + image_rotary_emb, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + ) + + if self.is_train_face: + if i % self.cross_attn_interval == 0 and valid_face_emb is not None: + hidden_states = hidden_states + self.local_face_scale * self.perceiver_cross_attention[ca_idx]( + valid_face_emb, hidden_states + ) # torch.Size([2, 32, 2048]) torch.Size([2, 17550, 3072]) + ca_idx += 1 + + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + hidden_states = self.norm_final(hidden_states) + hidden_states = hidden_states[:, text_seq_length:] + + # 4. Final block + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + # Note: we use `-1` instead of `channels`: + # - It is okay to `channels` use for ConsisID (number of input channels is equal to output channels) + p = self.config.patch_size + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/dit_transformer_2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/dit_transformer_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..3d10c278cdbb5c62901a34db9047a04fd65aa752 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/dit_transformer_2d.py @@ -0,0 +1,226 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any + +import torch +import torch.nn.functional as F +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ..attention import BasicTransformerBlock +from ..embeddings import PatchEmbed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class DiTTransformer2DModel(ModelMixin, ConfigMixin): + r""" + A 2D Transformer model as introduced in DiT (https://huggingface.co/papers/2212.09748). + + Parameters: + num_attention_heads (int, optional, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (int, optional, defaults to 72): The number of channels in each head. + in_channels (int, defaults to 4): The number of channels in the input. + out_channels (int, optional): + The number of channels in the output. Specify this parameter if the output channel number differs from the + input. + num_layers (int, optional, defaults to 28): The number of layers of Transformer blocks to use. + dropout (float, optional, defaults to 0.0): The dropout probability to use within the Transformer blocks. + norm_num_groups (int, optional, defaults to 32): + Number of groups for group normalization within Transformer blocks. + attention_bias (bool, optional, defaults to True): + Configure if the Transformer blocks' attention should contain a bias parameter. + sample_size (int, defaults to 32): + The width of the latent images. This parameter is fixed during training. + patch_size (int, defaults to 2): + Size of the patches the model processes, relevant for architectures working on non-sequential data. + activation_fn (str, optional, defaults to "gelu-approximate"): + Activation function to use in feed-forward networks within Transformer blocks. + num_embeds_ada_norm (int, optional, defaults to 1000): + Number of embeddings for AdaLayerNorm, fixed during training and affects the maximum denoising steps during + inference. + upcast_attention (bool, optional, defaults to False): + If true, upcasts the attention mechanism dimensions for potentially improved performance. + norm_type (str, optional, defaults to "ada_norm_zero"): + Specifies the type of normalization used, can be 'ada_norm_zero'. + norm_elementwise_affine (bool, optional, defaults to False): + If true, enables element-wise affine parameters in the normalization layers. + norm_eps (float, optional, defaults to 1e-5): + A small constant added to the denominator in normalization layers to prevent division by zero. + """ + + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + _supports_gradient_checkpointing = True + _supports_group_offloading = False + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 72, + in_channels: int = 4, + out_channels: int | None = None, + num_layers: int = 28, + dropout: float = 0.0, + norm_num_groups: int = 32, + attention_bias: bool = True, + sample_size: int = 32, + patch_size: int = 2, + activation_fn: str = "gelu-approximate", + num_embeds_ada_norm: int | None = 1000, + upcast_attention: bool = False, + norm_type: str = "ada_norm_zero", + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-5, + ): + super().__init__() + + # Validate inputs. + if norm_type != "ada_norm_zero": + raise NotImplementedError( + f"Forward pass is not implemented when `patch_size` is not None and `norm_type` is '{norm_type}'." + ) + elif norm_type == "ada_norm_zero" and num_embeds_ada_norm is None: + raise ValueError( + f"When using a `patch_size` and this `norm_type` ({norm_type}), `num_embeds_ada_norm` cannot be None." + ) + + # Set some common variables used across the board. + self.attention_head_dim = attention_head_dim + self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim + self.out_channels = in_channels if out_channels is None else out_channels + self.gradient_checkpointing = False + + # 2. Initialize the position embedding and transformer blocks. + self.height = self.config.sample_size + self.width = self.config.sample_size + + self.patch_size = self.config.patch_size + self.pos_embed = PatchEmbed( + height=self.config.sample_size, + width=self.config.sample_size, + patch_size=self.config.patch_size, + in_channels=self.config.in_channels, + embed_dim=self.inner_dim, + ) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + self.inner_dim, + self.config.num_attention_heads, + self.config.attention_head_dim, + dropout=self.config.dropout, + activation_fn=self.config.activation_fn, + num_embeds_ada_norm=self.config.num_embeds_ada_norm, + attention_bias=self.config.attention_bias, + upcast_attention=self.config.upcast_attention, + norm_type=norm_type, + norm_elementwise_affine=self.config.norm_elementwise_affine, + norm_eps=self.config.norm_eps, + ) + for _ in range(self.config.num_layers) + ] + ) + + # 3. Output blocks. + self.norm_out = nn.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out_1 = nn.Linear(self.inner_dim, 2 * self.inner_dim) + self.proj_out_2 = nn.Linear( + self.inner_dim, self.config.patch_size * self.config.patch_size * self.out_channels + ) + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor | None = None, + class_labels: torch.LongTensor | None = None, + cross_attention_kwargs: dict[str, Any] = None, + return_dict: bool = True, + ): + """ + The [`DiTTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.LongTensor` of shape `(batch size, num latent pixels)` if discrete, `torch.FloatTensor` of shape `(batch size, channel, height, width)` if continuous): + Input `hidden_states`. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. Optional timestep to be applied as an embedding in `AdaLayerNorm`. + class_labels ( `torch.LongTensor` of shape `(batch size, num classes)`, *optional*): + Used to indicate class labels conditioning. Optional class labels to be applied as an embedding in + `AdaLayerZeroNorm`. + cross_attention_kwargs ( `dict[str, Any]`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.unets.unet_2d_condition.UNet2DConditionOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + # 1. Input + height, width = hidden_states.shape[-2] // self.patch_size, hidden_states.shape[-1] // self.patch_size + hidden_states = self.pos_embed(hidden_states) + + # 2. Blocks + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + None, + None, + None, + timestep, + cross_attention_kwargs, + class_labels, + ) + else: + hidden_states = block( + hidden_states, + attention_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + timestep=timestep, + cross_attention_kwargs=cross_attention_kwargs, + class_labels=class_labels, + ) + + # 3. Output + conditioning = self.transformer_blocks[0].norm1.emb(timestep, class_labels, hidden_dtype=hidden_states.dtype) + shift, scale = self.proj_out_1(F.silu(conditioning)).chunk(2, dim=1) + hidden_states = self.norm_out(hidden_states) * (1 + scale[:, None]) + shift[:, None] + hidden_states = self.proj_out_2(hidden_states) + + # unpatchify + height = width = int(hidden_states.shape[1] ** 0.5) + hidden_states = hidden_states.reshape( + shape=(-1, height, width, self.patch_size, self.patch_size, self.out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(-1, self.out_channels, height * self.patch_size, width * self.patch_size) + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/dual_transformer_2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/dual_transformer_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..c25c6e9c42273dacade3ab6dabddff0dc94c2877 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/dual_transformer_2d.py @@ -0,0 +1,154 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from torch import nn + +from ..modeling_outputs import Transformer2DModelOutput +from .transformer_2d import Transformer2DModel + + +class DualTransformer2DModel(nn.Module): + """ + Dual transformer wrapper that combines two `Transformer2DModel`s for mixed inference. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head. + in_channels (`int`, *optional*): + Pass if the input is continuous. The number of channels in the input and output. + num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. + dropout (`float`, *optional*, defaults to 0.1): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The number of encoder_hidden_states dimensions to use. + sample_size (`int`, *optional*): Pass if the input is discrete. The width of the latent images. + Note that this is fixed at training time as it is used for learning a number of position embeddings. See + `ImagePositionalEmbeddings`. + num_vector_embeds (`int`, *optional*): + Pass if the input is discrete. The number of classes of the vector embeddings of the latent pixels. + Includes the class for the masked latent pixel. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward. + num_embeds_ada_norm ( `int`, *optional*): Pass if at least one of the norm_layers is `AdaLayerNorm`. + The number of diffusion steps used during training. Note that this is fixed at training time as it is used + to learn a number of embeddings that are added to the hidden states. During inference, you can denoise for + up to but not more than steps than `num_embeds_ada_norm`. + attention_bias (`bool`, *optional*): + Configure if the TransformerBlocks' attention should contain a bias parameter. + """ + + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int | None = None, + num_layers: int = 1, + dropout: float = 0.0, + norm_num_groups: int = 32, + cross_attention_dim: int | None = None, + attention_bias: bool = False, + sample_size: int | None = None, + num_vector_embeds: int | None = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: int | None = None, + ): + super().__init__() + self.transformers = nn.ModuleList( + [ + Transformer2DModel( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + in_channels=in_channels, + num_layers=num_layers, + dropout=dropout, + norm_num_groups=norm_num_groups, + cross_attention_dim=cross_attention_dim, + attention_bias=attention_bias, + sample_size=sample_size, + num_vector_embeds=num_vector_embeds, + activation_fn=activation_fn, + num_embeds_ada_norm=num_embeds_ada_norm, + ) + for _ in range(2) + ] + ) + + # Variables that can be set by a pipeline: + + # The ratio of transformer1 to transformer2's output states to be combined during inference + self.mix_ratio = 0.5 + + # The shape of `encoder_hidden_states` is expected to be + # `(batch_size, condition_lengths[0]+condition_lengths[1], num_features)` + self.condition_lengths = [77, 257] + + # Which transformer to use to encode which condition. + # E.g. `(1, 0)` means that we'll use `transformers[1](conditions[0])` and `transformers[0](conditions[1])` + self.transformer_index_for_condition = [1, 0] + + def forward( + self, + hidden_states, + encoder_hidden_states, + timestep=None, + attention_mask=None, + cross_attention_kwargs=None, + return_dict: bool = True, + ): + """ + Args: + hidden_states ( When discrete, `torch.LongTensor` of shape `(batch size, num latent pixels)`. + When continuous, `torch.Tensor` of shape `(batch size, channel, height, width)`): Input hidden_states. + encoder_hidden_states ( `torch.LongTensor` of shape `(batch size, encoder_hidden_states dim)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + timestep ( `torch.long`, *optional*): + Optional timestep to be applied as an embedding in AdaLayerNorm's. Used to indicate denoising step. + attention_mask (`torch.Tensor`, *optional*): + Optional attention mask to be applied in Attention. + cross_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`models.unets.unet_2d_condition.UNet2DConditionOutput`] instead of a plain + tuple. + + Returns: + [`~models.transformers.transformer_2d.Transformer2DModelOutput`] or `tuple`: + [`~models.transformers.transformer_2d.Transformer2DModelOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + """ + input_states = hidden_states + + encoded_states = [] + tokens_start = 0 + # attention_mask is not used yet + for i in range(2): + # for each of the two transformers, pass the corresponding condition tokens + condition_state = encoder_hidden_states[:, tokens_start : tokens_start + self.condition_lengths[i]] + transformer_index = self.transformer_index_for_condition[i] + encoded_state = self.transformers[transformer_index]( + input_states, + encoder_hidden_states=condition_state, + timestep=timestep, + cross_attention_kwargs=cross_attention_kwargs, + return_dict=False, + )[0] + encoded_states.append(encoded_state - input_states) + tokens_start += self.condition_lengths[i] + + output_states = encoded_states[0] * self.mix_ratio + encoded_states[1] * (1 - self.mix_ratio) + output_states = output_states + input_states + + if not return_dict: + return (output_states,) + + return Transformer2DModelOutput(sample=output_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/hunyuan_transformer_2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/hunyuan_transformer_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..a25aa99fb8b9743d15dccc85ae1152bb356d5e8b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/hunyuan_transformer_2d.py @@ -0,0 +1,509 @@ +# Copyright 2025 HunyuanDiT Authors, Qixun Wang and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, FeedForward +from ..attention_processor import Attention, FusedHunyuanAttnProcessor2_0, HunyuanAttnProcessor2_0 +from ..embeddings import ( + HunyuanCombinedTimestepTextSizeStyleEmbedding, + PatchEmbed, + PixArtAlphaTextProjection, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class AdaLayerNormShift(nn.Module): + r""" + Norm layer modified to incorporate timestep embeddings. + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, elementwise_affine=True, eps=1e-6): + super().__init__() + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, embedding_dim) + self.norm = FP32LayerNorm(embedding_dim, elementwise_affine=elementwise_affine, eps=eps) + + def forward(self, x: torch.Tensor, emb: torch.Tensor) -> torch.Tensor: + shift = self.linear(self.silu(emb.to(torch.float32)).to(emb.dtype)) + x = self.norm(x) + shift.unsqueeze(dim=1) + return x + + +@maybe_allow_in_graph +class HunyuanDiTBlock(nn.Module): + r""" + Transformer block used in Hunyuan-DiT model (https://github.com/Tencent/HunyuanDiT). Allow skip connection and + QKNorm + + Parameters: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of headsto use for multi-head attention. + cross_attention_dim (`int`,*optional*): + The size of the encoder_hidden_states vector for cross attention. + dropout(`float`, *optional*, defaults to 0.0): + The dropout probability to use. + activation_fn (`str`,*optional*, defaults to `"geglu"`): + Activation function to be used in feed-forward. . + norm_elementwise_affine (`bool`, *optional*, defaults to `True`): + Whether to use learnable elementwise affine parameters for normalization. + norm_eps (`float`, *optional*, defaults to 1e-6): + A small constant added to the denominator in normalization layers to prevent division by zero. + final_dropout (`bool` *optional*, defaults to False): + Whether to apply a final dropout after the last feed-forward layer. + ff_inner_dim (`int`, *optional*): + The size of the hidden layer in the feed-forward block. Defaults to `None`. + ff_bias (`bool`, *optional*, defaults to `True`): + Whether to use bias in the feed-forward block. + skip (`bool`, *optional*, defaults to `False`): + Whether to use skip connection. Defaults to `False` for down-blocks and mid-blocks. + qk_norm (`bool`, *optional*, defaults to `True`): + Whether to use normalization in QK calculation. Defaults to `True`. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + cross_attention_dim: int = 1024, + dropout=0.0, + activation_fn: str = "geglu", + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-6, + final_dropout: bool = False, + ff_inner_dim: int | None = None, + ff_bias: bool = True, + skip: bool = False, + qk_norm: bool = True, + ): + super().__init__() + + # Define 3 blocks. Each block has its own normalization layer. + # NOTE: when new version comes, check norm2 and norm 3 + # 1. Self-Attn + self.norm1 = AdaLayerNormShift(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + + self.attn1 = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=dim // num_attention_heads, + heads=num_attention_heads, + qk_norm="layer_norm" if qk_norm else None, + eps=1e-6, + bias=True, + processor=HunyuanAttnProcessor2_0(), + ) + + # 2. Cross-Attn + self.norm2 = FP32LayerNorm(dim, norm_eps, norm_elementwise_affine) + + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + dim_head=dim // num_attention_heads, + heads=num_attention_heads, + qk_norm="layer_norm" if qk_norm else None, + eps=1e-6, + bias=True, + processor=HunyuanAttnProcessor2_0(), + ) + # 3. Feed-forward + self.norm3 = FP32LayerNorm(dim, norm_eps, norm_elementwise_affine) + + self.ff = FeedForward( + dim, + dropout=dropout, ### 0.0 + activation_fn=activation_fn, ### approx GeLU + final_dropout=final_dropout, ### 0.0 + inner_dim=ff_inner_dim, ### int(dim * mlp_ratio) + bias=ff_bias, + ) + + # 4. Skip Connection + if skip: + self.skip_norm = FP32LayerNorm(2 * dim, norm_eps, elementwise_affine=True) + self.skip_linear = nn.Linear(2 * dim, dim) + else: + self.skip_linear = None + + # let chunk size default to None + self._chunk_size = None + self._chunk_dim = 0 + + # Copied from diffusers.models.attention.BasicTransformerBlock.set_chunk_feed_forward + def set_chunk_feed_forward(self, chunk_size: int | None, dim: int = 0): + # Sets chunk feed-forward + self._chunk_size = chunk_size + self._chunk_dim = dim + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb=None, + skip=None, + ) -> torch.Tensor: + # Notice that normalization is always applied before the real computation in the following blocks. + # 0. Long Skip Connection + if self.skip_linear is not None: + cat = torch.cat([hidden_states, skip], dim=-1) + cat = self.skip_norm(cat) + hidden_states = self.skip_linear(cat) + + # 1. Self-Attention + norm_hidden_states = self.norm1(hidden_states, temb) ### checked: self.norm1 is correct + attn_output = self.attn1( + norm_hidden_states, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states + attn_output + + # 2. Cross-Attention + hidden_states = hidden_states + self.attn2( + self.norm2(hidden_states), + encoder_hidden_states=encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) + + # FFN Layer ### TODO: switch norm2 and norm3 in the state dict + mlp_inputs = self.norm3(hidden_states) + hidden_states = hidden_states + self.ff(mlp_inputs) + + return hidden_states + + +class HunyuanDiT2DModel(ModelMixin, AttentionMixin, ConfigMixin): + """ + HunYuanDiT: Diffusion model with a Transformer backbone. + + Inherit ModelMixin and ConfigMixin to be compatible with the sampler StableDiffusionPipeline of diffusers. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): + The number of channels in each head. + in_channels (`int`, *optional*): + The number of channels in the input and output (specify if the input is **continuous**). + patch_size (`int`, *optional*): + The size of the patch to use for the input. + activation_fn (`str`, *optional*, defaults to `"geglu"`): + Activation function to use in feed-forward. + sample_size (`int`, *optional*): + The width of the latent images. This is fixed during training since it is used to learn a number of + position embeddings. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + cross_attention_dim (`int`, *optional*): + The number of dimension in the clip text embedding. + hidden_size (`int`, *optional*): + The size of hidden layer in the conditioning embedding layers. + num_layers (`int`, *optional*, defaults to 1): + The number of layers of Transformer blocks to use. + mlp_ratio (`float`, *optional*, defaults to 4.0): + The ratio of the hidden layer size to the input size. + learn_sigma (`bool`, *optional*, defaults to `True`): + Whether to predict variance. + cross_attention_dim_t5 (`int`, *optional*): + The number dimensions in t5 text embedding. + pooled_projection_dim (`int`, *optional*): + The size of the pooled projection. + text_len (`int`, *optional*): + The length of the clip text embedding. + text_len_t5 (`int`, *optional*): + The length of the T5 text embedding. + use_style_cond_and_image_meta_size (`bool`, *optional*): + Whether or not to use style condition and image meta size. True for version <=1.1, False for version >= 1.2 + """ + + _skip_layerwise_casting_patterns = ["pos_embed", "norm", "pooler"] + _supports_group_offloading = False + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int | None = None, + patch_size: int | None = None, + activation_fn: str = "gelu-approximate", + sample_size=32, + hidden_size=1152, + num_layers: int = 28, + mlp_ratio: float = 4.0, + learn_sigma: bool = True, + cross_attention_dim: int = 1024, + norm_type: str = "layer_norm", + cross_attention_dim_t5: int = 2048, + pooled_projection_dim: int = 1024, + text_len: int = 77, + text_len_t5: int = 256, + use_style_cond_and_image_meta_size: bool = True, + ): + super().__init__() + self.out_channels = in_channels * 2 if learn_sigma else in_channels + self.num_heads = num_attention_heads + self.inner_dim = num_attention_heads * attention_head_dim + + self.text_embedder = PixArtAlphaTextProjection( + in_features=cross_attention_dim_t5, + hidden_size=cross_attention_dim_t5 * 4, + out_features=cross_attention_dim, + act_fn="silu_fp32", + ) + + self.text_embedding_padding = nn.Parameter(torch.randn(text_len + text_len_t5, cross_attention_dim)) + + self.pos_embed = PatchEmbed( + height=sample_size, + width=sample_size, + in_channels=in_channels, + embed_dim=hidden_size, + patch_size=patch_size, + pos_embed_type=None, + ) + + self.time_extra_emb = HunyuanCombinedTimestepTextSizeStyleEmbedding( + hidden_size, + pooled_projection_dim=pooled_projection_dim, + seq_len=text_len_t5, + cross_attention_dim=cross_attention_dim_t5, + use_style_cond_and_image_meta_size=use_style_cond_and_image_meta_size, + ) + + # HunyuanDiT Blocks + self.blocks = nn.ModuleList( + [ + HunyuanDiTBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + activation_fn=activation_fn, + ff_inner_dim=int(self.inner_dim * mlp_ratio), + cross_attention_dim=cross_attention_dim, + qk_norm=True, # See https://huggingface.co/papers/2302.05442 for details. + skip=layer > num_layers // 2, + ) + for layer in range(num_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedHunyuanAttnProcessor2_0 + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedHunyuanAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + self.set_attn_processor(HunyuanAttnProcessor2_0()) + + def forward( + self, + hidden_states, + timestep, + encoder_hidden_states=None, + text_embedding_mask=None, + encoder_hidden_states_t5=None, + text_embedding_mask_t5=None, + image_meta_size=None, + style=None, + image_rotary_emb=None, + controlnet_block_samples=None, + return_dict=True, + ): + """ + The [`HunyuanDiT2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch size, dim, height, width)`): + The input tensor. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. + encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. This is the output of `BertModel`. + text_embedding_mask: torch.Tensor + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output + of `BertModel`. + encoder_hidden_states_t5 ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. This is the output of T5 Text Encoder. + text_embedding_mask_t5: torch.Tensor + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. This is the output + of T5 Text Encoder. + image_meta_size (torch.Tensor): + Conditional embedding indicate the image sizes + style: torch.Tensor: + Conditional embedding indicate the style + image_rotary_emb (`torch.Tensor`): + The image rotary embeddings to apply on query and key tensors during attention calculation. + return_dict: bool + Whether to return a dictionary. + """ + + height, width = hidden_states.shape[-2:] + + hidden_states = self.pos_embed(hidden_states) + + temb = self.time_extra_emb( + timestep, encoder_hidden_states_t5, image_meta_size, style, hidden_dtype=timestep.dtype + ) # [B, D] + + # text projection + batch_size, sequence_length, _ = encoder_hidden_states_t5.shape + encoder_hidden_states_t5 = self.text_embedder( + encoder_hidden_states_t5.view(-1, encoder_hidden_states_t5.shape[-1]) + ) + encoder_hidden_states_t5 = encoder_hidden_states_t5.view(batch_size, sequence_length, -1) + + encoder_hidden_states = torch.cat([encoder_hidden_states, encoder_hidden_states_t5], dim=1) + text_embedding_mask = torch.cat([text_embedding_mask, text_embedding_mask_t5], dim=-1) + text_embedding_mask = text_embedding_mask.unsqueeze(2).bool() + + encoder_hidden_states = torch.where(text_embedding_mask, encoder_hidden_states, self.text_embedding_padding) + + skips = [] + for layer, block in enumerate(self.blocks): + if layer > self.config.num_layers // 2: + if controlnet_block_samples is not None: + skip = skips.pop() + controlnet_block_samples.pop() + else: + skip = skips.pop() + hidden_states = block( + hidden_states, + temb=temb, + encoder_hidden_states=encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + skip=skip, + ) # (N, L, D) + else: + hidden_states = block( + hidden_states, + temb=temb, + encoder_hidden_states=encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) # (N, L, D) + + if layer < (self.config.num_layers // 2 - 1): + skips.append(hidden_states) + + if controlnet_block_samples is not None and len(controlnet_block_samples) != 0: + raise ValueError("The number of controls is not equal to the number of skip connections.") + + # final layer + hidden_states = self.norm_out(hidden_states, temb.to(torch.float32)) + hidden_states = self.proj_out(hidden_states) + # (N, L, patch_size ** 2 * out_channels) + + # unpatchify: (N, out_channels, H, W) + patch_size = self.pos_embed.patch_size + height = height // patch_size + width = width // patch_size + + hidden_states = hidden_states.reshape( + shape=(hidden_states.shape[0], height, width, patch_size, patch_size, self.out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(hidden_states.shape[0], self.out_channels, height * patch_size, width * patch_size) + ) + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) + + # Copied from diffusers.models.unets.unet_3d_condition.UNet3DConditionModel.enable_forward_chunking + def enable_forward_chunking(self, chunk_size: int | None = None, dim: int = 0) -> None: + """ + Sets the attention processor to use [feed forward + chunking](https://huggingface.co/blog/reformer#2-chunked-feed-forward-layers). + + Parameters: + chunk_size (`int`, *optional*): + The chunk size of the feed-forward layers. If not specified, will run feed-forward layer individually + over each tensor of dim=`dim`. + dim (`int`, *optional*, defaults to `0`): + The dimension over which the feed-forward computation should be chunked. Choose between dim=0 (batch) + or dim=1 (sequence length). + """ + if dim not in [0, 1]: + raise ValueError(f"Make sure to set `dim` to either 0 or 1, not {dim}") + + # By default chunk size is 1 + chunk_size = chunk_size or 1 + + def fn_recursive_feed_forward(module: torch.nn.Module, chunk_size: int, dim: int): + if hasattr(module, "set_chunk_feed_forward"): + module.set_chunk_feed_forward(chunk_size=chunk_size, dim=dim) + + for child in module.children(): + fn_recursive_feed_forward(child, chunk_size, dim) + + for module in self.children(): + fn_recursive_feed_forward(module, chunk_size, dim) + + # Copied from diffusers.models.unets.unet_3d_condition.UNet3DConditionModel.disable_forward_chunking + def disable_forward_chunking(self): + def fn_recursive_feed_forward(module: torch.nn.Module, chunk_size: int, dim: int): + if hasattr(module, "set_chunk_feed_forward"): + module.set_chunk_feed_forward(chunk_size=chunk_size, dim=dim) + + for child in module.children(): + fn_recursive_feed_forward(child, chunk_size, dim) + + for module in self.children(): + fn_recursive_feed_forward(module, None, 0) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/latte_transformer_3d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/latte_transformer_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..32e97aff8fb7dbfa0ed9031fc06b153c6e965b8a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/latte_transformer_3d.py @@ -0,0 +1,329 @@ +# Copyright 2025 the Latte Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ..attention import BasicTransformerBlock +from ..cache_utils import CacheMixin +from ..embeddings import PatchEmbed, PixArtAlphaTextProjection, get_1d_sincos_pos_embed_from_grid +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle + + +class LatteTransformer3DModel(ModelMixin, ConfigMixin, CacheMixin): + _supports_gradient_checkpointing = True + + """ + A 3D Transformer model for video-like data, paper: https://huggingface.co/papers/2401.03048, official code: + https://github.com/Vchitect/Latte + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head. + in_channels (`int`, *optional*): + The number of channels in the input. + out_channels (`int`, *optional*): + The number of channels in the output. + num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + attention_bias (`bool`, *optional*): + Configure if the `TransformerBlocks` attention should contain a bias parameter. + sample_size (`int`, *optional*): The width of the latent images (specify if the input is **discrete**). + This is fixed during training since it is used to learn a number of position embeddings. + patch_size (`int`, *optional*): + The size of the patches to use in the patch embedding layer. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to use in feed-forward. + num_embeds_ada_norm ( `int`, *optional*): + The number of diffusion steps used during training. Pass if at least one of the norm_layers is + `AdaLayerNorm`. This is fixed during training since it is used to learn a number of embeddings that are + added to the hidden states. During inference, you can denoise for up to but not more steps than + `num_embeds_ada_norm`. + norm_type (`str`, *optional*, defaults to `"layer_norm"`): + The type of normalization to use. Options are `"layer_norm"` or `"ada_layer_norm"`. + norm_elementwise_affine (`bool`, *optional*, defaults to `True`): + Whether or not to use elementwise affine in normalization layers. + norm_eps (`float`, *optional*, defaults to 1e-5): The epsilon value to use in normalization layers. + caption_channels (`int`, *optional*): + The number of channels in the caption embeddings. + video_length (`int`, *optional*): + The number of frames in the video-like data. + """ + + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int | None = None, + out_channels: int | None = None, + num_layers: int = 1, + dropout: float = 0.0, + cross_attention_dim: int | None = None, + attention_bias: bool = False, + sample_size: int = 64, + patch_size: int | None = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: int | None = None, + norm_type: str = "layer_norm", + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + caption_channels: int = None, + video_length: int = 16, + ): + super().__init__() + inner_dim = num_attention_heads * attention_head_dim + + # 1. Define input layers + self.height = sample_size + self.width = sample_size + + interpolation_scale = self.config.sample_size // 64 + interpolation_scale = max(interpolation_scale, 1) + self.pos_embed = PatchEmbed( + height=sample_size, + width=sample_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=inner_dim, + interpolation_scale=interpolation_scale, + ) + + # 2. Define spatial transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + cross_attention_dim=cross_attention_dim, + activation_fn=activation_fn, + num_embeds_ada_norm=num_embeds_ada_norm, + attention_bias=attention_bias, + norm_type=norm_type, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + ) + for d in range(num_layers) + ] + ) + + # 3. Define temporal transformers blocks + self.temporal_transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + cross_attention_dim=None, + activation_fn=activation_fn, + num_embeds_ada_norm=num_embeds_ada_norm, + attention_bias=attention_bias, + norm_type=norm_type, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + ) + for d in range(num_layers) + ] + ) + + # 4. Define output layers + self.out_channels = in_channels if out_channels is None else out_channels + self.norm_out = nn.LayerNorm(inner_dim, elementwise_affine=False, eps=1e-6) + self.scale_shift_table = nn.Parameter(torch.randn(2, inner_dim) / inner_dim**0.5) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * self.out_channels) + + # 5. Latte other blocks. + self.adaln_single = AdaLayerNormSingle(inner_dim, use_additional_conditions=False) + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=inner_dim) + + # define temporal positional embedding + temp_pos_embed = get_1d_sincos_pos_embed_from_grid( + inner_dim, torch.arange(0, video_length).unsqueeze(1), output_type="pt" + ) # 1152 hidden size + self.register_buffer("temp_pos_embed", temp_pos_embed.float().unsqueeze(0), persistent=False) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + enable_temporal_attentions: bool = True, + return_dict: bool = True, + ): + """ + The [`LatteTransformer3DModel`] forward method. + + Args: + hidden_states shape `(batch size, channel, num_frame, height, width)`: + Input `hidden_states`. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. Optional timestep to be applied as an embedding in `AdaLayerNorm`. + encoder_hidden_states ( `torch.FloatTensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + encoder_attention_mask ( `torch.Tensor`, *optional*): + Cross-attention mask applied to `encoder_hidden_states`. Two formats supported: + + * Mask `(batcheight, sequence_length)` True = keep, False = discard. + * Bias `(batcheight, 1, sequence_length)` 0 = keep, -10000 = discard. + + If `ndim == 2`: will be interpreted as a mask, then converted into a bias consistent with the format + above. This bias will be added to the cross-attention scores. + enable_temporal_attentions: + (`bool`, *optional*, defaults to `True`): Whether to enable temporal attentions. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.unet_2d_condition.UNet2DConditionOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + + # Reshape hidden states + batch_size, channels, num_frame, height, width = hidden_states.shape + # batch_size channels num_frame height width -> (batch_size * num_frame) channels height width + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).reshape(-1, channels, height, width) + + # Input + height, width = ( + hidden_states.shape[-2] // self.config.patch_size, + hidden_states.shape[-1] // self.config.patch_size, + ) + num_patches = height * width + + hidden_states = self.pos_embed(hidden_states) # already add positional embeddings + + added_cond_kwargs = {"resolution": None, "aspect_ratio": None} + timestep, embedded_timestep = self.adaln_single( + timestep, added_cond_kwargs=added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + # Prepare text embeddings for spatial block + # batch_size num_tokens hidden_size -> (batch_size * num_frame) num_tokens hidden_size + encoder_hidden_states = self.caption_projection(encoder_hidden_states) # 3 120 1152 + encoder_hidden_states_spatial = encoder_hidden_states.repeat_interleave( + num_frame, dim=0, output_size=encoder_hidden_states.shape[0] * num_frame + ).view(-1, encoder_hidden_states.shape[-2], encoder_hidden_states.shape[-1]) + + # Prepare timesteps for spatial and temporal block + timestep_spatial = timestep.repeat_interleave( + num_frame, dim=0, output_size=timestep.shape[0] * num_frame + ).view(-1, timestep.shape[-1]) + timestep_temp = timestep.repeat_interleave( + num_patches, dim=0, output_size=timestep.shape[0] * num_patches + ).view(-1, timestep.shape[-1]) + + # Spatial and temporal transformer blocks + for i, (spatial_block, temp_block) in enumerate( + zip(self.transformer_blocks, self.temporal_transformer_blocks) + ): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + spatial_block, + hidden_states, + None, # attention_mask + encoder_hidden_states_spatial, + encoder_attention_mask, + timestep_spatial, + None, # cross_attention_kwargs + None, # class_labels + ) + else: + hidden_states = spatial_block( + hidden_states, + None, # attention_mask + encoder_hidden_states_spatial, + encoder_attention_mask, + timestep_spatial, + None, # cross_attention_kwargs + None, # class_labels + ) + + if enable_temporal_attentions: + # (batch_size * num_frame) num_tokens hidden_size -> (batch_size * num_tokens) num_frame hidden_size + hidden_states = hidden_states.reshape( + batch_size, -1, hidden_states.shape[-2], hidden_states.shape[-1] + ).permute(0, 2, 1, 3) + hidden_states = hidden_states.reshape(-1, hidden_states.shape[-2], hidden_states.shape[-1]) + + if i == 0 and num_frame > 1: + hidden_states = hidden_states + self.temp_pos_embed.to(hidden_states.dtype) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + temp_block, + hidden_states, + None, # attention_mask + None, # encoder_hidden_states + None, # encoder_attention_mask + timestep_temp, + None, # cross_attention_kwargs + None, # class_labels + ) + else: + hidden_states = temp_block( + hidden_states, + None, # attention_mask + None, # encoder_hidden_states + None, # encoder_attention_mask + timestep_temp, + None, # cross_attention_kwargs + None, # class_labels + ) + + # (batch_size * num_tokens) num_frame hidden_size -> (batch_size * num_frame) num_tokens hidden_size + hidden_states = hidden_states.reshape( + batch_size, -1, hidden_states.shape[-2], hidden_states.shape[-1] + ).permute(0, 2, 1, 3) + hidden_states = hidden_states.reshape(-1, hidden_states.shape[-2], hidden_states.shape[-1]) + + embedded_timestep = embedded_timestep.repeat_interleave( + num_frame, dim=0, output_size=embedded_timestep.shape[0] * num_frame + ).view(-1, embedded_timestep.shape[-1]) + shift, scale = (self.scale_shift_table[None] + embedded_timestep[:, None]).chunk(2, dim=1) + hidden_states = self.norm_out(hidden_states) + # Modulation + hidden_states = hidden_states * (1 + scale) + shift + hidden_states = self.proj_out(hidden_states) + + # unpatchify + if self.adaln_single is None: + height = width = int(hidden_states.shape[1] ** 0.5) + hidden_states = hidden_states.reshape( + shape=(-1, height, width, self.config.patch_size, self.config.patch_size, self.out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(-1, self.out_channels, height * self.config.patch_size, width * self.config.patch_size) + ) + output = output.reshape(batch_size, -1, output.shape[-3], output.shape[-2], output.shape[-1]).permute( + 0, 2, 1, 3, 4 + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/lumina_nextdit2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/lumina_nextdit2d.py new file mode 100644 index 0000000000000000000000000000000000000000..46a6753b4cb190938588901e5186783e2cc974f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/lumina_nextdit2d.py @@ -0,0 +1,342 @@ +# Copyright 2025 Alpha-VLLM Authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ..attention import LuminaFeedForward +from ..attention_processor import Attention, LuminaAttnProcessor2_0 +from ..embeddings import ( + LuminaCombinedTimestepCaptionEmbedding, + LuminaPatchEmbed, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import LuminaLayerNormContinuous, LuminaRMSNormZero, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class LuminaNextDiTBlock(nn.Module): + """ + A LuminaNextDiTBlock for LuminaNextDiT2DModel. + + Parameters: + dim (`int`): Embedding dimension of the input features. + num_attention_heads (`int`): Number of attention heads. + num_kv_heads (`int`): + Number of attention heads in key and value features (if using GQA), or set to None for the same as query. + multiple_of (`int`): The number of multiple of ffn layer. + ffn_dim_multiplier (`float`): The multiplier factor of ffn layer dimension. + norm_eps (`float`): The eps for norm layer. + qk_norm (`bool`): normalization for query and key. + cross_attention_dim (`int`): Cross attention embedding dimension of the input text prompt hidden_states. + norm_elementwise_affine (`bool`, *optional*, defaults to True), + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + num_kv_heads: int, + multiple_of: int, + ffn_dim_multiplier: float, + norm_eps: float, + qk_norm: bool, + cross_attention_dim: int, + norm_elementwise_affine: bool = True, + ) -> None: + super().__init__() + self.head_dim = dim // num_attention_heads + + self.gate = nn.Parameter(torch.zeros([num_attention_heads])) + + # Self-attention + self.attn1 = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=dim // num_attention_heads, + qk_norm="layer_norm_across_heads" if qk_norm else None, + heads=num_attention_heads, + kv_heads=num_kv_heads, + eps=1e-5, + bias=False, + out_bias=False, + processor=LuminaAttnProcessor2_0(), + ) + self.attn1.to_out = nn.Identity() + + # Cross-attention + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + dim_head=dim // num_attention_heads, + qk_norm="layer_norm_across_heads" if qk_norm else None, + heads=num_attention_heads, + kv_heads=num_kv_heads, + eps=1e-5, + bias=False, + out_bias=False, + processor=LuminaAttnProcessor2_0(), + ) + + self.feed_forward = LuminaFeedForward( + dim=dim, + inner_dim=int(4 * 2 * dim / 3), + multiple_of=multiple_of, + ffn_dim_multiplier=ffn_dim_multiplier, + ) + + self.norm1 = LuminaRMSNormZero( + embedding_dim=dim, + norm_eps=norm_eps, + norm_elementwise_affine=norm_elementwise_affine, + ) + self.ffn_norm1 = RMSNorm(dim, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + + self.norm2 = RMSNorm(dim, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + self.ffn_norm2 = RMSNorm(dim, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + + self.norm1_context = RMSNorm(cross_attention_dim, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + image_rotary_emb: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_mask: torch.Tensor, + temb: torch.Tensor, + cross_attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor: + """ + Perform a forward pass through the LuminaNextDiTBlock. + + Parameters: + hidden_states (`torch.Tensor`): The input of hidden_states for LuminaNextDiTBlock. + attention_mask (`torch.Tensor): The input of hidden_states corresponse attention mask. + image_rotary_emb (`torch.Tensor`): Precomputed cosine and sine frequencies. + encoder_hidden_states: (`torch.Tensor`): The hidden_states of text prompt are processed by Gemma encoder. + encoder_mask (`torch.Tensor`): The hidden_states of text prompt attention mask. + temb (`torch.Tensor`): Timestep embedding with text prompt embedding. + cross_attention_kwargs (`dict[str, Any]`): kwargs for cross attention. + """ + residual = hidden_states + + # Self-attention + norm_hidden_states, gate_msa, scale_mlp, gate_mlp = self.norm1(hidden_states, temb) + self_attn_output = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_hidden_states, + attention_mask=attention_mask, + query_rotary_emb=image_rotary_emb, + key_rotary_emb=image_rotary_emb, + **cross_attention_kwargs, + ) + + # Cross-attention + norm_encoder_hidden_states = self.norm1_context(encoder_hidden_states) + cross_attn_output = self.attn2( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=encoder_mask, + query_rotary_emb=image_rotary_emb, + key_rotary_emb=None, + **cross_attention_kwargs, + ) + cross_attn_output = cross_attn_output * self.gate.tanh().view(1, 1, -1, 1) + mixed_attn_output = self_attn_output + cross_attn_output + mixed_attn_output = mixed_attn_output.flatten(-2) + # linear proj + hidden_states = self.attn2.to_out[0](mixed_attn_output) + + hidden_states = residual + gate_msa.unsqueeze(1).tanh() * self.norm2(hidden_states) + + mlp_output = self.feed_forward(self.ffn_norm1(hidden_states) * (1 + scale_mlp.unsqueeze(1))) + + hidden_states = hidden_states + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(mlp_output) + + return hidden_states + + +class LuminaNextDiT2DModel(ModelMixin, ConfigMixin): + """ + LuminaNextDiT: Diffusion model with a Transformer backbone. + + Inherit ModelMixin and ConfigMixin to be compatible with the sampler StableDiffusionPipeline of diffusers. + + Parameters: + sample_size (`int`): The width of the latent images. This is fixed during training since + it is used to learn a number of position embeddings. + patch_size (`int`, *optional*, (`int`, *optional*, defaults to 2): + The size of each patch in the image. This parameter defines the resolution of patches fed into the model. + in_channels (`int`, *optional*, defaults to 4): + The number of input channels for the model. Typically, this matches the number of channels in the input + images. + hidden_size (`int`, *optional*, defaults to 4096): + The dimensionality of the hidden layers in the model. This parameter determines the width of the model's + hidden representations. + num_layers (`int`, *optional*, default to 32): + The number of layers in the model. This defines the depth of the neural network. + num_attention_heads (`int`, *optional*, defaults to 32): + The number of attention heads in each attention layer. This parameter specifies how many separate attention + mechanisms are used. + num_kv_heads (`int`, *optional*, defaults to 8): + The number of key-value heads in the attention mechanism, if different from the number of attention heads. + If None, it defaults to num_attention_heads. + multiple_of (`int`, *optional*, defaults to 256): + A factor that the hidden size should be a multiple of. This can help optimize certain hardware + configurations. + ffn_dim_multiplier (`float`, *optional*): + A multiplier for the dimensionality of the feed-forward network. If None, it uses a default value based on + the model configuration. + norm_eps (`float`, *optional*, defaults to 1e-5): + A small value added to the denominator for numerical stability in normalization layers. + learn_sigma (`bool`, *optional*, defaults to True): + Whether the model should learn the sigma parameter, which might be related to uncertainty or variance in + predictions. + qk_norm (`bool`, *optional*, defaults to True): + Indicates if the queries and keys in the attention mechanism should be normalized. + cross_attention_dim (`int`, *optional*, defaults to 2048): + The dimensionality of the text embeddings. This parameter defines the size of the text representations used + in the model. + scaling_factor (`float`, *optional*, defaults to 1.0): + A scaling factor applied to certain parameters or layers in the model. This can be used for adjusting the + overall scale of the model's operations. + """ + + _skip_layerwise_casting_patterns = ["patch_embedder", "norm", "ffn_norm"] + + @register_to_config + def __init__( + self, + sample_size: int = 128, + patch_size: int | None = 2, + in_channels: int | None = 4, + hidden_size: int | None = 2304, + num_layers: int | None = 32, + num_attention_heads: int | None = 32, + num_kv_heads: int | None = None, + multiple_of: int | None = 256, + ffn_dim_multiplier: float | None = None, + norm_eps: float | None = 1e-5, + learn_sigma: bool | None = True, + qk_norm: bool | None = True, + cross_attention_dim: int | None = 2048, + scaling_factor: float | None = 1.0, + ) -> None: + super().__init__() + self.sample_size = sample_size + self.patch_size = patch_size + self.in_channels = in_channels + self.out_channels = in_channels * 2 if learn_sigma else in_channels + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.head_dim = hidden_size // num_attention_heads + self.scaling_factor = scaling_factor + + self.patch_embedder = LuminaPatchEmbed( + patch_size=patch_size, in_channels=in_channels, embed_dim=hidden_size, bias=True + ) + + self.pad_token = nn.Parameter(torch.empty(hidden_size)) + + self.time_caption_embed = LuminaCombinedTimestepCaptionEmbedding( + hidden_size=min(hidden_size, 1024), cross_attention_dim=cross_attention_dim + ) + + self.layers = nn.ModuleList( + [ + LuminaNextDiTBlock( + hidden_size, + num_attention_heads, + num_kv_heads, + multiple_of, + ffn_dim_multiplier, + norm_eps, + qk_norm, + cross_attention_dim, + ) + for _ in range(num_layers) + ] + ) + self.norm_out = LuminaLayerNormContinuous( + embedding_dim=hidden_size, + conditioning_embedding_dim=min(hidden_size, 1024), + elementwise_affine=False, + eps=1e-6, + bias=True, + out_dim=patch_size * patch_size * self.out_channels, + ) + # self.final_layer = LuminaFinalLayer(hidden_size, patch_size, self.out_channels) + + assert (hidden_size // num_attention_heads) % 4 == 0, "2d rope needs head dim to be divisible by 4" + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_mask: torch.Tensor, + image_rotary_emb: torch.Tensor, + cross_attention_kwargs: dict[str, Any] = None, + return_dict=True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + """ + Forward pass of LuminaNextDiT. + + Parameters: + hidden_states (torch.Tensor): Input tensor of shape (N, C, H, W). + timestep (torch.Tensor): Tensor of diffusion timesteps of shape (N,). + encoder_hidden_states (torch.Tensor): Tensor of caption features of shape (N, D). + encoder_mask (torch.Tensor): Tensor of caption masks of shape (N, L). + """ + hidden_states, mask, img_size, image_rotary_emb = self.patch_embedder(hidden_states, image_rotary_emb) + image_rotary_emb = image_rotary_emb.to(hidden_states.device) + + temb = self.time_caption_embed(timestep, encoder_hidden_states, encoder_mask) + + encoder_mask = encoder_mask.bool() + for layer in self.layers: + hidden_states = layer( + hidden_states, + mask, + image_rotary_emb, + encoder_hidden_states, + encoder_mask, + temb=temb, + cross_attention_kwargs=cross_attention_kwargs, + ) + + hidden_states = self.norm_out(hidden_states, temb) + + # unpatchify + height_tokens = width_tokens = self.patch_size + height, width = img_size[0] + batch_size = hidden_states.size(0) + sequence_length = (height // height_tokens) * (width // width_tokens) + hidden_states = hidden_states[:, :sequence_length].view( + batch_size, height // height_tokens, width // width_tokens, height_tokens, width_tokens, self.out_channels + ) + output = hidden_states.permute(0, 5, 1, 3, 2, 4).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/pixart_transformer_2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/pixart_transformer_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..2476668ba307f81f4982fa0660e0df620c91a620 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/pixart_transformer_2d.py @@ -0,0 +1,362 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ..attention import AttentionMixin, BasicTransformerBlock +from ..attention_processor import Attention, AttnProcessor, FusedAttnProcessor2_0 +from ..embeddings import PatchEmbed, PixArtAlphaTextProjection +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class PixArtTransformer2DModel(ModelMixin, AttentionMixin, ConfigMixin): + r""" + A 2D Transformer model as introduced in PixArt family of models (https://huggingface.co/papers/2310.00426, + https://huggingface.co/papers/2403.04692). + + Parameters: + num_attention_heads (int, optional, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (int, optional, defaults to 72): The number of channels in each head. + in_channels (int, defaults to 4): The number of channels in the input. + out_channels (int, optional): + The number of channels in the output. Specify this parameter if the output channel number differs from the + input. + num_layers (int, optional, defaults to 28): The number of layers of Transformer blocks to use. + dropout (float, optional, defaults to 0.0): The dropout probability to use within the Transformer blocks. + norm_num_groups (int, optional, defaults to 32): + Number of groups for group normalization within Transformer blocks. + cross_attention_dim (int, optional): + The dimensionality for cross-attention layers, typically matching the encoder's hidden dimension. + attention_bias (bool, optional, defaults to True): + Configure if the Transformer blocks' attention should contain a bias parameter. + sample_size (int, defaults to 128): + The width of the latent images. This parameter is fixed during training. + patch_size (int, defaults to 2): + Size of the patches the model processes, relevant for architectures working on non-sequential data. + activation_fn (str, optional, defaults to "gelu-approximate"): + Activation function to use in feed-forward networks within Transformer blocks. + num_embeds_ada_norm (int, optional, defaults to 1000): + Number of embeddings for AdaLayerNorm, fixed during training and affects the maximum denoising steps during + inference. + upcast_attention (bool, optional, defaults to False): + If true, upcasts the attention mechanism dimensions for potentially improved performance. + norm_type (str, optional, defaults to "ada_norm_zero"): + Specifies the type of normalization used, can be 'ada_norm_zero'. + norm_elementwise_affine (bool, optional, defaults to False): + If true, enables element-wise affine parameters in the normalization layers. + norm_eps (float, optional, defaults to 1e-6): + A small constant added to the denominator in normalization layers to prevent division by zero. + interpolation_scale (int, optional): Scale factor to use during interpolating the position embeddings. + use_additional_conditions (bool, optional): If we're using additional conditions as inputs. + attention_type (str, optional, defaults to "default"): Kind of attention mechanism to be used. + caption_channels (int, optional, defaults to None): + Number of channels to use for projecting the caption embeddings. + use_linear_projection (bool, optional, defaults to False): + Deprecated argument. Will be removed in a future version. + num_vector_embeds (bool, optional, defaults to False): + Deprecated argument. Will be removed in a future version. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["BasicTransformerBlock", "PatchEmbed"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm", "adaln_single"] + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 72, + in_channels: int = 4, + out_channels: int | None = 8, + num_layers: int = 28, + dropout: float = 0.0, + norm_num_groups: int = 32, + cross_attention_dim: int | None = 1152, + attention_bias: bool = True, + sample_size: int = 128, + patch_size: int = 2, + activation_fn: str = "gelu-approximate", + num_embeds_ada_norm: int | None = 1000, + upcast_attention: bool = False, + norm_type: str = "ada_norm_single", + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + interpolation_scale: int | None = None, + use_additional_conditions: bool | None = None, + caption_channels: int | None = None, + attention_type: str | None = "default", + ): + super().__init__() + + # Validate inputs. + if norm_type != "ada_norm_single": + raise NotImplementedError( + f"Forward pass is not implemented when `patch_size` is not None and `norm_type` is '{norm_type}'." + ) + elif norm_type == "ada_norm_single" and num_embeds_ada_norm is None: + raise ValueError( + f"When using a `patch_size` and this `norm_type` ({norm_type}), `num_embeds_ada_norm` cannot be None." + ) + + # Set some common variables used across the board. + self.attention_head_dim = attention_head_dim + self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim + self.out_channels = in_channels if out_channels is None else out_channels + if use_additional_conditions is None: + if sample_size == 128: + use_additional_conditions = True + else: + use_additional_conditions = False + self.use_additional_conditions = use_additional_conditions + + self.gradient_checkpointing = False + + # 2. Initialize the position embedding and transformer blocks. + self.height = self.config.sample_size + self.width = self.config.sample_size + + interpolation_scale = ( + self.config.interpolation_scale + if self.config.interpolation_scale is not None + else max(self.config.sample_size // 64, 1) + ) + self.pos_embed = PatchEmbed( + height=self.config.sample_size, + width=self.config.sample_size, + patch_size=self.config.patch_size, + in_channels=self.config.in_channels, + embed_dim=self.inner_dim, + interpolation_scale=interpolation_scale, + ) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + self.inner_dim, + self.config.num_attention_heads, + self.config.attention_head_dim, + dropout=self.config.dropout, + cross_attention_dim=self.config.cross_attention_dim, + activation_fn=self.config.activation_fn, + num_embeds_ada_norm=self.config.num_embeds_ada_norm, + attention_bias=self.config.attention_bias, + upcast_attention=self.config.upcast_attention, + norm_type=norm_type, + norm_elementwise_affine=self.config.norm_elementwise_affine, + norm_eps=self.config.norm_eps, + attention_type=self.config.attention_type, + ) + for _ in range(self.config.num_layers) + ] + ) + + # 3. Output blocks. + self.norm_out = nn.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6) + self.scale_shift_table = nn.Parameter(torch.randn(2, self.inner_dim) / self.inner_dim**0.5) + self.proj_out = nn.Linear(self.inner_dim, self.config.patch_size * self.config.patch_size * self.out_channels) + + self.adaln_single = AdaLayerNormSingle( + self.inner_dim, use_additional_conditions=self.use_additional_conditions + ) + self.caption_projection = None + if self.config.caption_channels is not None: + self.caption_projection = PixArtAlphaTextProjection( + in_features=self.config.caption_channels, hidden_size=self.inner_dim + ) + + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + + Safe to just use `AttnProcessor()` as PixArt doesn't have any exotic attention processors in default model. + """ + self.set_attn_processor(AttnProcessor()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + timestep: torch.LongTensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] = None, + cross_attention_kwargs: dict[str, Any] = None, + attention_mask: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + return_dict: bool = True, + ): + """ + The [`PixArtTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + timestep (`torch.LongTensor`, *optional*): + Used to indicate denoising step. Optional timestep to be applied as an embedding in `AdaLayerNorm`. + added_cond_kwargs: (`dict[str, Any]`, *optional*): Additional conditions to be used as inputs. + cross_attention_kwargs ( `dict[str, Any]`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + attention_mask ( `torch.Tensor`, *optional*): + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask + is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large + negative values to the attention scores corresponding to "discard" tokens. + encoder_attention_mask ( `torch.Tensor`, *optional*): + Cross-attention mask applied to `encoder_hidden_states`. Two formats supported: + + * Mask `(batch, sequence_length)` True = keep, False = discard. + * Bias `(batch, 1, sequence_length)` 0 = keep, -10000 = discard. + + If `ndim == 2`: will be interpreted as a mask, then converted into a bias consistent with the format + above. This bias will be added to the cross-attention scores. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.unets.unet_2d_condition.UNet2DConditionOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + if self.use_additional_conditions and added_cond_kwargs is None: + raise ValueError("`added_cond_kwargs` cannot be None when using additional conditions for `adaln_single`.") + + # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. + # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. + # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. + # expects mask of shape: + # [batch, key_tokens] + # adds singleton query_tokens dimension: + # [batch, 1, key_tokens] + # this helps to broadcast it as a bias over attention scores, which will be in one of the following shapes: + # [batch, heads, query_tokens, key_tokens] (e.g. torch sdp attn) + # [batch * heads, query_tokens, key_tokens] (e.g. xformers or classic attn) + if attention_mask is not None and attention_mask.ndim == 2: + # assume that mask is expressed as: + # (1 = keep, 0 = discard) + # convert mask into a bias that can be added to attention scores: + # (keep = +0, discard = -10000.0) + attention_mask = (1 - attention_mask.to(hidden_states.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + # 1. Input + batch_size = hidden_states.shape[0] + height, width = ( + hidden_states.shape[-2] // self.config.patch_size, + hidden_states.shape[-1] // self.config.patch_size, + ) + hidden_states = self.pos_embed(hidden_states) + + timestep, embedded_timestep = self.adaln_single( + timestep, added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + if self.caption_projection is not None: + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.shape[-1]) + + # 2. Blocks + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + cross_attention_kwargs, + None, + ) + else: + hidden_states = block( + hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + timestep=timestep, + cross_attention_kwargs=cross_attention_kwargs, + class_labels=None, + ) + + # 3. Output + shift, scale = ( + self.scale_shift_table[None] + embedded_timestep[:, None].to(self.scale_shift_table.device) + ).chunk(2, dim=1) + hidden_states = self.norm_out(hidden_states) + # Modulation + hidden_states = hidden_states * (1 + scale.to(hidden_states.device)) + shift.to(hidden_states.device) + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.squeeze(1) + + # unpatchify + hidden_states = hidden_states.reshape( + shape=(-1, height, width, self.config.patch_size, self.config.patch_size, self.out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(-1, self.out_channels, height * self.config.patch_size, width * self.config.patch_size) + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/prior_transformer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/prior_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..ace2b529c4f2109e1069a5c635feb898e8752245 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/prior_transformer.py @@ -0,0 +1,322 @@ +from dataclasses import dataclass + +import torch +import torch.nn.functional as F +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin, UNet2DConditionLoadersMixin +from ...utils import BaseOutput +from ..attention import AttentionMixin, BasicTransformerBlock +from ..attention_processor import ( + ADDED_KV_ATTENTION_PROCESSORS, + CROSS_ATTENTION_PROCESSORS, + AttnAddedKVProcessor, + AttnProcessor, +) +from ..embeddings import TimestepEmbedding, Timesteps +from ..modeling_utils import ModelMixin + + +@dataclass +class PriorTransformerOutput(BaseOutput): + """ + The output of [`PriorTransformer`]. + + Args: + predicted_image_embedding (`torch.Tensor` of shape `(batch_size, embedding_dim)`): + The predicted CLIP image embedding conditioned on the CLIP text embedding input. + """ + + predicted_image_embedding: torch.Tensor + + +class PriorTransformer(ModelMixin, AttentionMixin, ConfigMixin, UNet2DConditionLoadersMixin, PeftAdapterMixin): + """ + A Prior Transformer model. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 32): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 64): The number of channels in each head. + num_layers (`int`, *optional*, defaults to 20): The number of layers of Transformer blocks to use. + embedding_dim (`int`, *optional*, defaults to 768): The dimension of the model input `hidden_states` + num_embeddings (`int`, *optional*, defaults to 77): + The number of embeddings of the model input `hidden_states` + additional_embeddings (`int`, *optional*, defaults to 4): The number of additional tokens appended to the + projected `hidden_states`. The actual length of the used `hidden_states` is `num_embeddings + + additional_embeddings`. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + time_embed_act_fn (`str`, *optional*, defaults to 'silu'): + The activation function to use to create timestep embeddings. + norm_in_type (`str`, *optional*, defaults to None): The normalization layer to apply on hidden states before + passing to Transformer blocks. Set it to `None` if normalization is not needed. + embedding_proj_norm_type (`str`, *optional*, defaults to None): + The normalization layer to apply on the input `proj_embedding`. Set it to `None` if normalization is not + needed. + encoder_hid_proj_type (`str`, *optional*, defaults to `linear`): + The projection layer to apply on the input `encoder_hidden_states`. Set it to `None` if + `encoder_hidden_states` is `None`. + added_emb_type (`str`, *optional*, defaults to `prd`): Additional embeddings to condition the model. + Choose from `prd` or `None`. if choose `prd`, it will prepend a token indicating the (quantized) dot + product between the text embedding and image embedding as proposed in the unclip paper + https://huggingface.co/papers/2204.06125 If it is `None`, no additional embeddings will be prepended. + time_embed_dim (`int, *optional*, defaults to None): The dimension of timestep embeddings. + If None, will be set to `num_attention_heads * attention_head_dim` + embedding_proj_dim (`int`, *optional*, default to None): + The dimension of `proj_embedding`. If None, will be set to `embedding_dim`. + clip_embed_dim (`int`, *optional*, default to None): + The dimension of the output. If None, will be set to `embedding_dim`. + """ + + @register_to_config + def __init__( + self, + num_attention_heads: int = 32, + attention_head_dim: int = 64, + num_layers: int = 20, + embedding_dim: int = 768, + num_embeddings=77, + additional_embeddings=4, + dropout: float = 0.0, + time_embed_act_fn: str = "silu", + norm_in_type: str | None = None, # layer + embedding_proj_norm_type: str | None = None, # layer + encoder_hid_proj_type: str | None = "linear", # linear + added_emb_type: str | None = "prd", # prd + time_embed_dim: int | None = None, + embedding_proj_dim: int | None = None, + clip_embed_dim: int | None = None, + ): + super().__init__() + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + inner_dim = num_attention_heads * attention_head_dim + self.additional_embeddings = additional_embeddings + + time_embed_dim = time_embed_dim or inner_dim + embedding_proj_dim = embedding_proj_dim or embedding_dim + clip_embed_dim = clip_embed_dim or embedding_dim + + self.time_proj = Timesteps(inner_dim, True, 0) + self.time_embedding = TimestepEmbedding(inner_dim, time_embed_dim, out_dim=inner_dim, act_fn=time_embed_act_fn) + + self.proj_in = nn.Linear(embedding_dim, inner_dim) + + if embedding_proj_norm_type is None: + self.embedding_proj_norm = None + elif embedding_proj_norm_type == "layer": + self.embedding_proj_norm = nn.LayerNorm(embedding_proj_dim) + else: + raise ValueError(f"unsupported embedding_proj_norm_type: {embedding_proj_norm_type}") + + self.embedding_proj = nn.Linear(embedding_proj_dim, inner_dim) + + if encoder_hid_proj_type is None: + self.encoder_hidden_states_proj = None + elif encoder_hid_proj_type == "linear": + self.encoder_hidden_states_proj = nn.Linear(embedding_dim, inner_dim) + else: + raise ValueError(f"unsupported encoder_hid_proj_type: {encoder_hid_proj_type}") + + self.positional_embedding = nn.Parameter(torch.zeros(1, num_embeddings + additional_embeddings, inner_dim)) + + if added_emb_type == "prd": + self.prd_embedding = nn.Parameter(torch.zeros(1, 1, inner_dim)) + elif added_emb_type is None: + self.prd_embedding = None + else: + raise ValueError( + f"`added_emb_type`: {added_emb_type} is not supported. Make sure to choose one of `'prd'` or `None`." + ) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + activation_fn="gelu", + attention_bias=True, + ) + for d in range(num_layers) + ] + ) + + if norm_in_type == "layer": + self.norm_in = nn.LayerNorm(inner_dim) + elif norm_in_type is None: + self.norm_in = None + else: + raise ValueError(f"Unsupported norm_in_type: {norm_in_type}.") + + self.norm_out = nn.LayerNorm(inner_dim) + + self.proj_to_clip_embeddings = nn.Linear(inner_dim, clip_embed_dim) + + causal_attention_mask = torch.full( + [num_embeddings + additional_embeddings, num_embeddings + additional_embeddings], -10000.0 + ) + causal_attention_mask.triu_(1) + causal_attention_mask = causal_attention_mask[None, ...] + self.register_buffer("causal_attention_mask", causal_attention_mask, persistent=False) + + self.clip_mean = nn.Parameter(torch.zeros(1, clip_embed_dim)) + self.clip_std = nn.Parameter(torch.zeros(1, clip_embed_dim)) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.set_default_attn_processor + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + if all(proc.__class__ in ADDED_KV_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnAddedKVProcessor() + elif all(proc.__class__ in CROSS_ATTENTION_PROCESSORS for proc in self.attn_processors.values()): + processor = AttnProcessor() + else: + raise ValueError( + f"Cannot call `set_default_attn_processor` when attention processors are of type {next(iter(self.attn_processors.values()))}" + ) + + self.set_attn_processor(processor) + + def forward( + self, + hidden_states, + timestep: torch.Tensor | float | int, + proj_embedding: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.BoolTensor | None = None, + return_dict: bool = True, + ): + """ + The [`PriorTransformer`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch_size, embedding_dim)`): + The currently predicted image embeddings. + timestep (`torch.LongTensor`): + Current denoising step. + proj_embedding (`torch.Tensor` of shape `(batch_size, embedding_dim)`): + Projected embedding vector the denoising process is conditioned on. + encoder_hidden_states (`torch.Tensor` of shape `(batch_size, num_embeddings, embedding_dim)`): + Hidden states of the text embeddings the denoising process is conditioned on. + attention_mask (`torch.BoolTensor` of shape `(batch_size, num_embeddings)`): + Text mask for the text embeddings. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformers.prior_transformer.PriorTransformerOutput`] instead of + a plain tuple. + + Returns: + [`~models.transformers.prior_transformer.PriorTransformerOutput`] or `tuple`: + If return_dict is True, a [`~models.transformers.prior_transformer.PriorTransformerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + batch_size = hidden_states.shape[0] + + timesteps = timestep + if not torch.is_tensor(timesteps): + timesteps = torch.tensor([timesteps], dtype=torch.long, device=hidden_states.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(hidden_states.device) + + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps * torch.ones(batch_size, dtype=timesteps.dtype, device=timesteps.device) + + timesteps_projected = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might be fp16, so we need to cast here. + timesteps_projected = timesteps_projected.to(dtype=self.dtype) + time_embeddings = self.time_embedding(timesteps_projected) + + if self.embedding_proj_norm is not None: + proj_embedding = self.embedding_proj_norm(proj_embedding) + + proj_embeddings = self.embedding_proj(proj_embedding) + if self.encoder_hidden_states_proj is not None and encoder_hidden_states is not None: + encoder_hidden_states = self.encoder_hidden_states_proj(encoder_hidden_states) + elif self.encoder_hidden_states_proj is not None and encoder_hidden_states is None: + raise ValueError("`encoder_hidden_states_proj` requires `encoder_hidden_states` to be set") + + hidden_states = self.proj_in(hidden_states) + + positional_embeddings = self.positional_embedding.to(hidden_states.dtype) + + additional_embeds = [] + additional_embeddings_len = 0 + + if encoder_hidden_states is not None: + additional_embeds.append(encoder_hidden_states) + additional_embeddings_len += encoder_hidden_states.shape[1] + + if len(proj_embeddings.shape) == 2: + proj_embeddings = proj_embeddings[:, None, :] + + if len(hidden_states.shape) == 2: + hidden_states = hidden_states[:, None, :] + + additional_embeds = additional_embeds + [ + proj_embeddings, + time_embeddings[:, None, :], + hidden_states, + ] + + if self.prd_embedding is not None: + prd_embedding = self.prd_embedding.to(hidden_states.dtype).expand(batch_size, -1, -1) + additional_embeds.append(prd_embedding) + + hidden_states = torch.cat( + additional_embeds, + dim=1, + ) + + # Allow positional_embedding to not include the `addtional_embeddings` and instead pad it with zeros for these additional tokens + additional_embeddings_len = additional_embeddings_len + proj_embeddings.shape[1] + 1 + if positional_embeddings.shape[1] < hidden_states.shape[1]: + positional_embeddings = F.pad( + positional_embeddings, + ( + 0, + 0, + additional_embeddings_len, + self.prd_embedding.shape[1] if self.prd_embedding is not None else 0, + ), + value=0.0, + ) + + hidden_states = hidden_states + positional_embeddings + + if attention_mask is not None: + attention_mask = (1 - attention_mask.to(hidden_states.dtype)) * -10000.0 + attention_mask = F.pad(attention_mask, (0, self.additional_embeddings), value=0.0) + attention_mask = (attention_mask[:, None, :] + self.causal_attention_mask).to(hidden_states.dtype) + attention_mask = attention_mask.repeat_interleave( + self.config.num_attention_heads, + dim=0, + output_size=attention_mask.shape[0] * self.config.num_attention_heads, + ) + + if self.norm_in is not None: + hidden_states = self.norm_in(hidden_states) + + for block in self.transformer_blocks: + hidden_states = block(hidden_states, attention_mask=attention_mask) + + hidden_states = self.norm_out(hidden_states) + + if self.prd_embedding is not None: + hidden_states = hidden_states[:, -1] + else: + hidden_states = hidden_states[:, additional_embeddings_len:] + + predicted_image_embedding = self.proj_to_clip_embeddings(hidden_states) + + if not return_dict: + return (predicted_image_embedding,) + + return PriorTransformerOutput(predicted_image_embedding=predicted_image_embedding) + + def post_process_latents(self, prior_latents): + prior_latents = (prior_latents * self.clip_std) + self.clip_mean + return prior_latents diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/sana_transformer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/sana_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..ff078dc695d73a3f4846a8c7ec7fa7e7fd364ce7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/sana_transformer.py @@ -0,0 +1,519 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn.functional as F +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin +from ..attention_processor import ( + Attention, + SanaLinearAttnProcessor2_0, +) +from ..embeddings import PatchEmbed, PixArtAlphaTextProjection, TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class GLUMBConv(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + expand_ratio: float = 4, + norm_type: str | None = None, + residual_connection: bool = True, + ) -> None: + super().__init__() + + hidden_channels = int(expand_ratio * in_channels) + self.norm_type = norm_type + self.residual_connection = residual_connection + + self.nonlinearity = nn.SiLU() + self.conv_inverted = nn.Conv2d(in_channels, hidden_channels * 2, 1, 1, 0) + self.conv_depth = nn.Conv2d(hidden_channels * 2, hidden_channels * 2, 3, 1, 1, groups=hidden_channels * 2) + self.conv_point = nn.Conv2d(hidden_channels, out_channels, 1, 1, 0, bias=False) + + self.norm = None + if norm_type == "rms_norm": + self.norm = RMSNorm(out_channels, eps=1e-5, elementwise_affine=True, bias=True) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.residual_connection: + residual = hidden_states + + hidden_states = self.conv_inverted(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.conv_depth(hidden_states) + hidden_states, gate = torch.chunk(hidden_states, 2, dim=1) + hidden_states = hidden_states * self.nonlinearity(gate) + + hidden_states = self.conv_point(hidden_states) + + if self.norm_type == "rms_norm": + # move channel to the last dimension so we apply RMSnorm across channel dimension + hidden_states = self.norm(hidden_states.movedim(1, -1)).movedim(-1, 1) + + if self.residual_connection: + hidden_states = hidden_states + residual + + return hidden_states + + +class SanaModulatedNorm(nn.Module): + def __init__(self, dim: int, elementwise_affine: bool = False, eps: float = 1e-6): + super().__init__() + self.norm = nn.LayerNorm(dim, elementwise_affine=elementwise_affine, eps=eps) + + def forward( + self, hidden_states: torch.Tensor, temb: torch.Tensor, scale_shift_table: torch.Tensor + ) -> torch.Tensor: + hidden_states = self.norm(hidden_states) + shift, scale = (scale_shift_table[None] + temb[:, None].to(scale_shift_table.device)).chunk(2, dim=1) + hidden_states = hidden_states * (1 + scale) + shift + return hidden_states + + +class SanaCombinedTimestepGuidanceEmbeddings(nn.Module): + def __init__(self, embedding_dim): + super().__init__() + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.guidance_condition_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.guidance_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 6 * embedding_dim, bias=True) + + def forward(self, timestep: torch.Tensor, guidance: torch.Tensor = None, hidden_dtype: torch.dtype = None): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D) + + guidance_proj = self.guidance_condition_proj(guidance) + guidance_emb = self.guidance_embedder(guidance_proj.to(dtype=hidden_dtype)) + conditioning = timesteps_emb + guidance_emb + + return self.linear(self.silu(conditioning)), conditioning + + +class SanaAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("SanaAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class SanaTransformerBlock(nn.Module): + r""" + Transformer block introduced in [Sana](https://huggingface.co/papers/2410.10629). + """ + + def __init__( + self, + dim: int = 2240, + num_attention_heads: int = 70, + attention_head_dim: int = 32, + dropout: float = 0.0, + num_cross_attention_heads: int | None = 20, + cross_attention_head_dim: int | None = 112, + cross_attention_dim: int | None = 2240, + attention_bias: bool = True, + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + attention_out_bias: bool = True, + mlp_ratio: float = 2.5, + qk_norm: str | None = None, + ) -> None: + super().__init__() + + # 1. Self Attention + self.norm1 = nn.LayerNorm(dim, elementwise_affine=False, eps=norm_eps) + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + kv_heads=num_attention_heads if qk_norm is not None else None, + qk_norm=qk_norm, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=None, + processor=SanaLinearAttnProcessor2_0(), + ) + + # 2. Cross Attention + if cross_attention_dim is not None: + self.norm2 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + self.attn2 = Attention( + query_dim=dim, + qk_norm=qk_norm, + kv_heads=num_cross_attention_heads if qk_norm is not None else None, + cross_attention_dim=cross_attention_dim, + heads=num_cross_attention_heads, + dim_head=cross_attention_head_dim, + dropout=dropout, + bias=True, + out_bias=attention_out_bias, + processor=SanaAttnProcessor2_0(), + ) + + # 3. Feed-forward + self.ff = GLUMBConv(dim, dim, mlp_ratio, norm_type=None, residual_connection=False) + + self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + timestep: torch.LongTensor | None = None, + height: int = None, + width: int = None, + ) -> torch.Tensor: + batch_size = hidden_states.shape[0] + + # 1. Modulation + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ( + self.scale_shift_table[None] + timestep.reshape(batch_size, 6, -1) + ).chunk(6, dim=1) + + # 2. Self Attention + norm_hidden_states = self.norm1(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + norm_hidden_states = norm_hidden_states.to(hidden_states.dtype) + + attn_output = self.attn1(norm_hidden_states) + hidden_states = hidden_states + gate_msa * attn_output + + # 3. Cross Attention + if self.attn2 is not None: + attn_output = self.attn2( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + ) + hidden_states = attn_output + hidden_states + + # 4. Feed-forward + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp + + norm_hidden_states = norm_hidden_states.unflatten(1, (height, width)).permute(0, 3, 1, 2) + ff_output = self.ff(norm_hidden_states) + ff_output = ff_output.flatten(2, 3).permute(0, 2, 1) + hidden_states = hidden_states + gate_mlp * ff_output + + return hidden_states + + +class SanaTransformer2DModel(ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + r""" + A 2D Transformer model introduced in [Sana](https://huggingface.co/papers/2410.10629) family of models. + + Args: + in_channels (`int`, defaults to `32`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `32`): + The number of channels in the output. + num_attention_heads (`int`, defaults to `70`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `32`): + The number of channels in each head. + num_layers (`int`, defaults to `20`): + The number of layers of Transformer blocks to use. + num_cross_attention_heads (`int`, *optional*, defaults to `20`): + The number of heads to use for cross-attention. + cross_attention_head_dim (`int`, *optional*, defaults to `112`): + The number of channels in each head for cross-attention. + cross_attention_dim (`int`, *optional*, defaults to `2240`): + The number of channels in the cross-attention output. + caption_channels (`int`, defaults to `2304`): + The number of channels in the caption embeddings. + mlp_ratio (`float`, defaults to `2.5`): + The expansion ratio to use in the GLUMBConv layer. + dropout (`float`, defaults to `0.0`): + The dropout probability. + attention_bias (`bool`, defaults to `False`): + Whether to use bias in the attention layer. + sample_size (`int`, defaults to `32`): + The base size of the input latent. + patch_size (`int`, defaults to `1`): + The size of the patches to use in the patch embedding layer. + norm_elementwise_affine (`bool`, defaults to `False`): + Whether to use elementwise affinity in the normalization layer. + norm_eps (`float`, defaults to `1e-6`): + The epsilon value for the normalization layer. + qk_norm (`str`, *optional*, defaults to `None`): + The normalization to use for the query and key. + timestep_scale (`float`, defaults to `1.0`): + The scale to use for the timesteps. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["SanaTransformerBlock", "PatchEmbed", "SanaModulatedNorm"] + _skip_layerwise_casting_patterns = ["patch_embed", "norm"] + + @register_to_config + def __init__( + self, + in_channels: int = 32, + out_channels: int | None = 32, + num_attention_heads: int = 70, + attention_head_dim: int = 32, + num_layers: int = 20, + num_cross_attention_heads: int | None = 20, + cross_attention_head_dim: int | None = 112, + cross_attention_dim: int | None = 2240, + caption_channels: int = 2304, + mlp_ratio: float = 2.5, + dropout: float = 0.0, + attention_bias: bool = False, + sample_size: int = 32, + patch_size: int = 1, + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + interpolation_scale: int | None = None, + guidance_embeds: bool = False, + guidance_embeds_scale: float = 0.1, + qk_norm: str | None = None, + timestep_scale: float = 1.0, + ) -> None: + super().__init__() + + out_channels = out_channels or in_channels + inner_dim = num_attention_heads * attention_head_dim + + # 1. Patch Embedding + self.patch_embed = PatchEmbed( + height=sample_size, + width=sample_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=inner_dim, + interpolation_scale=interpolation_scale, + pos_embed_type="sincos" if interpolation_scale is not None else None, + ) + + # 2. Additional condition embeddings + if guidance_embeds: + self.time_embed = SanaCombinedTimestepGuidanceEmbeddings(inner_dim) + else: + self.time_embed = AdaLayerNormSingle(inner_dim) + + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=inner_dim) + self.caption_norm = RMSNorm(inner_dim, eps=1e-5, elementwise_affine=True) + + # 3. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + SanaTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + num_cross_attention_heads=num_cross_attention_heads, + cross_attention_head_dim=cross_attention_head_dim, + cross_attention_dim=cross_attention_dim, + attention_bias=attention_bias, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + mlp_ratio=mlp_ratio, + qk_norm=qk_norm, + ) + for _ in range(num_layers) + ] + ) + + # 4. Output blocks + self.scale_shift_table = nn.Parameter(torch.randn(2, inner_dim) / inner_dim**0.5) + self.norm_out = SanaModulatedNorm(inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.Tensor, + guidance: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + controlnet_block_samples: tuple[torch.Tensor] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor, ...] | Transformer2DModelOutput: + # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. + # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. + # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. + # expects mask of shape: + # [batch, key_tokens] + # adds singleton query_tokens dimension: + # [batch, 1, key_tokens] + # this helps to broadcast it as a bias over attention scores, which will be in one of the following shapes: + # [batch, heads, query_tokens, key_tokens] (e.g. torch sdp attn) + # [batch * heads, query_tokens, key_tokens] (e.g. xformers or classic attn) + if attention_mask is not None and attention_mask.ndim == 2: + # assume that mask is expressed as: + # (1 = keep, 0 = discard) + # convert mask into a bias that can be added to attention scores: + # (keep = +0, discard = -10000.0) + attention_mask = (1 - attention_mask.to(hidden_states.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + # 1. Input + batch_size, num_channels, height, width = hidden_states.shape + p = self.config.patch_size + post_patch_height, post_patch_width = height // p, width // p + + hidden_states = self.patch_embed(hidden_states) + + if guidance is not None: + timestep, embedded_timestep = self.time_embed( + timestep, guidance=guidance, hidden_dtype=hidden_states.dtype + ) + else: + timestep, embedded_timestep = self.time_embed( + timestep, batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.shape[-1]) + + encoder_hidden_states = self.caption_norm(encoder_hidden_states) + + # 2. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for index_block, block in enumerate(self.transformer_blocks): + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + post_patch_height, + post_patch_width, + ) + if controlnet_block_samples is not None and 0 < index_block <= len(controlnet_block_samples): + hidden_states = hidden_states + controlnet_block_samples[index_block - 1] + + else: + for index_block, block in enumerate(self.transformer_blocks): + hidden_states = block( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + post_patch_height, + post_patch_width, + ) + if controlnet_block_samples is not None and 0 < index_block <= len(controlnet_block_samples): + hidden_states = hidden_states + controlnet_block_samples[index_block - 1] + + # 3. Normalization + hidden_states = self.norm_out(hidden_states, embedded_timestep, self.scale_shift_table) + + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + hidden_states = hidden_states.reshape( + batch_size, post_patch_height, post_patch_width, self.config.patch_size, self.config.patch_size, -1 + ) + hidden_states = hidden_states.permute(0, 5, 1, 3, 2, 4) + output = hidden_states.reshape(batch_size, -1, post_patch_height * p, post_patch_width * p) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/stable_audio_transformer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/stable_audio_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..f4974926ec7279a1691e35f41c86d427d9123dea --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/stable_audio_transformer.py @@ -0,0 +1,376 @@ +# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import numpy as np +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, FeedForward +from ..attention_processor import Attention, StableAudioAttnProcessor2_0 +from ..modeling_utils import ModelMixin +from ..transformers.transformer_2d import Transformer2DModelOutput + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class StableAudioGaussianFourierProjection(nn.Module): + """Gaussian Fourier embeddings for noise levels.""" + + # Copied from diffusers.models.embeddings.GaussianFourierProjection.__init__ + def __init__( + self, embedding_size: int = 256, scale: float = 1.0, set_W_to_weight=True, log=True, flip_sin_to_cos=False + ): + super().__init__() + self.weight = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) + self.log = log + self.flip_sin_to_cos = flip_sin_to_cos + + if set_W_to_weight: + # to delete later + del self.weight + self.W = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) + self.weight = self.W + del self.W + + def forward(self, x): + if self.log: + x = torch.log(x) + + x_proj = 2 * np.pi * x[:, None] @ self.weight[None, :] + + if self.flip_sin_to_cos: + out = torch.cat([torch.cos(x_proj), torch.sin(x_proj)], dim=-1) + else: + out = torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) + return out + + +@maybe_allow_in_graph +class StableAudioDiTBlock(nn.Module): + r""" + Transformer block used in Stable Audio model (https://github.com/Stability-AI/stable-audio-tools). Allow skip + connection and QKNorm + + Parameters: + dim (`int`): The number of channels in the input and output. + num_attention_heads (`int`): The number of heads to use for the query states. + num_key_value_attention_heads (`int`): The number of heads to use for the key and value states. + attention_head_dim (`int`): The number of channels in each head. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention. + upcast_attention (`bool`, *optional*): + Whether to upcast the attention computation to float32. This is useful for mixed precision training. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + num_key_value_attention_heads: int, + attention_head_dim: int, + dropout=0.0, + cross_attention_dim: int | None = None, + upcast_attention: bool = False, + norm_eps: float = 1e-5, + ff_inner_dim: int | None = None, + ): + super().__init__() + # Define 3 blocks. Each block has its own normalization layer. + # 1. Self-Attn + self.norm1 = nn.LayerNorm(dim, elementwise_affine=True, eps=norm_eps) + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=False, + upcast_attention=upcast_attention, + out_bias=False, + processor=StableAudioAttnProcessor2_0(), + ) + + # 2. Cross-Attn + self.norm2 = nn.LayerNorm(dim, norm_eps, True) + + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + kv_heads=num_key_value_attention_heads, + dropout=dropout, + bias=False, + upcast_attention=upcast_attention, + out_bias=False, + processor=StableAudioAttnProcessor2_0(), + ) # is self-attn if encoder_hidden_states is none + + # 3. Feed-forward + self.norm3 = nn.LayerNorm(dim, norm_eps, True) + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn="swiglu", + final_dropout=False, + inner_dim=ff_inner_dim, + bias=True, + ) + + # let chunk size default to None + self._chunk_size = None + self._chunk_dim = 0 + + def set_chunk_feed_forward(self, chunk_size: int | None, dim: int = 0): + # Sets chunk feed-forward + self._chunk_size = chunk_size + self._chunk_dim = dim + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + rotary_embedding: torch.FloatTensor | None = None, + ) -> torch.Tensor: + # Notice that normalization is always applied before the real computation in the following blocks. + # 0. Self-Attention + norm_hidden_states = self.norm1(hidden_states) + + attn_output = self.attn1( + norm_hidden_states, + attention_mask=attention_mask, + rotary_emb=rotary_embedding, + ) + + hidden_states = attn_output + hidden_states + + # 2. Cross-Attention + norm_hidden_states = self.norm2(hidden_states) + + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + ) + hidden_states = attn_output + hidden_states + + # 3. Feed-forward + norm_hidden_states = self.norm3(hidden_states) + ff_output = self.ff(norm_hidden_states) + + hidden_states = ff_output + hidden_states + + return hidden_states + + +class StableAudioDiTModel(ModelMixin, AttentionMixin, ConfigMixin): + """ + The Diffusion Transformer model introduced in Stable Audio. + + Reference: https://github.com/Stability-AI/stable-audio-tools + + Parameters: + sample_size ( `int`, *optional*, defaults to 1024): The size of the input sample. + in_channels (`int`, *optional*, defaults to 64): The number of channels in the input. + num_layers (`int`, *optional*, defaults to 24): The number of layers of Transformer blocks to use. + attention_head_dim (`int`, *optional*, defaults to 64): The number of channels in each head. + num_attention_heads (`int`, *optional*, defaults to 24): The number of heads to use for the query states. + num_key_value_attention_heads (`int`, *optional*, defaults to 12): + The number of heads to use for the key and value states. + out_channels (`int`, defaults to 64): Number of output channels. + cross_attention_dim ( `int`, *optional*, defaults to 768): Dimension of the cross-attention projection. + time_proj_dim ( `int`, *optional*, defaults to 256): Dimension of the timestep inner projection. + global_states_input_dim ( `int`, *optional*, defaults to 1536): + Input dimension of the global hidden states projection. + cross_attention_input_dim ( `int`, *optional*, defaults to 768): + Input dimension of the cross-attention projection + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["preprocess_conv", "postprocess_conv", "^proj_in$", "^proj_out$", "norm"] + + @register_to_config + def __init__( + self, + sample_size: int = 1024, + in_channels: int = 64, + num_layers: int = 24, + attention_head_dim: int = 64, + num_attention_heads: int = 24, + num_key_value_attention_heads: int = 12, + out_channels: int = 64, + cross_attention_dim: int = 768, + time_proj_dim: int = 256, + global_states_input_dim: int = 1536, + cross_attention_input_dim: int = 768, + ): + super().__init__() + self.sample_size = sample_size + self.out_channels = out_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.time_proj = StableAudioGaussianFourierProjection( + embedding_size=time_proj_dim // 2, + flip_sin_to_cos=True, + log=False, + set_W_to_weight=False, + ) + + self.timestep_proj = nn.Sequential( + nn.Linear(time_proj_dim, self.inner_dim, bias=True), + nn.SiLU(), + nn.Linear(self.inner_dim, self.inner_dim, bias=True), + ) + + self.global_proj = nn.Sequential( + nn.Linear(global_states_input_dim, self.inner_dim, bias=False), + nn.SiLU(), + nn.Linear(self.inner_dim, self.inner_dim, bias=False), + ) + + self.cross_attention_proj = nn.Sequential( + nn.Linear(cross_attention_input_dim, cross_attention_dim, bias=False), + nn.SiLU(), + nn.Linear(cross_attention_dim, cross_attention_dim, bias=False), + ) + + self.preprocess_conv = nn.Conv1d(in_channels, in_channels, 1, bias=False) + self.proj_in = nn.Linear(in_channels, self.inner_dim, bias=False) + + self.transformer_blocks = nn.ModuleList( + [ + StableAudioDiTBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + num_key_value_attention_heads=num_key_value_attention_heads, + attention_head_dim=attention_head_dim, + cross_attention_dim=cross_attention_dim, + ) + for i in range(num_layers) + ] + ) + + self.proj_out = nn.Linear(self.inner_dim, self.out_channels, bias=False) + self.postprocess_conv = nn.Conv1d(self.out_channels, self.out_channels, 1, bias=False) + + self.gradient_checkpointing = False + + # Copied from diffusers.models.transformers.hunyuan_transformer_2d.HunyuanDiT2DModel.set_default_attn_processor with Hunyuan->StableAudio + def set_default_attn_processor(self): + """ + Disables custom attention processors and sets the default attention implementation. + """ + self.set_attn_processor(StableAudioAttnProcessor2_0()) + + def forward( + self, + hidden_states: torch.FloatTensor, + timestep: torch.LongTensor = None, + encoder_hidden_states: torch.FloatTensor = None, + global_hidden_states: torch.FloatTensor = None, + rotary_embedding: torch.FloatTensor = None, + return_dict: bool = True, + attention_mask: torch.LongTensor | None = None, + encoder_attention_mask: torch.LongTensor | None = None, + ) -> torch.FloatTensor | Transformer2DModelOutput: + """ + The [`StableAudioDiTModel`] forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, in_channels, sequence_len)`): + Input `hidden_states`. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, encoder_sequence_len, cross_attention_input_dim)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + global_hidden_states (`torch.FloatTensor` of shape `(batch size, global_sequence_len, global_states_input_dim)`): + Global embeddings that will be prepended to the hidden states. + rotary_embedding (`torch.Tensor`): + The rotary embeddings to apply on query and key tensors during attention calculation. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_len)`, *optional*): + Mask to avoid performing attention on padding token indices, formed by concatenating the attention + masks + for the two text encoders together. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + encoder_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_len)`, *optional*): + Mask to avoid performing attention on padding token cross-attention indices, formed by concatenating + the attention masks + for the two text encoders together. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + cross_attention_hidden_states = self.cross_attention_proj(encoder_hidden_states) + global_hidden_states = self.global_proj(global_hidden_states) + time_hidden_states = self.timestep_proj(self.time_proj(timestep.to(self.dtype))) + + global_hidden_states = global_hidden_states + time_hidden_states.unsqueeze(1) + + hidden_states = self.preprocess_conv(hidden_states) + hidden_states + # (batch_size, dim, sequence_length) -> (batch_size, sequence_length, dim) + hidden_states = hidden_states.transpose(1, 2) + + hidden_states = self.proj_in(hidden_states) + + # prepend global states to hidden states + hidden_states = torch.cat([global_hidden_states, hidden_states], dim=-2) + if attention_mask is not None: + prepend_mask = torch.ones((hidden_states.shape[0], 1), device=hidden_states.device, dtype=torch.bool) + attention_mask = torch.cat([prepend_mask, attention_mask], dim=-1) + + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + attention_mask, + cross_attention_hidden_states, + encoder_attention_mask, + rotary_embedding, + ) + + else: + hidden_states = block( + hidden_states=hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=cross_attention_hidden_states, + encoder_attention_mask=encoder_attention_mask, + rotary_embedding=rotary_embedding, + ) + + hidden_states = self.proj_out(hidden_states) + + # (batch_size, sequence_length, dim) -> (batch_size, dim, sequence_length) + # remove prepend length that has been added by global hidden states + hidden_states = hidden_states.transpose(1, 2)[:, :, 1:] + hidden_states = self.postprocess_conv(hidden_states) + hidden_states + + if not return_dict: + return (hidden_states,) + + return Transformer2DModelOutput(sample=hidden_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/t5_film_transformer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/t5_film_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae2b1e3fedb7316525cf1405ea2ba4192783b65 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/t5_film_transformer.py @@ -0,0 +1,435 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ..attention_processor import Attention +from ..embeddings import get_timestep_embedding +from ..modeling_utils import ModelMixin + + +class T5FilmDecoder(ModelMixin, ConfigMixin): + r""" + T5 style decoder with FiLM conditioning. + + Args: + input_dims (`int`, *optional*, defaults to `128`): + The number of input dimensions. + targets_length (`int`, *optional*, defaults to `256`): + The length of the targets. + d_model (`int`, *optional*, defaults to `768`): + Size of the input hidden states. + num_layers (`int`, *optional*, defaults to `12`): + The number of `DecoderLayer`'s to use. + num_heads (`int`, *optional*, defaults to `12`): + The number of attention heads to use. + d_kv (`int`, *optional*, defaults to `64`): + Size of the key-value projection vectors. + d_ff (`int`, *optional*, defaults to `2048`): + The number of dimensions in the intermediate feed-forward layer of `DecoderLayer`'s. + dropout_rate (`float`, *optional*, defaults to `0.1`): + Dropout probability. + """ + + @register_to_config + def __init__( + self, + input_dims: int = 128, + targets_length: int = 256, + max_decoder_noise_time: float = 2000.0, + d_model: int = 768, + num_layers: int = 12, + num_heads: int = 12, + d_kv: int = 64, + d_ff: int = 2048, + dropout_rate: float = 0.1, + ): + super().__init__() + + self.conditioning_emb = nn.Sequential( + nn.Linear(d_model, d_model * 4, bias=False), + nn.SiLU(), + nn.Linear(d_model * 4, d_model * 4, bias=False), + nn.SiLU(), + ) + + self.position_encoding = nn.Embedding(targets_length, d_model) + self.position_encoding.weight.requires_grad = False + + self.continuous_inputs_projection = nn.Linear(input_dims, d_model, bias=False) + + self.dropout = nn.Dropout(p=dropout_rate) + + self.decoders = nn.ModuleList() + for lyr_num in range(num_layers): + # FiLM conditional T5 decoder + lyr = DecoderLayer(d_model=d_model, d_kv=d_kv, num_heads=num_heads, d_ff=d_ff, dropout_rate=dropout_rate) + self.decoders.append(lyr) + + self.decoder_norm = T5LayerNorm(d_model) + + self.post_dropout = nn.Dropout(p=dropout_rate) + self.spec_out = nn.Linear(d_model, input_dims, bias=False) + + def encoder_decoder_mask(self, query_input: torch.Tensor, key_input: torch.Tensor) -> torch.Tensor: + mask = torch.mul(query_input.unsqueeze(-1), key_input.unsqueeze(-2)) + return mask.unsqueeze(-3) + + def forward(self, encodings_and_masks, decoder_input_tokens, decoder_noise_time): + batch, _, _ = decoder_input_tokens.shape + assert decoder_noise_time.shape == (batch,) + + # decoder_noise_time is in [0, 1), so rescale to expected timing range. + time_steps = get_timestep_embedding( + decoder_noise_time * self.config.max_decoder_noise_time, + embedding_dim=self.config.d_model, + max_period=self.config.max_decoder_noise_time, + ).to(dtype=self.dtype) + + conditioning_emb = self.conditioning_emb(time_steps).unsqueeze(1) + + assert conditioning_emb.shape == (batch, 1, self.config.d_model * 4) + + seq_length = decoder_input_tokens.shape[1] + + # If we want to use relative positions for audio context, we can just offset + # this sequence by the length of encodings_and_masks. + decoder_positions = torch.broadcast_to( + torch.arange(seq_length, device=decoder_input_tokens.device), + (batch, seq_length), + ) + + position_encodings = self.position_encoding(decoder_positions) + + inputs = self.continuous_inputs_projection(decoder_input_tokens) + inputs += position_encodings + y = self.dropout(inputs) + + # decoder: No padding present. + decoder_mask = torch.ones( + decoder_input_tokens.shape[:2], device=decoder_input_tokens.device, dtype=inputs.dtype + ) + + # Translate encoding masks to encoder-decoder masks. + encodings_and_encdec_masks = [(x, self.encoder_decoder_mask(decoder_mask, y)) for x, y in encodings_and_masks] + + # cross attend style: concat encodings + encoded = torch.cat([x[0] for x in encodings_and_encdec_masks], dim=1) + encoder_decoder_mask = torch.cat([x[1] for x in encodings_and_encdec_masks], dim=-1) + + for lyr in self.decoders: + y = lyr( + y, + conditioning_emb=conditioning_emb, + encoder_hidden_states=encoded, + encoder_attention_mask=encoder_decoder_mask, + )[0] + + y = self.decoder_norm(y) + y = self.post_dropout(y) + + spec_out = self.spec_out(y) + return spec_out + + +class DecoderLayer(nn.Module): + r""" + T5 decoder layer. + + Args: + d_model (`int`): + Size of the input hidden states. + d_kv (`int`): + Size of the key-value projection vectors. + num_heads (`int`): + Number of attention heads. + d_ff (`int`): + Size of the intermediate feed-forward layer. + dropout_rate (`float`): + Dropout probability. + layer_norm_epsilon (`float`, *optional*, defaults to `1e-6`): + A small value used for numerical stability to avoid dividing by zero. + """ + + def __init__( + self, d_model: int, d_kv: int, num_heads: int, d_ff: int, dropout_rate: float, layer_norm_epsilon: float = 1e-6 + ): + super().__init__() + self.layer = nn.ModuleList() + + # cond self attention: layer 0 + self.layer.append( + T5LayerSelfAttentionCond(d_model=d_model, d_kv=d_kv, num_heads=num_heads, dropout_rate=dropout_rate) + ) + + # cross attention: layer 1 + self.layer.append( + T5LayerCrossAttention( + d_model=d_model, + d_kv=d_kv, + num_heads=num_heads, + dropout_rate=dropout_rate, + layer_norm_epsilon=layer_norm_epsilon, + ) + ) + + # Film Cond MLP + dropout: last layer + self.layer.append( + T5LayerFFCond(d_model=d_model, d_ff=d_ff, dropout_rate=dropout_rate, layer_norm_epsilon=layer_norm_epsilon) + ) + + def forward( + self, + hidden_states: torch.Tensor, + conditioning_emb: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + encoder_decoder_position_bias=None, + ) -> tuple[torch.Tensor]: + hidden_states = self.layer[0]( + hidden_states, + conditioning_emb=conditioning_emb, + attention_mask=attention_mask, + ) + + if encoder_hidden_states is not None: + encoder_extended_attention_mask = torch.where(encoder_attention_mask > 0, 0, -1e10).to( + encoder_hidden_states.dtype + ) + + hidden_states = self.layer[1]( + hidden_states, + key_value_states=encoder_hidden_states, + attention_mask=encoder_extended_attention_mask, + ) + + # Apply Film Conditional Feed Forward layer + hidden_states = self.layer[-1](hidden_states, conditioning_emb) + + return (hidden_states,) + + +class T5LayerSelfAttentionCond(nn.Module): + r""" + T5 style self-attention layer with conditioning. + + Args: + d_model (`int`): + Size of the input hidden states. + d_kv (`int`): + Size of the key-value projection vectors. + num_heads (`int`): + Number of attention heads. + dropout_rate (`float`): + Dropout probability. + """ + + def __init__(self, d_model: int, d_kv: int, num_heads: int, dropout_rate: float): + super().__init__() + self.layer_norm = T5LayerNorm(d_model) + self.FiLMLayer = T5FiLMLayer(in_features=d_model * 4, out_features=d_model) + self.attention = Attention(query_dim=d_model, heads=num_heads, dim_head=d_kv, out_bias=False, scale_qk=False) + self.dropout = nn.Dropout(dropout_rate) + + def forward( + self, + hidden_states: torch.Tensor, + conditioning_emb: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + # pre_self_attention_layer_norm + normed_hidden_states = self.layer_norm(hidden_states) + + if conditioning_emb is not None: + normed_hidden_states = self.FiLMLayer(normed_hidden_states, conditioning_emb) + + # Self-attention block + attention_output = self.attention(normed_hidden_states) + + hidden_states = hidden_states + self.dropout(attention_output) + + return hidden_states + + +class T5LayerCrossAttention(nn.Module): + r""" + T5 style cross-attention layer. + + Args: + d_model (`int`): + Size of the input hidden states. + d_kv (`int`): + Size of the key-value projection vectors. + num_heads (`int`): + Number of attention heads. + dropout_rate (`float`): + Dropout probability. + layer_norm_epsilon (`float`): + A small value used for numerical stability to avoid dividing by zero. + """ + + def __init__(self, d_model: int, d_kv: int, num_heads: int, dropout_rate: float, layer_norm_epsilon: float): + super().__init__() + self.attention = Attention(query_dim=d_model, heads=num_heads, dim_head=d_kv, out_bias=False, scale_qk=False) + self.layer_norm = T5LayerNorm(d_model, eps=layer_norm_epsilon) + self.dropout = nn.Dropout(dropout_rate) + + def forward( + self, + hidden_states: torch.Tensor, + key_value_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + normed_hidden_states = self.layer_norm(hidden_states) + attention_output = self.attention( + normed_hidden_states, + encoder_hidden_states=key_value_states, + attention_mask=attention_mask.squeeze(1), + ) + layer_output = hidden_states + self.dropout(attention_output) + return layer_output + + +class T5LayerFFCond(nn.Module): + r""" + T5 style feed-forward conditional layer. + + Args: + d_model (`int`): + Size of the input hidden states. + d_ff (`int`): + Size of the intermediate feed-forward layer. + dropout_rate (`float`): + Dropout probability. + layer_norm_epsilon (`float`): + A small value used for numerical stability to avoid dividing by zero. + """ + + def __init__(self, d_model: int, d_ff: int, dropout_rate: float, layer_norm_epsilon: float): + super().__init__() + self.DenseReluDense = T5DenseGatedActDense(d_model=d_model, d_ff=d_ff, dropout_rate=dropout_rate) + self.film = T5FiLMLayer(in_features=d_model * 4, out_features=d_model) + self.layer_norm = T5LayerNorm(d_model, eps=layer_norm_epsilon) + self.dropout = nn.Dropout(dropout_rate) + + def forward(self, hidden_states: torch.Tensor, conditioning_emb: torch.Tensor | None = None) -> torch.Tensor: + forwarded_states = self.layer_norm(hidden_states) + if conditioning_emb is not None: + forwarded_states = self.film(forwarded_states, conditioning_emb) + + forwarded_states = self.DenseReluDense(forwarded_states) + hidden_states = hidden_states + self.dropout(forwarded_states) + return hidden_states + + +class T5DenseGatedActDense(nn.Module): + r""" + T5 style feed-forward layer with gated activations and dropout. + + Args: + d_model (`int`): + Size of the input hidden states. + d_ff (`int`): + Size of the intermediate feed-forward layer. + dropout_rate (`float`): + Dropout probability. + """ + + def __init__(self, d_model: int, d_ff: int, dropout_rate: float): + super().__init__() + self.wi_0 = nn.Linear(d_model, d_ff, bias=False) + self.wi_1 = nn.Linear(d_model, d_ff, bias=False) + self.wo = nn.Linear(d_ff, d_model, bias=False) + self.dropout = nn.Dropout(dropout_rate) + self.act = NewGELUActivation() + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_gelu = self.act(self.wi_0(hidden_states)) + hidden_linear = self.wi_1(hidden_states) + hidden_states = hidden_gelu * hidden_linear + hidden_states = self.dropout(hidden_states) + + hidden_states = self.wo(hidden_states) + return hidden_states + + +class T5LayerNorm(nn.Module): + r""" + T5 style layer normalization module. + + Args: + hidden_size (`int`): + Size of the input hidden states. + eps (`float`, `optional`, defaults to `1e-6`): + A small value used for numerical stability to avoid dividing by zero. + """ + + def __init__(self, hidden_size: int, eps: float = 1e-6): + """ + Construct a layernorm module in the T5 style. No bias and no subtraction of mean. + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + # T5 uses a layer_norm which only scales and doesn't shift, which is also known as Root Mean + # Square Layer Normalization https://huggingface.co/papers/1910.07467 thus variance is calculated + # w/o mean and there is no bias. Additionally we want to make sure that the accumulation for + # half-precision inputs is done in fp32 + + variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + + # convert into half-precision if necessary + if self.weight.dtype in [torch.float16, torch.bfloat16]: + hidden_states = hidden_states.to(self.weight.dtype) + + return self.weight * hidden_states + + +class NewGELUActivation(nn.Module): + """ + Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see + the Gaussian Error Linear Units paper: https://huggingface.co/papers/1606.08415 + """ + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return 0.5 * input * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (input + 0.044715 * torch.pow(input, 3.0)))) + + +class T5FiLMLayer(nn.Module): + """ + T5 style FiLM Layer. + + Args: + in_features (`int`): + Number of input features. + out_features (`int`): + Number of output features. + """ + + def __init__(self, in_features: int, out_features: int): + super().__init__() + self.scale_bias = nn.Linear(in_features, out_features * 2, bias=False) + + def forward(self, x: torch.Tensor, conditioning_emb: torch.Tensor) -> torch.Tensor: + emb = self.scale_bias(conditioning_emb) + scale, shift = torch.chunk(emb, 2, -1) + x = x * (1 + scale) + shift + return x diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_2d.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..12f89201d752407c2a9ceeddfe6663ec2bfde769 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_2d.py @@ -0,0 +1,551 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any + +import torch +import torch.nn.functional as F +from torch import nn + +from ...configuration_utils import LegacyConfigMixin, register_to_config +from ...utils import deprecate, logging +from ..attention import BasicTransformerBlock +from ..embeddings import ImagePositionalEmbeddings, PatchEmbed, PixArtAlphaTextProjection +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import LegacyModelMixin +from ..normalization import AdaLayerNormSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class Transformer2DModelOutput(Transformer2DModelOutput): + def __init__(self, *args, **kwargs): + deprecation_message = "Importing `Transformer2DModelOutput` from `diffusers.models.transformer_2d` is deprecated and this will be removed in a future version. Please use `from diffusers.models.modeling_outputs import Transformer2DModelOutput`, instead." + deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message) + super().__init__(*args, **kwargs) + + +class Transformer2DModel(LegacyModelMixin, LegacyConfigMixin): + """ + A 2D Transformer model for image-like data. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head. + in_channels (`int`, *optional*): + The number of channels in the input and output (specify if the input is **continuous**). + num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + sample_size (`int`, *optional*): The width of the latent images (specify if the input is **discrete**). + This is fixed during training since it is used to learn a number of position embeddings. + num_vector_embeds (`int`, *optional*): + The number of classes of the vector embeddings of the latent pixels (specify if the input is **discrete**). + Includes the class for the masked latent pixel. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to use in feed-forward. + num_embeds_ada_norm ( `int`, *optional*): + The number of diffusion steps used during training. Pass if at least one of the norm_layers is + `AdaLayerNorm`. This is fixed during training since it is used to learn a number of embeddings that are + added to the hidden states. + + During inference, you can denoise for up to but not more steps than `num_embeds_ada_norm`. + attention_bias (`bool`, *optional*): + Configure if the `TransformerBlocks` attention should contain a bias parameter. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["BasicTransformerBlock"] + _skip_layerwise_casting_patterns = ["latent_image_embedding", "norm"] + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int | None = None, + out_channels: int | None = None, + num_layers: int = 1, + dropout: float = 0.0, + norm_num_groups: int = 32, + cross_attention_dim: int | None = None, + attention_bias: bool = False, + sample_size: int | None = None, + num_vector_embeds: int | None = None, + patch_size: int | None = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: int | None = None, + use_linear_projection: bool = False, + only_cross_attention: bool = False, + double_self_attention: bool = False, + upcast_attention: bool = False, + norm_type: str = "layer_norm", # 'layer_norm', 'ada_norm', 'ada_norm_zero', 'ada_norm_single', 'ada_norm_continuous', 'layer_norm_i2vgen' + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + attention_type: str = "default", + caption_channels: int = None, + interpolation_scale: float = None, + use_additional_conditions: bool | None = None, + ): + super().__init__() + + # Validate inputs. + if patch_size is not None: + if norm_type not in ["ada_norm", "ada_norm_zero", "ada_norm_single"]: + raise NotImplementedError( + f"Forward pass is not implemented when `patch_size` is not None and `norm_type` is '{norm_type}'." + ) + elif norm_type in ["ada_norm", "ada_norm_zero"] and num_embeds_ada_norm is None: + raise ValueError( + f"When using a `patch_size` and this `norm_type` ({norm_type}), `num_embeds_ada_norm` cannot be None." + ) + + # 1. Transformer2DModel can process both standard continuous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)` + # Define whether input is continuous or discrete depending on configuration + self.is_input_continuous = (in_channels is not None) and (patch_size is None) + self.is_input_vectorized = num_vector_embeds is not None + self.is_input_patches = in_channels is not None and patch_size is not None + + if self.is_input_continuous and self.is_input_vectorized: + raise ValueError( + f"Cannot define both `in_channels`: {in_channels} and `num_vector_embeds`: {num_vector_embeds}. Make" + " sure that either `in_channels` or `num_vector_embeds` is None." + ) + elif self.is_input_vectorized and self.is_input_patches: + raise ValueError( + f"Cannot define both `num_vector_embeds`: {num_vector_embeds} and `patch_size`: {patch_size}. Make" + " sure that either `num_vector_embeds` or `num_patches` is None." + ) + elif not self.is_input_continuous and not self.is_input_vectorized and not self.is_input_patches: + raise ValueError( + f"Has to define `in_channels`: {in_channels}, `num_vector_embeds`: {num_vector_embeds}, or patch_size:" + f" {patch_size}. Make sure that `in_channels`, `num_vector_embeds` or `num_patches` is not None." + ) + + if norm_type == "layer_norm" and num_embeds_ada_norm is not None: + deprecation_message = ( + f"The configuration file of this model: {self.__class__} is outdated. `norm_type` is either not set or" + " incorrectly set to `'layer_norm'`. Make sure to set `norm_type` to `'ada_norm'` in the config." + " Please make sure to update the config accordingly as leaving `norm_type` might led to incorrect" + " results in future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it" + " would be very nice if you could open a Pull request for the `transformer/config.json` file" + ) + deprecate("norm_type!=num_embeds_ada_norm", "1.0.0", deprecation_message, standard_warn=False) + norm_type = "ada_norm" + + # Set some common variables used across the board. + self.use_linear_projection = use_linear_projection + self.interpolation_scale = interpolation_scale + self.caption_channels = caption_channels + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim + self.in_channels = in_channels + self.out_channels = in_channels if out_channels is None else out_channels + self.gradient_checkpointing = False + + if use_additional_conditions is None: + if norm_type == "ada_norm_single" and sample_size == 128: + use_additional_conditions = True + else: + use_additional_conditions = False + self.use_additional_conditions = use_additional_conditions + + # 2. Initialize the right blocks. + # These functions follow a common structure: + # a. Initialize the input blocks. b. Initialize the transformer blocks. + # c. Initialize the output blocks and other projection blocks when necessary. + if self.is_input_continuous: + self._init_continuous_input(norm_type=norm_type) + elif self.is_input_vectorized: + self._init_vectorized_inputs(norm_type=norm_type) + elif self.is_input_patches: + self._init_patched_inputs(norm_type=norm_type) + + def _init_continuous_input(self, norm_type): + self.norm = torch.nn.GroupNorm( + num_groups=self.config.norm_num_groups, num_channels=self.in_channels, eps=1e-6, affine=True + ) + if self.use_linear_projection: + self.proj_in = torch.nn.Linear(self.in_channels, self.inner_dim) + else: + self.proj_in = torch.nn.Conv2d(self.in_channels, self.inner_dim, kernel_size=1, stride=1, padding=0) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + self.inner_dim, + self.config.num_attention_heads, + self.config.attention_head_dim, + dropout=self.config.dropout, + cross_attention_dim=self.config.cross_attention_dim, + activation_fn=self.config.activation_fn, + num_embeds_ada_norm=self.config.num_embeds_ada_norm, + attention_bias=self.config.attention_bias, + only_cross_attention=self.config.only_cross_attention, + double_self_attention=self.config.double_self_attention, + upcast_attention=self.config.upcast_attention, + norm_type=norm_type, + norm_elementwise_affine=self.config.norm_elementwise_affine, + norm_eps=self.config.norm_eps, + attention_type=self.config.attention_type, + ) + for _ in range(self.config.num_layers) + ] + ) + + if self.use_linear_projection: + self.proj_out = torch.nn.Linear(self.inner_dim, self.out_channels) + else: + self.proj_out = torch.nn.Conv2d(self.inner_dim, self.out_channels, kernel_size=1, stride=1, padding=0) + + def _init_vectorized_inputs(self, norm_type): + assert self.config.sample_size is not None, "Transformer2DModel over discrete input must provide sample_size" + assert self.config.num_vector_embeds is not None, ( + "Transformer2DModel over discrete input must provide num_embed" + ) + + self.height = self.config.sample_size + self.width = self.config.sample_size + self.num_latent_pixels = self.height * self.width + + self.latent_image_embedding = ImagePositionalEmbeddings( + num_embed=self.config.num_vector_embeds, embed_dim=self.inner_dim, height=self.height, width=self.width + ) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + self.inner_dim, + self.config.num_attention_heads, + self.config.attention_head_dim, + dropout=self.config.dropout, + cross_attention_dim=self.config.cross_attention_dim, + activation_fn=self.config.activation_fn, + num_embeds_ada_norm=self.config.num_embeds_ada_norm, + attention_bias=self.config.attention_bias, + only_cross_attention=self.config.only_cross_attention, + double_self_attention=self.config.double_self_attention, + upcast_attention=self.config.upcast_attention, + norm_type=norm_type, + norm_elementwise_affine=self.config.norm_elementwise_affine, + norm_eps=self.config.norm_eps, + attention_type=self.config.attention_type, + ) + for _ in range(self.config.num_layers) + ] + ) + + self.norm_out = nn.LayerNorm(self.inner_dim) + self.out = nn.Linear(self.inner_dim, self.config.num_vector_embeds - 1) + + def _init_patched_inputs(self, norm_type): + assert self.config.sample_size is not None, "Transformer2DModel over patched input must provide sample_size" + + self.height = self.config.sample_size + self.width = self.config.sample_size + + self.patch_size = self.config.patch_size + interpolation_scale = ( + self.config.interpolation_scale + if self.config.interpolation_scale is not None + else max(self.config.sample_size // 64, 1) + ) + self.pos_embed = PatchEmbed( + height=self.config.sample_size, + width=self.config.sample_size, + patch_size=self.config.patch_size, + in_channels=self.in_channels, + embed_dim=self.inner_dim, + interpolation_scale=interpolation_scale, + ) + + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + self.inner_dim, + self.config.num_attention_heads, + self.config.attention_head_dim, + dropout=self.config.dropout, + cross_attention_dim=self.config.cross_attention_dim, + activation_fn=self.config.activation_fn, + num_embeds_ada_norm=self.config.num_embeds_ada_norm, + attention_bias=self.config.attention_bias, + only_cross_attention=self.config.only_cross_attention, + double_self_attention=self.config.double_self_attention, + upcast_attention=self.config.upcast_attention, + norm_type=norm_type, + norm_elementwise_affine=self.config.norm_elementwise_affine, + norm_eps=self.config.norm_eps, + attention_type=self.config.attention_type, + ) + for _ in range(self.config.num_layers) + ] + ) + + if self.config.norm_type != "ada_norm_single": + self.norm_out = nn.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out_1 = nn.Linear(self.inner_dim, 2 * self.inner_dim) + self.proj_out_2 = nn.Linear( + self.inner_dim, self.config.patch_size * self.config.patch_size * self.out_channels + ) + elif self.config.norm_type == "ada_norm_single": + self.norm_out = nn.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6) + self.scale_shift_table = nn.Parameter(torch.randn(2, self.inner_dim) / self.inner_dim**0.5) + self.proj_out = nn.Linear( + self.inner_dim, self.config.patch_size * self.config.patch_size * self.out_channels + ) + + # PixArt-Alpha blocks. + self.adaln_single = None + if self.config.norm_type == "ada_norm_single": + # TODO(Sayak, PVP) clean this, for now we use sample size to determine whether to use + # additional conditions until we find better name + self.adaln_single = AdaLayerNormSingle( + self.inner_dim, use_additional_conditions=self.use_additional_conditions + ) + + self.caption_projection = None + if self.caption_channels is not None: + self.caption_projection = PixArtAlphaTextProjection( + in_features=self.caption_channels, hidden_size=self.inner_dim + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + timestep: torch.LongTensor | None = None, + added_cond_kwargs: dict[str, torch.Tensor] = None, + class_labels: torch.LongTensor | None = None, + cross_attention_kwargs: dict[str, Any] = None, + attention_mask: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + return_dict: bool = True, + ): + """ + The [`Transformer2DModel`] forward method. + + Args: + hidden_states (`torch.LongTensor` of shape `(batch size, num latent pixels)` if discrete, `torch.Tensor` of shape `(batch size, channel, height, width)` if continuous): + Input `hidden_states`. + encoder_hidden_states ( `torch.Tensor` of shape `(batch size, sequence len, embed dims)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. Optional timestep to be applied as an embedding in `AdaLayerNorm`. + class_labels ( `torch.LongTensor` of shape `(batch size, num classes)`, *optional*): + Used to indicate class labels conditioning. Optional class labels to be applied as an embedding in + `AdaLayerZeroNorm`. + cross_attention_kwargs ( `dict[str, Any]`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + attention_mask ( `torch.Tensor`, *optional*): + An attention mask of shape `(batch, key_tokens)` is applied to `encoder_hidden_states`. If `1` the mask + is kept, otherwise if `0` it is discarded. Mask will be converted into a bias, which adds large + negative values to the attention scores corresponding to "discard" tokens. + encoder_attention_mask ( `torch.Tensor`, *optional*): + Cross-attention mask applied to `encoder_hidden_states`. Two formats supported: + + * Mask `(batch, sequence_length)` True = keep, False = discard. + * Bias `(batch, 1, sequence_length)` 0 = keep, -10000 = discard. + + If `ndim == 2`: will be interpreted as a mask, then converted into a bias consistent with the format + above. This bias will be added to the cross-attention scores. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.unets.unet_2d_condition.UNet2DConditionOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformers.transformer_2d.Transformer2DModelOutput`] is returned, + otherwise a `tuple` where the first element is the sample tensor. + """ + if cross_attention_kwargs is not None: + if cross_attention_kwargs.get("scale", None) is not None: + logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") + # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. + # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. + # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. + # expects mask of shape: + # [batch, key_tokens] + # adds singleton query_tokens dimension: + # [batch, 1, key_tokens] + # this helps to broadcast it as a bias over attention scores, which will be in one of the following shapes: + # [batch, heads, query_tokens, key_tokens] (e.g. torch sdp attn) + # [batch * heads, query_tokens, key_tokens] (e.g. xformers or classic attn) + if attention_mask is not None and attention_mask.ndim == 2: + # assume that mask is expressed as: + # (1 = keep, 0 = discard) + # convert mask into a bias that can be added to attention scores: + # (keep = +0, discard = -10000.0) + attention_mask = (1 - attention_mask.to(hidden_states.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + # 1. Input + if self.is_input_continuous: + batch_size, _, height, width = hidden_states.shape + residual = hidden_states + hidden_states, inner_dim = self._operate_on_continuous_inputs(hidden_states) + elif self.is_input_vectorized: + hidden_states = self.latent_image_embedding(hidden_states) + elif self.is_input_patches: + height, width = hidden_states.shape[-2] // self.patch_size, hidden_states.shape[-1] // self.patch_size + hidden_states, encoder_hidden_states, timestep, embedded_timestep = self._operate_on_patched_inputs( + hidden_states, encoder_hidden_states, timestep, added_cond_kwargs + ) + + # 2. Blocks + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + cross_attention_kwargs, + class_labels, + ) + else: + hidden_states = block( + hidden_states, + attention_mask=attention_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + timestep=timestep, + cross_attention_kwargs=cross_attention_kwargs, + class_labels=class_labels, + ) + + # 3. Output + if self.is_input_continuous: + output = self._get_output_for_continuous_inputs( + hidden_states=hidden_states, + residual=residual, + batch_size=batch_size, + height=height, + width=width, + inner_dim=inner_dim, + ) + elif self.is_input_vectorized: + output = self._get_output_for_vectorized_inputs(hidden_states) + elif self.is_input_patches: + output = self._get_output_for_patched_inputs( + hidden_states=hidden_states, + timestep=timestep, + class_labels=class_labels, + embedded_timestep=embedded_timestep, + height=height, + width=width, + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) + + def _operate_on_continuous_inputs(self, hidden_states): + batch, _, height, width = hidden_states.shape + hidden_states = self.norm(hidden_states) + + if not self.use_linear_projection: + hidden_states = self.proj_in(hidden_states) + inner_dim = hidden_states.shape[1] + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner_dim) + else: + inner_dim = hidden_states.shape[1] + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner_dim) + hidden_states = self.proj_in(hidden_states) + + return hidden_states, inner_dim + + def _operate_on_patched_inputs(self, hidden_states, encoder_hidden_states, timestep, added_cond_kwargs): + batch_size = hidden_states.shape[0] + hidden_states = self.pos_embed(hidden_states) + embedded_timestep = None + + if self.adaln_single is not None: + if self.use_additional_conditions and added_cond_kwargs is None: + raise ValueError( + "`added_cond_kwargs` cannot be None when using additional conditions for `adaln_single`." + ) + timestep, embedded_timestep = self.adaln_single( + timestep, added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + if self.caption_projection is not None: + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.shape[-1]) + + return hidden_states, encoder_hidden_states, timestep, embedded_timestep + + def _get_output_for_continuous_inputs(self, hidden_states, residual, batch_size, height, width, inner_dim): + if not self.use_linear_projection: + hidden_states = ( + hidden_states.reshape(batch_size, height, width, inner_dim).permute(0, 3, 1, 2).contiguous() + ) + hidden_states = self.proj_out(hidden_states) + else: + hidden_states = self.proj_out(hidden_states) + hidden_states = ( + hidden_states.reshape(batch_size, height, width, inner_dim).permute(0, 3, 1, 2).contiguous() + ) + + output = hidden_states + residual + return output + + def _get_output_for_vectorized_inputs(self, hidden_states): + hidden_states = self.norm_out(hidden_states) + logits = self.out(hidden_states) + # (batch, self.num_vector_embeds - 1, self.num_latent_pixels) + logits = logits.permute(0, 2, 1) + # log(p(x_0)) + output = F.log_softmax(logits.double(), dim=1).float() + return output + + def _get_output_for_patched_inputs( + self, hidden_states, timestep, class_labels, embedded_timestep, height=None, width=None + ): + if self.config.norm_type != "ada_norm_single": + conditioning = self.transformer_blocks[0].norm1.emb( + timestep, class_labels, hidden_dtype=hidden_states.dtype + ) + shift, scale = self.proj_out_1(F.silu(conditioning)).chunk(2, dim=1) + hidden_states = self.norm_out(hidden_states) * (1 + scale[:, None]) + shift[:, None] + hidden_states = self.proj_out_2(hidden_states) + elif self.config.norm_type == "ada_norm_single": + shift, scale = (self.scale_shift_table[None] + embedded_timestep[:, None]).chunk(2, dim=1) + hidden_states = self.norm_out(hidden_states) + # Modulation + hidden_states = hidden_states * (1 + scale) + shift + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.squeeze(1) + + # unpatchify + if self.adaln_single is None: + height = width = int(hidden_states.shape[1] ** 0.5) + hidden_states = hidden_states.reshape( + shape=(-1, height, width, self.patch_size, self.patch_size, self.out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(-1, self.out_channels, height * self.patch_size, width * self.patch_size) + ) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_allegro.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_allegro.py new file mode 100644 index 0000000000000000000000000000000000000000..934e2787674f4e183ea96e27d61cd820e0bf9200 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_allegro.py @@ -0,0 +1,412 @@ +# Copyright 2025 The RhymesAI and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import FeedForward +from ..attention_processor import AllegroAttnProcessor2_0, Attention +from ..cache_utils import CacheMixin +from ..embeddings import PatchEmbed, PixArtAlphaTextProjection +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle + + +logger = logging.get_logger(__name__) + + +@maybe_allow_in_graph +class AllegroTransformerBlock(nn.Module): + r""" + Transformer block used in [Allegro](https://github.com/rhymes-ai/Allegro) model. + + Args: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + cross_attention_dim (`int`, defaults to `2304`): + The dimension of the cross attention features. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to be used in feed-forward. + attention_bias (`bool`, defaults to `False`): + Whether or not to use bias in attention projection layers. + only_cross_attention (`bool`, defaults to `False`): + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use learnable elementwise affine parameters for normalization. + norm_eps (`float`, defaults to `1e-5`): + Epsilon value for normalization layers. + final_dropout (`bool` defaults to `False`): + Whether to apply a final dropout after the last feed-forward layer. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + dropout=0.0, + cross_attention_dim: int | None = None, + activation_fn: str = "geglu", + attention_bias: bool = False, + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + ): + super().__init__() + + # 1. Self Attention + self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=None, + processor=AllegroAttnProcessor2_0(), + ) + + # 2. Cross Attention + self.norm2 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + self.attn2 = Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + processor=AllegroAttnProcessor2_0(), + ) + + # 3. Feed Forward + self.norm3 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + ) + + # 4. Scale-shift + self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + temb: torch.LongTensor | None = None, + attention_mask: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + image_rotary_emb=None, + ) -> torch.Tensor: + # 0. Self-Attention + batch_size = hidden_states.shape[0] + + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ( + self.scale_shift_table[None] + temb.reshape(batch_size, 6, -1) + ).chunk(6, dim=1) + norm_hidden_states = self.norm1(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + norm_hidden_states = norm_hidden_states.squeeze(1) + + attn_output = self.attn1( + norm_hidden_states, + encoder_hidden_states=None, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + attn_output = gate_msa * attn_output + + hidden_states = attn_output + hidden_states + if hidden_states.ndim == 4: + hidden_states = hidden_states.squeeze(1) + + # 1. Cross-Attention + if self.attn2 is not None: + norm_hidden_states = hidden_states + + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + image_rotary_emb=None, + ) + hidden_states = attn_output + hidden_states + + # 2. Feed-forward + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp * ff_output + + hidden_states = ff_output + hidden_states + + # TODO(aryan): maybe following line is not required + if hidden_states.ndim == 4: + hidden_states = hidden_states.squeeze(1) + + return hidden_states + + +class AllegroTransformer3DModel(ModelMixin, ConfigMixin, CacheMixin): + _supports_gradient_checkpointing = True + + """ + A 3D Transformer model for video-like data. + + Args: + patch_size (`int`, defaults to `2`): + The size of spatial patches to use in the patch embedding layer. + patch_size_t (`int`, defaults to `1`): + The size of temporal patches to use in the patch embedding layer. + num_attention_heads (`int`, defaults to `24`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `96`): + The number of channels in each head. + in_channels (`int`, defaults to `4`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `4`): + The number of channels in the output. + num_layers (`int`, defaults to `32`): + The number of layers of Transformer blocks to use. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + cross_attention_dim (`int`, defaults to `2304`): + The dimension of the cross attention features. + attention_bias (`bool`, defaults to `True`): + Whether or not to use bias in the attention projection layers. + sample_height (`int`, defaults to `90`): + The height of the input latents. + sample_width (`int`, defaults to `160`): + The width of the input latents. + sample_frames (`int`, defaults to `22`): + The number of frames in the input latents. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + norm_elementwise_affine (`bool`, defaults to `False`): + Whether or not to use elementwise affine in normalization layers. + norm_eps (`float`, defaults to `1e-6`): + The epsilon value to use in normalization layers. + caption_channels (`int`, defaults to `4096`): + Number of channels to use for projecting the caption embeddings. + interpolation_scale_h (`float`, defaults to `2.0`): + Scaling factor to apply in 3D positional embeddings across height dimension. + interpolation_scale_w (`float`, defaults to `2.0`): + Scaling factor to apply in 3D positional embeddings across width dimension. + interpolation_scale_t (`float`, defaults to `2.2`): + Scaling factor to apply in 3D positional embeddings across time dimension. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["pos_embed", "norm", "adaln_single"] + + @register_to_config + def __init__( + self, + patch_size: int = 2, + patch_size_t: int = 1, + num_attention_heads: int = 24, + attention_head_dim: int = 96, + in_channels: int = 4, + out_channels: int = 4, + num_layers: int = 32, + dropout: float = 0.0, + cross_attention_dim: int = 2304, + attention_bias: bool = True, + sample_height: int = 90, + sample_width: int = 160, + sample_frames: int = 22, + activation_fn: str = "gelu-approximate", + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + caption_channels: int = 4096, + interpolation_scale_h: float = 2.0, + interpolation_scale_w: float = 2.0, + interpolation_scale_t: float = 2.2, + ): + super().__init__() + + self.inner_dim = num_attention_heads * attention_head_dim + + interpolation_scale_t = ( + interpolation_scale_t + if interpolation_scale_t is not None + else ((sample_frames - 1) // 16 + 1) + if sample_frames % 2 == 1 + else sample_frames // 16 + ) + interpolation_scale_h = interpolation_scale_h if interpolation_scale_h is not None else sample_height / 30 + interpolation_scale_w = interpolation_scale_w if interpolation_scale_w is not None else sample_width / 40 + + # 1. Patch embedding + self.pos_embed = PatchEmbed( + height=sample_height, + width=sample_width, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=self.inner_dim, + pos_embed_type=None, + ) + + # 2. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + AllegroTransformerBlock( + self.inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + cross_attention_dim=cross_attention_dim, + activation_fn=activation_fn, + attention_bias=attention_bias, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + ) + for _ in range(num_layers) + ] + ) + + # 3. Output projection & norm + self.norm_out = nn.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6) + self.scale_shift_table = nn.Parameter(torch.randn(2, self.inner_dim) / self.inner_dim**0.5) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * out_channels) + + # 4. Timestep embeddings + self.adaln_single = AdaLayerNormSingle(self.inner_dim, use_additional_conditions=False) + + # 5. Caption projection + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=self.inner_dim) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + attention_mask: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + return_dict: bool = True, + ): + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t = self.config.patch_size_t + p = self.config.patch_size + + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p + post_patch_width = width // p + + # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. + # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. + # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. + # expects mask of shape: + # [batch, key_tokens] + # adds singleton query_tokens dimension: + # [batch, 1, key_tokens] + # this helps to broadcast it as a bias over attention scores, which will be in one of the following shapes: + # [batch, heads, query_tokens, key_tokens] (e.g. torch sdp attn) + # [batch * heads, query_tokens, key_tokens] (e.g. xformers or classic attn) attention_mask_vid, attention_mask_img = None, None + if attention_mask is not None and attention_mask.ndim == 4: + # assume that mask is expressed as: + # (1 = keep, 0 = discard) + # convert mask into a bias that can be added to attention scores: + # (keep = +0, discard = -10000.0) + # b, frame+use_image_num, h, w -> a video with images + # b, 1, h, w -> only images + attention_mask = attention_mask.to(hidden_states.dtype) + attention_mask = attention_mask[:, :num_frames] # [batch_size, num_frames, height, width] + + if attention_mask.numel() > 0: + attention_mask = attention_mask.unsqueeze(1) # [batch_size, 1, num_frames, height, width] + attention_mask = F.max_pool3d(attention_mask, kernel_size=(p_t, p, p), stride=(p_t, p, p)) + attention_mask = attention_mask.flatten(1).view(batch_size, 1, -1) + + attention_mask = ( + (1 - attention_mask.bool().to(hidden_states.dtype)) * -10000.0 if attention_mask.numel() > 0 else None + ) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(self.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + # 1. Timestep embeddings + timestep, embedded_timestep = self.adaln_single( + timestep, batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + # 2. Patch embeddings + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) + hidden_states = self.pos_embed(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).flatten(1, 2) + + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, encoder_hidden_states.shape[-1]) + + # 3. Transformer blocks + for i, block in enumerate(self.transformer_blocks): + # TODO(aryan): Implement gradient checkpointing + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + timestep, + attention_mask, + encoder_attention_mask, + image_rotary_emb, + ) + else: + hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=timestep, + attention_mask=attention_mask, + encoder_attention_mask=encoder_attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + # 4. Output normalization & projection + shift, scale = (self.scale_shift_table[None] + embedded_timestep[:, None]).chunk(2, dim=1) + hidden_states = self.norm_out(hidden_states) + + # Modulation + hidden_states = hidden_states * (1 + scale) + shift + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.squeeze(1) + + # 5. Unpatchify + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p, p, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.reshape(batch_size, -1, num_frames, height, width) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_bria.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_bria.py new file mode 100644 index 0000000000000000000000000000000000000000..99b7bbfd64cf7fdfda634c9596ca6f8a6b35fcd8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_bria.py @@ -0,0 +1,708 @@ +import inspect +from typing import Any + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import TimestepEmbedding, apply_rotary_emb, get_timestep_embedding +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_projections(attn: "BriaAttention", hidden_states, encoder_hidden_states=None): + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + encoder_query = encoder_key = encoder_value = None + if encoder_hidden_states is not None and attn.added_kv_proj_dim is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_fused_projections(attn: "BriaAttention", hidden_states, encoder_hidden_states=None): + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + + encoder_query = encoder_key = encoder_value = (None,) + if encoder_hidden_states is not None and hasattr(attn, "to_added_qkv"): + encoder_query, encoder_key, encoder_value = attn.to_added_qkv(encoder_hidden_states).chunk(3, dim=-1) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_qkv_projections(attn: "BriaAttention", hidden_states, encoder_hidden_states=None): + if attn.fused_projections: + return _get_fused_projections(attn, hidden_states, encoder_hidden_states) + return _get_projections(attn, hidden_states, encoder_hidden_states) + + +def get_1d_rotary_pos_embed( + dim: int, + pos: np.ndarray | int, + theta: float = 10000.0, + use_real=False, + linear_factor=1.0, + ntk_factor=1.0, + repeat_interleave_real=True, + freqs_dtype=torch.float32, # torch.float32, torch.float64 (flux) +): + """ + Precompute the frequency tensor for complex exponentials (cis) with given dimensions. + + This function calculates a frequency tensor with complex exponentials using the given dimension 'dim' and the end + index 'end'. The 'theta' parameter scales the frequencies. The returned tensor contains complex values in complex64 + data type. + + Args: + dim (`int`): Dimension of the frequency tensor. + pos (`np.ndarray` or `int`): Position indices for the frequency tensor. [S] or scalar + theta (`float`, *optional*, defaults to 10000.0): + Scaling factor for frequency computation. Defaults to 10000.0. + use_real (`bool`, *optional*): + If True, return real part and imaginary part separately. Otherwise, return complex numbers. + linear_factor (`float`, *optional*, defaults to 1.0): + Scaling factor for the context extrapolation. Defaults to 1.0. + ntk_factor (`float`, *optional*, defaults to 1.0): + Scaling factor for the NTK-Aware RoPE. Defaults to 1.0. + repeat_interleave_real (`bool`, *optional*, defaults to `True`): + If `True` and `use_real`, real part and imaginary part are each interleaved with themselves to reach `dim`. + Otherwise, they are concateanted with themselves. + freqs_dtype (`torch.float32` or `torch.float64`, *optional*, defaults to `torch.float32`): + the dtype of the frequency tensor. + Returns: + `torch.Tensor`: Precomputed frequency tensor with complex exponentials. [S, D/2] + """ + assert dim % 2 == 0 + + if isinstance(pos, int): + pos = torch.arange(pos) + if isinstance(pos, np.ndarray): + pos = torch.from_numpy(pos) # type: ignore # [S] + + theta = theta * ntk_factor + freqs = ( + 1.0 + / (theta ** (torch.arange(0, dim, 2, dtype=freqs_dtype, device=pos.device)[: (dim // 2)] / dim)) + / linear_factor + ) # [D/2] + freqs = torch.outer(pos, freqs) # type: ignore # [S, D/2] + if use_real and repeat_interleave_real: + # bria + freqs_cos = freqs.cos().repeat_interleave(2, dim=1).float() # [S, D] + freqs_sin = freqs.sin().repeat_interleave(2, dim=1).float() # [S, D] + return freqs_cos, freqs_sin + elif use_real: + # stable audio, allegro + freqs_cos = torch.cat([freqs.cos(), freqs.cos()], dim=-1).float() # [S, D] + freqs_sin = torch.cat([freqs.sin(), freqs.sin()], dim=-1).float() # [S, D] + return freqs_cos, freqs_sin + else: + # lumina + freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 # [S, D/2] + return freqs_cis + + +class BriaAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "BriaAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if attn.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class BriaAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = BriaAttnProcessor + _available_processors = [ + BriaAttnProcessor, + ] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + context_pre_only: bool | None = None, + pre_only: bool = False, + elementwise_affine: bool = True, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = out_dim if out_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.heads = out_dim // dim_head if out_dim is not None else heads + self.added_kv_proj_dim = added_kv_proj_dim + self.added_proj_bias = added_proj_bias + + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + + if not self.pre_only: + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if added_kv_proj_dim is not None: + self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps) + self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps) + self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {"ip_adapter_masks", "ip_hidden_states"} + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +class BriaEmbedND(torch.nn.Module): + # modified from https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/modules/layers.py#L11 + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + freqs_dtype = torch.float32 if is_mps else torch.float64 + for i in range(n_axes): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[:, i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +class BriaTimesteps(nn.Module): + def __init__( + self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float, scale: int = 1, time_theta=10000 + ): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + self.scale = scale + self.time_theta = time_theta + + def forward(self, timesteps): + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + scale=self.scale, + max_period=self.time_theta, + ) + return t_emb + + +class BriaTimestepProjEmbeddings(nn.Module): + def __init__(self, embedding_dim, time_theta): + super().__init__() + + self.time_proj = BriaTimesteps( + num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0, time_theta=time_theta + ) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + def forward(self, timestep, dtype): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=dtype)) # (N, D) + return timesteps_emb + + +class BriaPosEmbed(torch.nn.Module): + # modified from https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/modules/layers.py#L11 + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + freqs_dtype = torch.float32 if is_mps else torch.float64 + for i in range(n_axes): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[:, i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +@maybe_allow_in_graph +class BriaTransformerBlock(nn.Module): + def __init__( + self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6 + ): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim) + self.norm1_context = AdaLayerNormZero(dim) + + self.attn = BriaAttention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=BriaAttnProcessor(), + eps=eps, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + attention_kwargs = attention_kwargs or {} + + # Attention. + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **attention_kwargs, + ) + + if len(attention_outputs) == 2: + attn_output, context_attn_output = attention_outputs + elif len(attention_outputs) == 3: + attn_output, context_attn_output, ip_attn_output = attention_outputs + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + if len(attention_outputs) == 3: + hidden_states = hidden_states + ip_attn_output + + # Process attention outputs for the `encoder_hidden_states`. + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +@maybe_allow_in_graph +class BriaSingleTransformerBlock(nn.Module): + def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, mlp_ratio: float = 4.0): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + + self.norm = AdaLayerNormZeroSingle(dim) + self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim) + + processor = BriaAttnProcessor() + + self.attn = BriaAttention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=processor, + eps=1e-6, + pre_only=True, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_len = encoder_hidden_states.shape[1] + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + residual = hidden_states + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + attention_kwargs = attention_kwargs or {} + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + **attention_kwargs, + ) + + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + gate = gate.unsqueeze(1) + hidden_states = gate * self.proj_out(hidden_states) + hidden_states = residual + hidden_states + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + encoder_hidden_states, hidden_states = hidden_states[:, :text_seq_len], hidden_states[:, text_seq_len:] + return encoder_hidden_states, hidden_states + + +class BriaTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin): + """ + The Transformer model introduced in Flux. Based on FluxPipeline with several changes: + - no pooled embeddings + - We use zero padding for prompts + - No guidance embedding since this is not a distilled version + Reference: https://blackforestlabs.ai/announcing-black-forest-labs/ + + Parameters: + patch_size (`int`): Patch size to turn the input data into small patches. + in_channels (`int`, *optional*, defaults to 16): The number of channels in the input. + num_layers (`int`, *optional*, defaults to 18): The number of layers of MMDiT blocks to use. + num_single_layers (`int`, *optional*, defaults to 18): The number of layers of single DiT blocks to use. + attention_head_dim (`int`, *optional*, defaults to 64): The number of channels in each head. + num_attention_heads (`int`, *optional*, defaults to 18): The number of heads to use for multi-head attention. + joint_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + pooled_projection_dim (`int`): Number of dimensions to use when projecting the `pooled_projections`. + guidance_embeds (`bool`, defaults to False): Whether to use guidance embeddings. + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + num_layers: int = 19, + num_single_layers: int = 38, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 4096, + pooled_projection_dim: int = None, + guidance_embeds: bool = False, + axes_dims_rope: list[int] = [16, 56, 56], + rope_theta=10000, + time_theta=10000, + ): + super().__init__() + self.out_channels = in_channels + self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim + + self.pos_embed = BriaEmbedND(theta=rope_theta, axes_dim=axes_dims_rope) + + self.time_embed = BriaTimestepProjEmbeddings(embedding_dim=self.inner_dim, time_theta=time_theta) + if guidance_embeds: + self.guidance_embed = BriaTimestepProjEmbeddings(embedding_dim=self.inner_dim) + + self.context_embedder = nn.Linear(self.config.joint_attention_dim, self.inner_dim) + self.x_embedder = torch.nn.Linear(self.config.in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + BriaTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + attention_head_dim=self.config.attention_head_dim, + ) + for i in range(self.config.num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + BriaSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + attention_head_dim=self.config.attention_head_dim, + ) + for i in range(self.config.num_single_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + controlnet_block_samples=None, + controlnet_single_block_samples=None, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + """ + The [`BriaTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected + from the embeddings of input conditions. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + hidden_states = self.x_embedder(hidden_states) + + timestep = timestep.to(hidden_states.dtype) + if guidance is not None: + guidance = guidance.to(hidden_states.dtype) + else: + guidance = None + + temb = self.time_embed(timestep, dtype=hidden_states.dtype) + + if guidance: + temb += self.guidance_embed(guidance, dtype=hidden_states.dtype) + + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if len(txt_ids.shape) == 3: + txt_ids = txt_ids[0] + + if len(img_ids.shape) == 3: + img_ids = img_ids[0] + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + attention_kwargs, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + # controlnet residual + if controlnet_block_samples is not None: + interval_control = len(self.transformer_blocks) / len(controlnet_block_samples) + interval_control = int(np.ceil(interval_control)) + hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control] + + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + attention_kwargs, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + # controlnet residual + if controlnet_single_block_samples is not None: + interval_control = len(self.single_transformer_blocks) / len(controlnet_single_block_samples) + interval_control = int(np.ceil(interval_control)) + hidden_states[:, encoder_hidden_states.shape[1] :, ...] = ( + hidden_states[:, encoder_hidden_states.shape[1] :, ...] + + controlnet_single_block_samples[index_block // interval_control] + ) + + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_bria_fibo.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_bria_fibo.py new file mode 100644 index 0000000000000000000000000000000000000000..7ddbccfa47c531bea0483238ede74499103c483e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_bria_fibo.py @@ -0,0 +1,637 @@ +# Copyright (c) Bria.ai. All rights reserved. +# +# This file is licensed under the Creative Commons Attribution-NonCommercial 4.0 International Public License (CC-BY-NC-4.0). +# You may obtain a copy of the license at https://creativecommons.org/licenses/by-nc/4.0/ +# +# You are free to share and adapt this material for non-commercial purposes provided you give appropriate credit, +# indicate if changes were made, and do not use the material for commercial purposes. +# +# See the license for further details. +import inspect +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...models.attention_processor import Attention +from ...models.embeddings import TimestepEmbedding, apply_rotary_emb, get_1d_rotary_pos_embed, get_timestep_embedding +from ...models.modeling_outputs import Transformer2DModelOutput +from ...models.modeling_utils import ModelMixin +from ...models.transformers.transformer_bria import BriaAttnProcessor +from ...utils import ( + apply_lora_scale, + logging, +) +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_projections(attn: "BriaFiboAttention", hidden_states, encoder_hidden_states=None): + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + encoder_query = encoder_key = encoder_value = None + if encoder_hidden_states is not None and attn.added_kv_proj_dim is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_fused_projections(attn: "BriaFiboAttention", hidden_states, encoder_hidden_states=None): + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + + encoder_query = encoder_key = encoder_value = (None,) + if encoder_hidden_states is not None and hasattr(attn, "to_added_qkv"): + encoder_query, encoder_key, encoder_value = attn.to_added_qkv(encoder_hidden_states).chunk(3, dim=-1) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_qkv_projections(attn: "BriaFiboAttention", hidden_states, encoder_hidden_states=None): + if attn.fused_projections: + return _get_fused_projections(attn, hidden_states, encoder_hidden_states) + return _get_projections(attn, hidden_states, encoder_hidden_states) + + +# Copied from diffusers.models.transformers.transformer_flux.FluxAttnProcessor with FluxAttnProcessor->BriaFiboAttnProcessor, FluxAttention->BriaFiboAttention +class BriaFiboAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "BriaFiboAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if attn.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + hidden_states = attn.to_out[0](hidden_states.contiguous()) + hidden_states = attn.to_out[1](hidden_states) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states.contiguous()) + + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +# Based on https://github.com/huggingface/diffusers/blob/55d49d4379007740af20629bb61aba9546c6b053/src/diffusers/models/transformers/transformer_flux.py +class BriaFiboAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = BriaFiboAttnProcessor + _available_processors = [BriaFiboAttnProcessor] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + context_pre_only: bool | None = None, + pre_only: bool = False, + elementwise_affine: bool = True, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = out_dim if out_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.heads = out_dim // dim_head if out_dim is not None else heads + self.added_kv_proj_dim = added_kv_proj_dim + self.added_proj_bias = added_proj_bias + + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + + if not self.pre_only: + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if added_kv_proj_dim is not None: + self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps) + self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps) + self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {"ip_adapter_masks", "ip_hidden_states"} + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"joint_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +class BriaFiboEmbedND(torch.nn.Module): + # modified from https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/modules/layers.py#L11 + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + freqs_dtype = torch.float32 if is_mps else torch.float64 + for i in range(n_axes): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[:, i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +@maybe_allow_in_graph +class BriaFiboSingleTransformerBlock(nn.Module): + def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, mlp_ratio: float = 4.0): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + + self.norm = AdaLayerNormZeroSingle(dim) + self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim) + + processor = BriaAttnProcessor() + + self.attn = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=processor, + qk_norm="rms_norm", + eps=1e-6, + pre_only=True, + ) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor: + residual = hidden_states + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + joint_attention_kwargs = joint_attention_kwargs or {} + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + gate = gate.unsqueeze(1) + hidden_states = gate * self.proj_out(hidden_states) + hidden_states = residual + hidden_states + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + return hidden_states + + +class BriaFiboTextProjection(nn.Module): + def __init__(self, in_features, hidden_size): + super().__init__() + self.linear = nn.Linear(in_features=in_features, out_features=hidden_size, bias=False) + + def forward(self, caption): + hidden_states = self.linear(caption) + return hidden_states + + +@maybe_allow_in_graph +# Based on from diffusers.models.transformers.transformer_flux.FluxTransformerBlock +class BriaFiboTransformerBlock(nn.Module): + def __init__( + self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6 + ): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim) + self.norm1_context = AdaLayerNormZero(dim) + + self.attn = BriaFiboAttention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=BriaFiboAttnProcessor(), + eps=eps, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + joint_attention_kwargs = joint_attention_kwargs or {} + + # Attention. + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + if len(attention_outputs) == 2: + attn_output, context_attn_output = attention_outputs + elif len(attention_outputs) == 3: + attn_output, context_attn_output, ip_attn_output = attention_outputs + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + if len(attention_outputs) == 3: + hidden_states = hidden_states + ip_attn_output + + # Process attention outputs for the `encoder_hidden_states`. + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class BriaFiboTimesteps(nn.Module): + def __init__( + self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float, scale: int = 1, time_theta=10000 + ): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + self.scale = scale + self.time_theta = time_theta + + def forward(self, timesteps): + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + scale=self.scale, + max_period=self.time_theta, + ) + return t_emb + + +class BriaFiboTimestepProjEmbeddings(nn.Module): + def __init__(self, embedding_dim, time_theta): + super().__init__() + + self.time_proj = BriaFiboTimesteps( + num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0, time_theta=time_theta + ) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + def forward(self, timestep, dtype): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=dtype)) # (N, D) + return timesteps_emb + + +class BriaFiboTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + """ + Parameters: + patch_size (`int`): Patch size to turn the input data into small patches. + in_channels (`int`, *optional*, defaults to 16): The number of channels in the input. + num_layers (`int`, *optional*, defaults to 18): The number of layers of MMDiT blocks to use. + num_single_layers (`int`, *optional*, defaults to 18): The number of layers of single DiT blocks to use. + attention_head_dim (`int`, *optional*, defaults to 64): The number of channels in each head. + num_attention_heads (`int`, *optional*, defaults to 18): The number of heads to use for multi-head attention. + joint_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + pooled_projection_dim (`int`): Number of dimensions to use when projecting the `pooled_projections`. + guidance_embeds (`bool`, defaults to False): Whether to use guidance embeddings. + ... + """ + + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + num_layers: int = 19, + num_single_layers: int = 38, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 4096, + pooled_projection_dim: int = None, + guidance_embeds: bool = False, + axes_dims_rope: list[int] = [16, 56, 56], + rope_theta=10000, + time_theta=10000, + text_encoder_dim: int = 2048, + ): + super().__init__() + self.out_channels = in_channels + self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim + + self.pos_embed = BriaFiboEmbedND(theta=rope_theta, axes_dim=axes_dims_rope) + + self.time_embed = BriaFiboTimestepProjEmbeddings(embedding_dim=self.inner_dim, time_theta=time_theta) + + if guidance_embeds: + self.guidance_embed = BriaFiboTimestepProjEmbeddings(embedding_dim=self.inner_dim) + + self.context_embedder = nn.Linear(self.config.joint_attention_dim, self.inner_dim) + self.x_embedder = torch.nn.Linear(self.config.in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + BriaFiboTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + attention_head_dim=self.config.attention_head_dim, + ) + for i in range(self.config.num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + BriaFiboSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=self.config.num_attention_heads, + attention_head_dim=self.config.attention_head_dim, + ) + for i in range(self.config.num_single_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + caption_projection = [ + BriaFiboTextProjection(in_features=text_encoder_dim, hidden_size=self.inner_dim // 2) + for i in range(self.config.num_layers + self.config.num_single_layers) + ] + self.caption_projection = nn.ModuleList(caption_projection) + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + text_encoder_layers: list = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.FloatTensor | Transformer2DModelOutput: + """ + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.FloatTensor` of shape `(batch_size, projection_dim)`): Embeddings projected + from the embeddings of input conditions. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + + hidden_states = self.x_embedder(hidden_states) + + timestep = timestep.to(hidden_states.dtype) + if guidance is not None: + guidance = guidance.to(hidden_states.dtype) + else: + guidance = None + + temb = self.time_embed(timestep, dtype=hidden_states.dtype) + + if guidance: + temb += self.guidance_embed(guidance, dtype=hidden_states.dtype) + + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if len(txt_ids.shape) == 3: + txt_ids = txt_ids[0] + + if len(img_ids.shape) == 3: + img_ids = img_ids[0] + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + new_text_encoder_layers = [] + for i, text_encoder_layer in enumerate(text_encoder_layers): + text_encoder_layer = self.caption_projection[i](text_encoder_layer) + new_text_encoder_layers.append(text_encoder_layer) + text_encoder_layers = new_text_encoder_layers + + block_id = 0 + for index_block, block in enumerate(self.transformer_blocks): + current_text_encoder_layer = text_encoder_layers[block_id] + encoder_hidden_states = torch.cat( + [encoder_hidden_states[:, :, : self.inner_dim // 2], current_text_encoder_layer], dim=-1 + ) + block_id += 1 + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + joint_attention_kwargs, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + joint_attention_kwargs=joint_attention_kwargs, + ) + + for index_block, block in enumerate(self.single_transformer_blocks): + current_text_encoder_layer = text_encoder_layers[block_id] + encoder_hidden_states = torch.cat( + [encoder_hidden_states[:, :, : self.inner_dim // 2], current_text_encoder_layer], dim=-1 + ) + block_id += 1 + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + temb, + image_rotary_emb, + joint_attention_kwargs, + ) + + else: + hidden_states = block( + hidden_states=hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + joint_attention_kwargs=joint_attention_kwargs, + ) + + encoder_hidden_states = hidden_states[:, : encoder_hidden_states.shape[1], ...] + hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...] + + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_chroma.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_chroma.py new file mode 100644 index 0000000000000000000000000000000000000000..d7cc96d018b3ccdd9196d41d8c8fb35d0280885b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_chroma.py @@ -0,0 +1,624 @@ +# Copyright 2025 Black Forest Labs, The HuggingFace Team and loadstone-rock . All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Any + +import numpy as np +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FluxTransformer2DLoadersMixin, FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, deprecate, logging +from ...utils.import_utils import is_torch_npu_available +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, FeedForward +from ..cache_utils import CacheMixin +from ..embeddings import FluxPosEmbed, PixArtAlphaTextProjection, Timesteps, get_timestep_embedding +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import CombinedTimestepLabelEmbeddings, FP32LayerNorm, RMSNorm +from .transformer_flux import FluxAttention, FluxAttnProcessor + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class ChromaAdaLayerNormZeroPruned(nn.Module): + r""" + Norm layer adaptive layer norm zero (adaLN-Zero). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, num_embeddings: int | None = None, norm_type="layer_norm", bias=True): + super().__init__() + if num_embeddings is not None: + self.emb = CombinedTimestepLabelEmbeddings(num_embeddings, embedding_dim) + else: + self.emb = None + + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + elif norm_type == "fp32_layer_norm": + self.norm = FP32LayerNorm(embedding_dim, elementwise_affine=False, bias=False) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, + x: torch.Tensor, + timestep: torch.Tensor | None = None, + class_labels: torch.LongTensor | None = None, + hidden_dtype: torch.dtype | None = None, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + if self.emb is not None: + emb = self.emb(timestep, class_labels, hidden_dtype=hidden_dtype) + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = emb.flatten(1, 2).chunk(6, dim=1) + x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None] + return x, gate_msa, shift_mlp, scale_mlp, gate_mlp + + +class ChromaAdaLayerNormZeroSinglePruned(nn.Module): + r""" + Norm layer adaptive layer norm zero (adaLN-Zero). + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_embeddings (`int`): The size of the embeddings dictionary. + """ + + def __init__(self, embedding_dim: int, norm_type="layer_norm", bias=True): + super().__init__() + + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, + x: torch.Tensor, + emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + shift_msa, scale_msa, gate_msa = emb.flatten(1, 2).chunk(3, dim=1) + x = self.norm(x) * (1 + scale_msa[:, None]) + shift_msa[:, None] + return x, gate_msa + + +class ChromaAdaLayerNormContinuousPruned(nn.Module): + r""" + Adaptive normalization layer with a norm layer (layer_norm or rms_norm). + + Args: + embedding_dim (`int`): Embedding dimension to use during projection. + conditioning_embedding_dim (`int`): Dimension of the input condition. + elementwise_affine (`bool`, defaults to `True`): + Boolean flag to denote if affine transformation should be applied. + eps (`float`, defaults to 1e-5): Epsilon factor. + bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use. + norm_type (`str`, defaults to `"layer_norm"`): + Normalization layer to use. Values supported: "layer_norm", "rms_norm". + """ + + def __init__( + self, + embedding_dim: int, + conditioning_embedding_dim: int, + # NOTE: It is a bit weird that the norm layer can be configured to have scale and shift parameters + # because the output is immediately scaled and shifted by the projected conditioning embeddings. + # Note that AdaLayerNorm does not let the norm layer have scale and shift parameters. + # However, this is how it was implemented in the original code, and it's rather likely you should + # set `elementwise_affine` to False. + elementwise_affine=True, + eps=1e-5, + bias=True, + norm_type="layer_norm", + ): + super().__init__() + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, eps, elementwise_affine, bias) + elif norm_type == "rms_norm": + self.norm = RMSNorm(embedding_dim, eps, elementwise_affine) + else: + raise ValueError(f"unknown norm_type {norm_type}") + + def forward(self, x: torch.Tensor, emb: torch.Tensor) -> torch.Tensor: + # convert back to the original dtype in case `conditioning_embedding`` is upcasted to float32 (needed for hunyuanDiT) + shift, scale = torch.chunk(emb.flatten(1, 2).to(x.dtype), 2, dim=1) + x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +class ChromaCombinedTimestepTextProjEmbeddings(nn.Module): + def __init__(self, num_channels: int, out_dim: int): + super().__init__() + + self.time_proj = Timesteps(num_channels=num_channels, flip_sin_to_cos=True, downscale_freq_shift=0) + self.guidance_proj = Timesteps(num_channels=num_channels, flip_sin_to_cos=True, downscale_freq_shift=0) + + self.register_buffer( + "mod_proj", + get_timestep_embedding( + torch.arange(out_dim) * 1000, 2 * num_channels, flip_sin_to_cos=True, downscale_freq_shift=0 + ), + persistent=False, + ) + + def forward(self, timestep: torch.Tensor) -> torch.Tensor: + mod_index_length = self.mod_proj.shape[0] + batch_size = timestep.shape[0] + + timesteps_proj = self.time_proj(timestep).to(dtype=timestep.dtype) + guidance_proj = self.guidance_proj(torch.tensor([0] * batch_size)).to( + dtype=timestep.dtype, device=timestep.device + ) + + mod_proj = self.mod_proj.to(dtype=timesteps_proj.dtype, device=timesteps_proj.device).repeat(batch_size, 1, 1) + timestep_guidance = ( + torch.cat([timesteps_proj, guidance_proj], dim=1).unsqueeze(1).repeat(1, mod_index_length, 1) + ) + input_vec = torch.cat([timestep_guidance, mod_proj], dim=-1) + return input_vec.to(timestep.dtype) + + +class ChromaApproximator(nn.Module): + def __init__(self, in_dim: int, out_dim: int, hidden_dim: int, n_layers: int = 5): + super().__init__() + self.in_proj = nn.Linear(in_dim, hidden_dim, bias=True) + self.layers = nn.ModuleList( + [PixArtAlphaTextProjection(hidden_dim, hidden_dim, act_fn="silu") for _ in range(n_layers)] + ) + self.norms = nn.ModuleList([nn.RMSNorm(hidden_dim) for _ in range(n_layers)]) + self.out_proj = nn.Linear(hidden_dim, out_dim) + + def forward(self, x): + x = self.in_proj(x) + + for layer, norms in zip(self.layers, self.norms): + x = x + layer(norms(x)) + + return self.out_proj(x) + + +@maybe_allow_in_graph +class ChromaSingleTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float = 4.0, + ): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + self.norm = ChromaAdaLayerNormZeroSinglePruned(dim) + self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim) + + if is_torch_npu_available(): + from ..attention_processor import FluxAttnProcessor2_0_NPU + + deprecation_message = ( + "Defaulting to FluxAttnProcessor2_0_NPU for NPU devices will be removed. Attention processors " + "should be set explicitly using the `set_attn_processor` method." + ) + deprecate("npu_processor", "0.34.0", deprecation_message) + processor = FluxAttnProcessor2_0_NPU() + else: + processor = FluxAttnProcessor() + + self.attn = FluxAttention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=processor, + eps=1e-6, + pre_only=True, + ) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_mask: torch.Tensor | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor: + residual = hidden_states + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + joint_attention_kwargs = joint_attention_kwargs or {} + + if attention_mask is not None: + attention_mask = attention_mask[:, None, None, :] * attention_mask[:, None, :, None] + + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + attention_mask=attention_mask, + **joint_attention_kwargs, + ) + + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + gate = gate.unsqueeze(1) + hidden_states = gate * self.proj_out(hidden_states) + hidden_states = residual + hidden_states + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + return hidden_states + + +@maybe_allow_in_graph +class ChromaTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + qk_norm: str = "rms_norm", + eps: float = 1e-6, + ): + super().__init__() + self.norm1 = ChromaAdaLayerNormZeroPruned(dim) + self.norm1_context = ChromaAdaLayerNormZeroPruned(dim) + + self.attn = FluxAttention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=FluxAttnProcessor(), + eps=eps, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + attention_mask: torch.Tensor | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + temb_img, temb_txt = temb[:, :6], temb[:, 6:] + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb_img) + + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb_txt + ) + joint_attention_kwargs = joint_attention_kwargs or {} + if attention_mask is not None: + attention_mask = attention_mask[:, None, None, :] * attention_mask[:, None, :, None] + + # Attention. + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + attention_mask=attention_mask, + **joint_attention_kwargs, + ) + + if len(attention_outputs) == 2: + attn_output, context_attn_output = attention_outputs + elif len(attention_outputs) == 3: + attn_output, context_attn_output, ip_attn_output = attention_outputs + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + if len(attention_outputs) == 3: + hidden_states = hidden_states + ip_attn_output + + # Process attention outputs for the `encoder_hidden_states`. + + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class ChromaTransformer2DModel( + ModelMixin, + ConfigMixin, + PeftAdapterMixin, + FromOriginalModelMixin, + FluxTransformer2DLoadersMixin, + CacheMixin, + AttentionMixin, +): + """ + The Transformer model introduced in Flux, modified for Chroma. + + Reference: https://huggingface.co/lodestones/Chroma1-HD + + Args: + patch_size (`int`, defaults to `1`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `64`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `None`): + The number of channels in the output. If not specified, it defaults to `in_channels`. + num_layers (`int`, defaults to `19`): + The number of layers of dual stream DiT blocks to use. + num_single_layers (`int`, defaults to `38`): + The number of layers of single stream DiT blocks to use. + attention_head_dim (`int`, defaults to `128`): + The number of dimensions to use for each attention head. + num_attention_heads (`int`, defaults to `24`): + The number of attention heads to use. + joint_attention_dim (`int`, defaults to `4096`): + The number of dimensions to use for the joint attention (embedding/channel dimension of + `encoder_hidden_states`). + axes_dims_rope (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions to use for the rotary positional embeddings. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["ChromaTransformerBlock", "ChromaSingleTransformerBlock"] + _repeated_blocks = ["ChromaTransformerBlock", "ChromaSingleTransformerBlock"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + out_channels: int | None = None, + num_layers: int = 19, + num_single_layers: int = 38, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 4096, + axes_dims_rope: tuple[int, ...] = (16, 56, 56), + approximator_num_channels: int = 64, + approximator_hidden_dim: int = 5120, + approximator_layers: int = 5, + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.pos_embed = FluxPosEmbed(theta=10000, axes_dim=axes_dims_rope) + + self.time_text_embed = ChromaCombinedTimestepTextProjEmbeddings( + num_channels=approximator_num_channels // 4, + out_dim=3 * num_single_layers + 2 * 6 * num_layers + 2, + ) + self.distilled_guidance_layer = ChromaApproximator( + in_dim=approximator_num_channels, + out_dim=self.inner_dim, + hidden_dim=approximator_hidden_dim, + n_layers=approximator_layers, + ) + + self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim) + self.x_embedder = nn.Linear(in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + ChromaTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + ChromaSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_single_layers) + ] + ) + + self.norm_out = ChromaAdaLayerNormContinuousPruned( + self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6 + ) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + attention_mask: torch.Tensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + controlnet_block_samples=None, + controlnet_single_block_samples=None, + return_dict: bool = True, + controlnet_blocks_repeat: bool = False, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`FluxTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`): + Input `hidden_states`. + encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + + hidden_states = self.x_embedder(hidden_states) + + timestep = timestep.to(hidden_states.dtype) * 1000 + + input_vec = self.time_text_embed(timestep) + pooled_temb = self.distilled_guidance_layer(input_vec) + + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if txt_ids.ndim == 3: + logger.warning( + "Passing `txt_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + txt_ids = txt_ids[0] + if img_ids.ndim == 3: + logger.warning( + "Passing `img_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + img_ids = img_ids[0] + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + if joint_attention_kwargs is not None and "ip_adapter_image_embeds" in joint_attention_kwargs: + ip_adapter_image_embeds = joint_attention_kwargs.pop("ip_adapter_image_embeds") + ip_hidden_states = self.encoder_hid_proj(ip_adapter_image_embeds) + joint_attention_kwargs.update({"ip_hidden_states": ip_hidden_states}) + + for index_block, block in enumerate(self.transformer_blocks): + img_offset = 3 * len(self.single_transformer_blocks) + txt_offset = img_offset + 6 * len(self.transformer_blocks) + img_modulation = img_offset + 6 * index_block + text_modulation = txt_offset + 6 * index_block + temb = torch.cat( + ( + pooled_temb[:, img_modulation : img_modulation + 6], + pooled_temb[:, text_modulation : text_modulation + 6], + ), + dim=1, + ) + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, temb, image_rotary_emb, attention_mask + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + attention_mask=attention_mask, + joint_attention_kwargs=joint_attention_kwargs, + ) + + # controlnet residual + if controlnet_block_samples is not None: + interval_control = len(self.transformer_blocks) / len(controlnet_block_samples) + interval_control = int(np.ceil(interval_control)) + # For Xlabs ControlNet. + if controlnet_blocks_repeat: + hidden_states = ( + hidden_states + controlnet_block_samples[index_block % len(controlnet_block_samples)] + ) + else: + hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control] + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + for index_block, block in enumerate(self.single_transformer_blocks): + start_idx = 3 * index_block + temb = pooled_temb[:, start_idx : start_idx + 3] + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + temb, + image_rotary_emb, + ) + + else: + hidden_states = block( + hidden_states=hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + attention_mask=attention_mask, + joint_attention_kwargs=joint_attention_kwargs, + ) + + # controlnet residual + if controlnet_single_block_samples is not None: + interval_control = len(self.single_transformer_blocks) / len(controlnet_single_block_samples) + interval_control = int(np.ceil(interval_control)) + hidden_states[:, encoder_hidden_states.shape[1] :, ...] = ( + hidden_states[:, encoder_hidden_states.shape[1] :, ...] + + controlnet_single_block_samples[index_block // interval_control] + ) + + hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...] + + temb = pooled_temb[:, -2:] + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_chronoedit.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_chronoedit.py new file mode 100644 index 0000000000000000000000000000000000000000..25eb6f87a93ab7a376a9eae9005149b94d9e09f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_chronoedit.py @@ -0,0 +1,724 @@ +# Copyright 2025 The ChronoEdit Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, deprecate, logging +from ...utils.torch_utils import maybe_allow_in_graph +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +# Copied from diffusers.models.transformers.transformer_wan._get_qkv_projections +def _get_qkv_projections(attn: "WanAttention", hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor): + # encoder_hidden_states is only passed for cross-attention + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + if attn.fused_projections: + if not attn.is_cross_attention: + # In self-attention layers, we can fuse the entire QKV projection into a single linear + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + else: + # In cross-attention layers, we can only fuse the KV projections into a single linear + query = attn.to_q(hidden_states) + key, value = attn.to_kv(encoder_hidden_states).chunk(2, dim=-1) + else: + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + return query, key, value + + +# Copied from diffusers.models.transformers.transformer_wan._get_added_kv_projections +def _get_added_kv_projections(attn: "WanAttention", encoder_hidden_states_img: torch.Tensor): + if attn.fused_projections: + key_img, value_img = attn.to_added_kv(encoder_hidden_states_img).chunk(2, dim=-1) + else: + key_img = attn.add_k_proj(encoder_hidden_states_img) + value_img = attn.add_v_proj(encoder_hidden_states_img) + return key_img, value_img + + +# modified from diffusers.models.transformers.transformer_wan.WanAttnProcessor +class WanAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "WanAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or higher." + ) + + def __call__( + self, + attn: "WanAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> torch.Tensor: + encoder_hidden_states_img = None + if attn.add_k_proj is not None: + # 512 is the context length of the text encoder, hardcoded for now + image_context_length = encoder_hidden_states.shape[1] - 512 + encoder_hidden_states_img = encoder_hidden_states[:, :image_context_length] + encoder_hidden_states = encoder_hidden_states[:, image_context_length:] + + query, key, value = _get_qkv_projections(attn, hidden_states, encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + if rotary_emb is not None: + + def apply_rotary_emb( + hidden_states: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + ): + x1, x2 = hidden_states.unflatten(-1, (-1, 2)).unbind(-1) + cos = freqs_cos[..., 0::2] + sin = freqs_sin[..., 1::2] + out = torch.empty_like(hidden_states) + out[..., 0::2] = x1 * cos - x2 * sin + out[..., 1::2] = x1 * sin + x2 * cos + return out.type_as(hidden_states) + + query = apply_rotary_emb(query, *rotary_emb) + key = apply_rotary_emb(key, *rotary_emb) + + # I2V task + hidden_states_img = None + if encoder_hidden_states_img is not None: + key_img, value_img = _get_added_kv_projections(attn, encoder_hidden_states_img) + key_img = attn.norm_added_k(key_img) + + key_img = key_img.unflatten(2, (attn.heads, -1)) + value_img = value_img.unflatten(2, (attn.heads, -1)) + + hidden_states_img = dispatch_attention_fn( + query, + key_img, + value_img, + attn_mask=None, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12660 + parallel_config=None, + ) + hidden_states_img = hidden_states_img.flatten(2, 3) + hidden_states_img = hidden_states_img.type_as(query) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12660 + parallel_config=(self._parallel_config if encoder_hidden_states is None else None), + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + if hidden_states_img is not None: + hidden_states = hidden_states + hidden_states_img + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +# Copied from diffusers.models.transformers.transformer_wan.WanAttnProcessor2_0 +class WanAttnProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = ( + "The WanAttnProcessor2_0 class is deprecated and will be removed in a future version. " + "Please use WanAttnProcessor instead. " + ) + deprecate("WanAttnProcessor2_0", "1.0.0", deprecation_message, standard_warn=False) + return WanAttnProcessor(*args, **kwargs) + + +# Copied from diffusers.models.transformers.transformer_wan.WanAttention +class WanAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = WanAttnProcessor + _available_processors = [WanAttnProcessor] + + def __init__( + self, + dim: int, + heads: int = 8, + dim_head: int = 64, + eps: float = 1e-5, + dropout: float = 0.0, + added_kv_proj_dim: int | None = None, + cross_attention_dim_head: int | None = None, + processor=None, + is_cross_attention=None, + ): + super().__init__() + + self.inner_dim = dim_head * heads + self.heads = heads + self.added_kv_proj_dim = added_kv_proj_dim + self.cross_attention_dim_head = cross_attention_dim_head + self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads + + self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=True) + self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_out = torch.nn.ModuleList( + [ + torch.nn.Linear(self.inner_dim, dim, bias=True), + torch.nn.Dropout(dropout), + ] + ) + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + self.norm_k = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + + self.add_k_proj = self.add_v_proj = None + if added_kv_proj_dim is not None: + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps) + + if is_cross_attention is not None: + self.is_cross_attention = is_cross_attention + else: + self.is_cross_attention = cross_attention_dim_head is not None + + self.set_processor(processor) + + def fuse_projections(self): + if getattr(self, "fused_projections", False): + return + + if not self.is_cross_attention: + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_qkv = nn.Linear(in_features, out_features, bias=True) + self.to_qkv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + else: + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_kv = nn.Linear(in_features, out_features, bias=True) + self.to_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + if self.added_kv_proj_dim is not None: + concatenated_weights = torch.cat([self.add_k_proj.weight.data, self.add_v_proj.weight.data]) + concatenated_bias = torch.cat([self.add_k_proj.bias.data, self.add_v_proj.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_added_kv = nn.Linear(in_features, out_features, bias=True) + self.to_added_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + self.fused_projections = True + + @torch.no_grad() + def unfuse_projections(self): + if not getattr(self, "fused_projections", False): + return + + if hasattr(self, "to_qkv"): + delattr(self, "to_qkv") + if hasattr(self, "to_kv"): + delattr(self, "to_kv") + if hasattr(self, "to_added_kv"): + delattr(self, "to_added_kv") + + self.fused_projections = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + **kwargs, + ) -> torch.Tensor: + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, rotary_emb, **kwargs) + + +# Copied from diffusers.models.transformers.transformer_wan.WanImageEmbedding +class WanImageEmbedding(torch.nn.Module): + def __init__(self, in_features: int, out_features: int, pos_embed_seq_len=None): + super().__init__() + + self.norm1 = FP32LayerNorm(in_features) + self.ff = FeedForward(in_features, out_features, mult=1, activation_fn="gelu") + self.norm2 = FP32LayerNorm(out_features) + if pos_embed_seq_len is not None: + self.pos_embed = nn.Parameter(torch.zeros(1, pos_embed_seq_len, in_features)) + else: + self.pos_embed = None + + def forward(self, encoder_hidden_states_image: torch.Tensor) -> torch.Tensor: + if self.pos_embed is not None: + batch_size, seq_len, embed_dim = encoder_hidden_states_image.shape + encoder_hidden_states_image = encoder_hidden_states_image.view(-1, 2 * seq_len, embed_dim) + encoder_hidden_states_image = encoder_hidden_states_image + self.pos_embed + + hidden_states = self.norm1(encoder_hidden_states_image) + hidden_states = self.ff(hidden_states) + hidden_states = self.norm2(hidden_states) + return hidden_states + + +# Copied from diffusers.models.transformers.transformer_wan.WanTimeTextImageEmbedding +class WanTimeTextImageEmbedding(nn.Module): + def __init__( + self, + dim: int, + time_freq_dim: int, + time_proj_dim: int, + text_embed_dim: int, + image_embed_dim: int | None = None, + pos_embed_seq_len: int | None = None, + ): + super().__init__() + + self.timesteps_proj = Timesteps(num_channels=time_freq_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.time_embedder = TimestepEmbedding(in_channels=time_freq_dim, time_embed_dim=dim) + self.act_fn = nn.SiLU() + self.time_proj = nn.Linear(dim, time_proj_dim) + self.text_embedder = PixArtAlphaTextProjection(text_embed_dim, dim, act_fn="gelu_tanh") + + self.image_embedder = None + if image_embed_dim is not None: + self.image_embedder = WanImageEmbedding(image_embed_dim, dim, pos_embed_seq_len=pos_embed_seq_len) + + def forward( + self, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + timestep_seq_len: int | None = None, + ): + timestep = self.timesteps_proj(timestep) + if timestep_seq_len is not None: + timestep = timestep.unflatten(0, (-1, timestep_seq_len)) + + time_embedder_dtype = next(iter(self.time_embedder.parameters())).dtype + if timestep.dtype != time_embedder_dtype and time_embedder_dtype != torch.int8: + timestep = timestep.to(time_embedder_dtype) + temb = self.time_embedder(timestep).type_as(encoder_hidden_states) + timestep_proj = self.time_proj(self.act_fn(temb)) + + encoder_hidden_states = self.text_embedder(encoder_hidden_states) + if encoder_hidden_states_image is not None: + encoder_hidden_states_image = self.image_embedder(encoder_hidden_states_image) + + return temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image + + +class ChronoEditRotaryPosEmbed(nn.Module): + def __init__( + self, + attention_head_dim: int, + patch_size: tuple[int, int, int], + max_seq_len: int, + theta: float = 10000.0, + temporal_skip_len: int = 8, + ): + super().__init__() + + self.attention_head_dim = attention_head_dim + self.patch_size = patch_size + self.max_seq_len = max_seq_len + self.temporal_skip_len = temporal_skip_len + + h_dim = w_dim = 2 * (attention_head_dim // 6) + t_dim = attention_head_dim - h_dim - w_dim + freqs_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + + freqs_cos = [] + freqs_sin = [] + + for dim in [t_dim, h_dim, w_dim]: + freq_cos, freq_sin = get_1d_rotary_pos_embed( + dim, + max_seq_len, + theta, + use_real=True, + repeat_interleave_real=True, + freqs_dtype=freqs_dtype, + ) + freqs_cos.append(freq_cos) + freqs_sin.append(freq_sin) + + self.register_buffer("freqs_cos", torch.cat(freqs_cos, dim=1), persistent=False) + self.register_buffer("freqs_sin", torch.cat(freqs_sin, dim=1), persistent=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.patch_size + ppf, pph, ppw = num_frames // p_t, height // p_h, width // p_w + + split_sizes = [ + self.attention_head_dim - 2 * (self.attention_head_dim // 3), + self.attention_head_dim // 3, + self.attention_head_dim // 3, + ] + + freqs_cos = self.freqs_cos.split(split_sizes, dim=1) + freqs_sin = self.freqs_sin.split(split_sizes, dim=1) + + if num_frames == 2: + freqs_cos_f = freqs_cos[0][: self.temporal_skip_len][[0, -1]].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + else: + freqs_cos_f = freqs_cos[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_h = freqs_cos[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_w = freqs_cos[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + if num_frames == 2: + freqs_sin_f = freqs_sin[0][: self.temporal_skip_len][[0, -1]].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + else: + freqs_sin_f = freqs_sin[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_h = freqs_sin[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_w = freqs_sin[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_cos = torch.cat([freqs_cos_f, freqs_cos_h, freqs_cos_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + freqs_sin = torch.cat([freqs_sin_f, freqs_sin_h, freqs_sin_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + + return freqs_cos, freqs_sin + + +@maybe_allow_in_graph +# Copied from diffusers.models.transformers.transformer_wan.WanTransformerBlock +class WanTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + ffn_dim: int, + num_heads: int, + qk_norm: str = "rms_norm_across_heads", + cross_attn_norm: bool = False, + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + ): + super().__init__() + + # 1. Self-attention + self.norm1 = FP32LayerNorm(dim, eps, elementwise_affine=False) + self.attn1 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + cross_attention_dim_head=None, + processor=WanAttnProcessor(), + ) + + # 2. Cross-attention + self.attn2 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + cross_attention_dim_head=dim // num_heads, + processor=WanAttnProcessor(), + ) + self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + + # 3. Feed-forward + self.ffn = FeedForward(dim, inner_dim=ffn_dim, activation_fn="gelu-approximate") + self.norm3 = FP32LayerNorm(dim, eps, elementwise_affine=False) + + self.scale_shift_table = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + rotary_emb: torch.Tensor, + ) -> torch.Tensor: + if temb.ndim == 4: + # temb: batch_size, seq_len, 6, inner_dim (wan2.2 ti2v) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table.unsqueeze(0) + temb.float() + ).chunk(6, dim=2) + # batch_size, seq_len, 1, inner_dim + shift_msa = shift_msa.squeeze(2) + scale_msa = scale_msa.squeeze(2) + gate_msa = gate_msa.squeeze(2) + c_shift_msa = c_shift_msa.squeeze(2) + c_scale_msa = c_scale_msa.squeeze(2) + c_gate_msa = c_gate_msa.squeeze(2) + else: + # temb: batch_size, 6, inner_dim (wan2.1/wan2.2 14B) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table + temb.float() + ).chunk(6, dim=1) + + # 1. Self-attention + norm_hidden_states = (self.norm1(hidden_states.float()) * (1 + scale_msa) + shift_msa).type_as(hidden_states) + attn_output = self.attn1(norm_hidden_states, None, None, rotary_emb) + hidden_states = (hidden_states.float() + attn_output * gate_msa).type_as(hidden_states) + + # 2. Cross-attention + norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states) + attn_output = self.attn2(norm_hidden_states, encoder_hidden_states, None, None) + hidden_states = hidden_states + attn_output + + # 3. Feed-forward + norm_hidden_states = (self.norm3(hidden_states.float()) * (1 + c_scale_msa) + c_shift_msa).type_as( + hidden_states + ) + ff_output = self.ffn(norm_hidden_states) + hidden_states = (hidden_states.float() + ff_output.float() * c_gate_msa).type_as(hidden_states) + + return hidden_states + + +# modified from diffusers.models.transformers.transformer_wan.WanTransformer3DModel +class ChronoEditTransformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in the ChronoEdit model. + + Args: + patch_size (`tuple[int]`, defaults to `(1, 2, 2)`): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch). + num_attention_heads (`int`, defaults to `40`): + Fixed length for text embeddings. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_dim (`int`, defaults to `512`): + Input dimension for text embeddings. + freq_dim (`int`, defaults to `256`): + Dimension for sinusoidal time embeddings. + ffn_dim (`int`, defaults to `13824`): + Intermediate dimension in feed-forward network. + num_layers (`int`, defaults to `40`): + The number of layers of transformer blocks to use. + window_size (`tuple[int]`, defaults to `(-1, -1)`): + Window size for local attention (-1 indicates global attention). + cross_attn_norm (`bool`, defaults to `True`): + Enable cross-attention normalization. + qk_norm (`bool`, defaults to `True`): + Enable query/key normalization. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + add_img_emb (`bool`, defaults to `False`): + Whether to use img_emb. + added_kv_proj_dim (`int`, *optional*, defaults to `None`): + The number of channels to use for the added key and value projections. If `None`, no projection is used. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embedding", "condition_embedder", "norm"] + _no_split_modules = ["WanTransformerBlock"] + _keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"] + _keys_to_ignore_on_load_unexpected = ["norm_added_q"] + _repeated_blocks = ["WanTransformerBlock"] + _cp_plan = { + "rope": { + 0: ContextParallelInput(split_dim=1, expected_dims=4, split_output=True), + 1: ContextParallelInput(split_dim=1, expected_dims=4, split_output=True), + }, + "blocks.0": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + }, + # Reference: https://github.com/huggingface/diffusers/pull/12660 + # We need to disable the splitting of encoder_hidden_states because + # the image_encoder consistently generates 257 tokens for image_embed. This causes + # the shape of encoder_hidden_states—whose token count is always 769 (512 + 257) + # after concatenation—to be indivisible by the number of devices in the CP. + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + patch_size: tuple[int] = (1, 2, 2), + num_attention_heads: int = 40, + attention_head_dim: int = 128, + in_channels: int = 16, + out_channels: int = 16, + text_dim: int = 4096, + freq_dim: int = 256, + ffn_dim: int = 13824, + num_layers: int = 40, + cross_attn_norm: bool = True, + qk_norm: str | None = "rms_norm_across_heads", + eps: float = 1e-6, + image_dim: int | None = None, + added_kv_proj_dim: int | None = None, + rope_max_seq_len: int = 1024, + pos_embed_seq_len: int | None = None, + rope_temporal_skip_len: int = 8, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Patch & position embedding + self.rope = ChronoEditRotaryPosEmbed( + attention_head_dim, patch_size, rope_max_seq_len, temporal_skip_len=rope_temporal_skip_len + ) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Condition embeddings + # image_embedding_dim=1280 for I2V model + self.condition_embedder = WanTimeTextImageEmbedding( + dim=inner_dim, + time_freq_dim=freq_dim, + time_proj_dim=inner_dim * 6, + text_embed_dim=text_dim, + image_embed_dim=image_dim, + pos_embed_seq_len=pos_embed_seq_len, + ) + + # 3. Transformer blocks + self.blocks = nn.ModuleList( + [ + WanTransformerBlock( + inner_dim, ffn_dim, num_attention_heads, qk_norm, cross_attn_norm, eps, added_kv_proj_dim + ) + for _ in range(num_layers) + ] + ) + + # 4. Output norm & projection + self.norm_out = FP32LayerNorm(inner_dim, eps, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels * math.prod(patch_size)) + self.scale_shift_table = nn.Parameter(torch.randn(1, 2, inner_dim) / inner_dim**0.5) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + return_dict: bool = True, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor | dict[str, torch.Tensor]: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + rotary_emb = self.rope(hidden_states) + + hidden_states = self.patch_embedding(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + + # timestep shape: batch_size, or batch_size, seq_len (wan 2.2 ti2v) + if timestep.ndim == 2: + ts_seq_len = timestep.shape[1] + timestep = timestep.flatten() # batch_size * seq_len + else: + ts_seq_len = None + + temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder( + timestep, encoder_hidden_states, encoder_hidden_states_image, timestep_seq_len=ts_seq_len + ) + if ts_seq_len is not None: + # batch_size, seq_len, 6, inner_dim + timestep_proj = timestep_proj.unflatten(2, (6, -1)) + else: + # batch_size, 6, inner_dim + timestep_proj = timestep_proj.unflatten(1, (6, -1)) + + if encoder_hidden_states_image is not None: + encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1) + + # 4. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.blocks: + hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, timestep_proj, rotary_emb + ) + else: + for block in self.blocks: + hidden_states = block(hidden_states, encoder_hidden_states, timestep_proj, rotary_emb) + + # 5. Output norm, projection & unpatchify + if temb.ndim == 3: + # batch_size, seq_len, inner_dim (wan 2.2 ti2v) + shift, scale = (self.scale_shift_table.unsqueeze(0).to(temb.device) + temb.unsqueeze(2)).chunk(2, dim=2) + shift = shift.squeeze(2) + scale = scale.squeeze(2) + else: + # batch_size, inner_dim + shift, scale = (self.scale_shift_table.to(temb.device) + temb.unsqueeze(1)).chunk(2, dim=1) + + # Move the shift and scale tensors to the same device as hidden_states. + # When using multi-GPU inference via accelerate these will be on the + # first device rather than the last device, which hidden_states ends up + # on. + shift = shift.to(hidden_states.device) + scale = scale.to(hidden_states.device) + + hidden_states = (self.norm_out(hidden_states.float()) * (1 + scale) + shift).type_as(hidden_states) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cogview3plus.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cogview3plus.py new file mode 100644 index 0000000000000000000000000000000000000000..ad6a442acbcc1ef4c92fc8593d2d0f547b0e3f14 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cogview3plus.py @@ -0,0 +1,308 @@ +# Copyright 2025 The CogView team, Tsinghua University & ZhipuAI and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ..attention import AttentionMixin, FeedForward +from ..attention_processor import Attention, CogVideoXAttnProcessor2_0 +from ..embeddings import CogView3CombinedTimestepSizeEmbeddings, CogView3PlusPatchEmbed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, CogView3PlusAdaLayerNormZeroTextImage + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class CogView3PlusTransformerBlock(nn.Module): + r""" + Transformer block used in [CogView](https://github.com/THUDM/CogView3) model. + + Args: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + time_embed_dim (`int`): + The number of channels in timestep embedding. + """ + + def __init__( + self, + dim: int = 2560, + num_attention_heads: int = 64, + attention_head_dim: int = 40, + time_embed_dim: int = 512, + ): + super().__init__() + + self.norm1 = CogView3PlusAdaLayerNormZeroTextImage(embedding_dim=time_embed_dim, dim=dim) + + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + out_dim=dim, + bias=True, + qk_norm="layer_norm", + elementwise_affine=False, + eps=1e-6, + processor=CogVideoXAttnProcessor2_0(), + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + emb: torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_length = encoder_hidden_states.size(1) + + # norm & modulate + ( + norm_hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + norm_encoder_hidden_states, + c_gate_msa, + c_shift_mlp, + c_scale_mlp, + c_gate_mlp, + ) = self.norm1(hidden_states, encoder_hidden_states, emb) + + # attention + attn_hidden_states, attn_encoder_hidden_states = self.attn1( + hidden_states=norm_hidden_states, encoder_hidden_states=norm_encoder_hidden_states + ) + + hidden_states = hidden_states + gate_msa.unsqueeze(1) * attn_hidden_states + encoder_hidden_states = encoder_hidden_states + c_gate_msa.unsqueeze(1) * attn_encoder_hidden_states + + # norm & modulate + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + # feed-forward + norm_hidden_states = torch.cat([norm_encoder_hidden_states, norm_hidden_states], dim=1) + ff_output = self.ff(norm_hidden_states) + + hidden_states = hidden_states + gate_mlp.unsqueeze(1) * ff_output[:, text_seq_length:] + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * ff_output[:, :text_seq_length] + + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + return hidden_states, encoder_hidden_states + + +class CogView3PlusTransformer2DModel(ModelMixin, AttentionMixin, ConfigMixin): + r""" + The Transformer model introduced in [CogView3: Finer and Faster Text-to-Image Generation via Relay + Diffusion](https://huggingface.co/papers/2403.05121). + + Args: + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + num_layers (`int`, defaults to `30`): + The number of layers of Transformer blocks to use. + attention_head_dim (`int`, defaults to `40`): + The number of channels in each head. + num_attention_heads (`int`, defaults to `64`): + The number of heads to use for multi-head attention. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + time_embed_dim (`int`, defaults to `512`): + Output dimension of timestep embeddings. + condition_dim (`int`, defaults to `256`): + The embedding dimension of the input SDXL-style resolution conditions (original_size, target_size, + crop_coords). + pos_embed_max_size (`int`, defaults to `128`): + The maximum resolution of the positional embeddings, from which slices of shape `H x W` are taken and added + to input patched latents, where `H` and `W` are the latent height and width respectively. A value of 128 + means that the maximum supported height and width for image generation is `128 * vae_scale_factor * + patch_size => 128 * 8 * 2 => 2048`. + sample_size (`int`, defaults to `128`): + The base resolution of input latents. If height/width is not provided during generation, this value is used + to determine the resolution as `sample_size * vae_scale_factor => 128 * 8 => 1024` + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embed", "norm"] + _no_split_modules = ["CogView3PlusTransformerBlock", "CogView3PlusPatchEmbed"] + + @register_to_config + def __init__( + self, + patch_size: int = 2, + in_channels: int = 16, + num_layers: int = 30, + attention_head_dim: int = 40, + num_attention_heads: int = 64, + out_channels: int = 16, + text_embed_dim: int = 4096, + time_embed_dim: int = 512, + condition_dim: int = 256, + pos_embed_max_size: int = 128, + sample_size: int = 128, + ): + super().__init__() + self.out_channels = out_channels + self.inner_dim = num_attention_heads * attention_head_dim + + # CogView3 uses 3 additional SDXL-like conditions - original_size, target_size, crop_coords + # Each of these are sincos embeddings of shape 2 * condition_dim + self.pooled_projection_dim = 3 * 2 * condition_dim + + self.patch_embed = CogView3PlusPatchEmbed( + in_channels=in_channels, + hidden_size=self.inner_dim, + patch_size=patch_size, + text_hidden_size=text_embed_dim, + pos_embed_max_size=pos_embed_max_size, + ) + + self.time_condition_embed = CogView3CombinedTimestepSizeEmbeddings( + embedding_dim=time_embed_dim, + condition_dim=condition_dim, + pooled_projection_dim=self.pooled_projection_dim, + timesteps_dim=self.inner_dim, + ) + + self.transformer_blocks = nn.ModuleList( + [ + CogView3PlusTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + time_embed_dim=time_embed_dim, + ) + for _ in range(num_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous( + embedding_dim=self.inner_dim, + conditioning_embedding_dim=time_embed_dim, + elementwise_affine=False, + eps=1e-6, + ) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + original_size: torch.Tensor, + target_size: torch.Tensor, + crop_coords: torch.Tensor, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + """ + The [`CogView3PlusTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor`): + Input `hidden_states` of shape `(batch size, channel, height, width)`. + encoder_hidden_states (`torch.Tensor`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) of shape + `(batch_size, sequence_len, text_embed_dim)` + timestep (`torch.LongTensor`): + Used to indicate denoising step. + original_size (`torch.Tensor`): + CogView3 uses SDXL-like micro-conditioning for original image size as explained in section 2.2 of + [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). + target_size (`torch.Tensor`): + CogView3 uses SDXL-like micro-conditioning for target image size as explained in section 2.2 of + [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). + crop_coords (`torch.Tensor`): + CogView3 uses SDXL-like micro-conditioning for crop coordinates as explained in section 2.2 of + [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + `torch.Tensor` or [`~models.transformer_2d.Transformer2DModelOutput`]: + The denoised latents using provided inputs as conditioning. + """ + height, width = hidden_states.shape[-2:] + text_seq_length = encoder_hidden_states.shape[1] + + hidden_states = self.patch_embed( + hidden_states, encoder_hidden_states + ) # takes care of adding positional embeddings too. + emb = self.time_condition_embed(timestep, original_size, target_size, crop_coords, hidden_states.dtype) + + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + emb, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + emb=emb, + ) + + hidden_states = self.norm_out(hidden_states, emb) + hidden_states = self.proj_out(hidden_states) # (batch_size, height*width, patch_size*patch_size*out_channels) + + # unpatchify + patch_size = self.config.patch_size + height = height // patch_size + width = width // patch_size + + hidden_states = hidden_states.reshape( + shape=(hidden_states.shape[0], height, width, self.out_channels, patch_size, patch_size) + ) + hidden_states = torch.einsum("nhwcpq->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(hidden_states.shape[0], self.out_channels, height * patch_size, width * patch_size) + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cogview4.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cogview4.py new file mode 100644 index 0000000000000000000000000000000000000000..308e0e6cccaf520ab5ad37db4197559e04276ff9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cogview4.py @@ -0,0 +1,764 @@ +# Copyright 2025 The CogView team, Tsinghua University & ZhipuAI and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import FeedForward +from ..attention_processor import Attention +from ..cache_utils import CacheMixin +from ..embeddings import CogView3CombinedTimestepSizeEmbeddings +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import LayerNorm, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class CogView4PatchEmbed(nn.Module): + def __init__( + self, + in_channels: int = 16, + hidden_size: int = 2560, + patch_size: int = 2, + text_hidden_size: int = 4096, + ): + super().__init__() + self.patch_size = patch_size + + self.proj = nn.Linear(in_channels * patch_size**2, hidden_size) + self.text_proj = nn.Linear(text_hidden_size, hidden_size) + + def forward(self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, channel, height, width = hidden_states.shape + post_patch_height = height // self.patch_size + post_patch_width = width // self.patch_size + + hidden_states = hidden_states.reshape( + batch_size, channel, post_patch_height, self.patch_size, post_patch_width, self.patch_size + ) + hidden_states = hidden_states.permute(0, 2, 4, 1, 3, 5).flatten(3, 5).flatten(1, 2) + hidden_states = self.proj(hidden_states) + encoder_hidden_states = self.text_proj(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + + +class CogView4AdaLayerNormZero(nn.Module): + def __init__(self, embedding_dim: int, dim: int) -> None: + super().__init__() + + self.norm = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.norm_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.linear = nn.Linear(embedding_dim, 12 * dim, bias=True) + + def forward( + self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor]: + dtype = hidden_states.dtype + norm_hidden_states = self.norm(hidden_states).to(dtype=dtype) + norm_encoder_hidden_states = self.norm_context(encoder_hidden_states).to(dtype=dtype) + + emb = self.linear(temb) + ( + shift_msa, + c_shift_msa, + scale_msa, + c_scale_msa, + gate_msa, + c_gate_msa, + shift_mlp, + c_shift_mlp, + scale_mlp, + c_scale_mlp, + gate_mlp, + c_gate_mlp, + ) = emb.chunk(12, dim=1) + + hidden_states = norm_hidden_states * (1 + scale_msa.unsqueeze(1)) + shift_msa.unsqueeze(1) + encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_msa.unsqueeze(1)) + c_shift_msa.unsqueeze(1) + + return ( + hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + encoder_hidden_states, + c_gate_msa, + c_shift_mlp, + c_scale_mlp, + c_gate_mlp, + ) + + +class CogView4AttnProcessor: + """ + Processor for implementing scaled dot-product attention for the CogView4 model. It applies a rotary embedding on + query and key vectors, but does not include spatial normalization. + + The processor supports passing an attention mask for text tokens. The attention mask should have shape (batch_size, + text_seq_length) where 1 indicates a non-padded token and 0 indicates a padded token. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("CogView4AttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + dtype = encoder_hidden_states.dtype + + batch_size, text_seq_length, embed_dim = encoder_hidden_states.shape + batch_size, image_seq_length, embed_dim = hidden_states.shape + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + # 1. QKV projections + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.heads, -1)).transpose(1, 2) + + # 2. QK normalization + if attn.norm_q is not None: + query = attn.norm_q(query).to(dtype=dtype) + if attn.norm_k is not None: + key = attn.norm_k(key).to(dtype=dtype) + + # 3. Rotational positional embeddings applied to latent stream + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query[:, :, text_seq_length:, :] = apply_rotary_emb( + query[:, :, text_seq_length:, :], image_rotary_emb, use_real_unbind_dim=-2 + ) + key[:, :, text_seq_length:, :] = apply_rotary_emb( + key[:, :, text_seq_length:, :], image_rotary_emb, use_real_unbind_dim=-2 + ) + + # 4. Attention + if attention_mask is not None: + text_attn_mask = attention_mask + assert text_attn_mask.dim() == 2, "the shape of text_attn_mask should be (batch_size, text_seq_length)" + text_attn_mask = text_attn_mask.float().to(query.device) + mix_attn_mask = torch.ones((batch_size, text_seq_length + image_seq_length), device=query.device) + mix_attn_mask[:, :text_seq_length] = text_attn_mask + mix_attn_mask = mix_attn_mask.unsqueeze(2) + attn_mask_matrix = mix_attn_mask @ mix_attn_mask.transpose(1, 2) + attention_mask = (attn_mask_matrix > 0).unsqueeze(1).to(query.dtype) + + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + hidden_states = hidden_states.transpose(1, 2).flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + # 5. Output projection + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + return hidden_states, encoder_hidden_states + + +class CogView4TrainingAttnProcessor: + """ + Training Processor for implementing scaled dot-product attention for the CogView4 model. It applies a rotary + embedding on query and key vectors, but does not include spatial normalization. + + This processor differs from CogView4AttnProcessor in several important ways: + 1. It supports attention masking with variable sequence lengths for multi-resolution training + 2. It unpacks and repacks sequences for efficient training with variable sequence lengths when batch_flag is + provided + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("CogView4AttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + latent_attn_mask: torch.Tensor | None = None, + text_attn_mask: torch.Tensor | None = None, + batch_flag: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + **kwargs, + ) -> tuple[torch.Tensor, torch.Tensor]: + """ + Args: + attn (`Attention`): + The attention module. + hidden_states (`torch.Tensor`): + The input hidden states. + encoder_hidden_states (`torch.Tensor`): + The encoder hidden states for cross-attention. + latent_attn_mask (`torch.Tensor`, *optional*): + Mask for latent tokens where 0 indicates pad token and 1 indicates non-pad token. If None, full + attention is used for all latent tokens. Note: the shape of latent_attn_mask is (batch_size, + num_latent_tokens). + text_attn_mask (`torch.Tensor`, *optional*): + Mask for text tokens where 0 indicates pad token and 1 indicates non-pad token. If None, full attention + is used for all text tokens. + batch_flag (`torch.Tensor`, *optional*): + Values from 0 to n-1 indicating which samples belong to the same batch. Samples with the same + batch_flag are packed together. Example: [0, 1, 1, 2, 2] means sample 0 forms batch0, samples 1-2 form + batch1, and samples 3-4 form batch2. If None, no packing is used. + image_rotary_emb (`tuple[torch.Tensor, torch.Tensor]` or `list[tuple[torch.Tensor, torch.Tensor]]`, *optional*): + The rotary embedding for the image part of the input. + Returns: + `tuple[torch.Tensor, torch.Tensor]`: The processed hidden states for both image and text streams. + """ + + # Get dimensions and device info + batch_size, text_seq_length, embed_dim = encoder_hidden_states.shape + batch_size, image_seq_length, embed_dim = hidden_states.shape + dtype = encoder_hidden_states.dtype + device = encoder_hidden_states.device + latent_hidden_states = hidden_states + # Combine text and image streams for joint processing + mixed_hidden_states = torch.cat([encoder_hidden_states, latent_hidden_states], dim=1) + + # 1. Construct attention mask and maybe packing input + # Create default masks if not provided + if text_attn_mask is None: + text_attn_mask = torch.ones((batch_size, text_seq_length), dtype=torch.int32, device=device) + if latent_attn_mask is None: + latent_attn_mask = torch.ones((batch_size, image_seq_length), dtype=torch.int32, device=device) + + # Validate mask shapes and types + assert text_attn_mask.dim() == 2, "the shape of text_attn_mask should be (batch_size, text_seq_length)" + assert text_attn_mask.dtype == torch.int32, "the dtype of text_attn_mask should be torch.int32" + assert latent_attn_mask.dim() == 2, "the shape of latent_attn_mask should be (batch_size, num_latent_tokens)" + assert latent_attn_mask.dtype == torch.int32, "the dtype of latent_attn_mask should be torch.int32" + + # Create combined mask for text and image tokens + mixed_attn_mask = torch.ones( + (batch_size, text_seq_length + image_seq_length), dtype=torch.int32, device=device + ) + mixed_attn_mask[:, :text_seq_length] = text_attn_mask + mixed_attn_mask[:, text_seq_length:] = latent_attn_mask + + # Convert mask to attention matrix format (where 1 means attend, 0 means don't attend) + mixed_attn_mask_input = mixed_attn_mask.unsqueeze(2).to(dtype=dtype) + attn_mask_matrix = mixed_attn_mask_input @ mixed_attn_mask_input.transpose(1, 2) + + # Handle batch packing if enabled + if batch_flag is not None: + assert batch_flag.dim() == 1 + # Determine packed batch size based on batch_flag + packing_batch_size = torch.max(batch_flag).item() + 1 + + # Calculate actual sequence lengths for each sample based on masks + text_seq_length = torch.sum(text_attn_mask, dim=1) + latent_seq_length = torch.sum(latent_attn_mask, dim=1) + mixed_seq_length = text_seq_length + latent_seq_length + + # Calculate packed sequence lengths for each packed batch + mixed_seq_length_packed = [ + torch.sum(mixed_attn_mask[batch_flag == batch_idx]).item() for batch_idx in range(packing_batch_size) + ] + + assert len(mixed_seq_length_packed) == packing_batch_size + + # Pack sequences by removing padding tokens + mixed_attn_mask_flatten = mixed_attn_mask.flatten(0, 1) + mixed_hidden_states_flatten = mixed_hidden_states.flatten(0, 1) + mixed_hidden_states_unpad = mixed_hidden_states_flatten[mixed_attn_mask_flatten == 1] + assert torch.sum(mixed_seq_length) == mixed_hidden_states_unpad.shape[0] + + # Split the unpadded sequence into packed batches + mixed_hidden_states_packed = torch.split(mixed_hidden_states_unpad, mixed_seq_length_packed) + + # Re-pad to create packed batches with right-side padding + mixed_hidden_states_packed_padded = torch.nn.utils.rnn.pad_sequence( + mixed_hidden_states_packed, + batch_first=True, + padding_value=0.0, + padding_side="right", + ) + + # Create attention mask for packed batches + l = mixed_hidden_states_packed_padded.shape[1] + attn_mask_matrix = torch.zeros( + (packing_batch_size, l, l), + dtype=dtype, + device=device, + ) + + # Fill attention mask with block diagonal matrices + # This ensures that tokens can only attend to other tokens within the same original sample + for idx, mask in enumerate(attn_mask_matrix): + seq_lengths = mixed_seq_length[batch_flag == idx] + offset = 0 + for length in seq_lengths: + # Create a block of 1s for each sample in the packed batch + mask[offset : offset + length, offset : offset + length] = 1 + offset += length + + attn_mask_matrix = attn_mask_matrix.to(dtype=torch.bool) + attn_mask_matrix = attn_mask_matrix.unsqueeze(1) # Add attention head dim + attention_mask = attn_mask_matrix + + # Prepare hidden states for attention computation + if batch_flag is None: + # If no packing, just combine text and image tokens + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + else: + # If packing, use the packed sequence + hidden_states = mixed_hidden_states_packed_padded + + # 2. QKV projections - convert hidden states to query, key, value + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + # Reshape for multi-head attention: [batch, seq_len, heads*dim] -> [batch, heads, seq_len, dim] + query = query.unflatten(2, (attn.heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.heads, -1)).transpose(1, 2) + + # 3. QK normalization - apply layer norm to queries and keys if configured + if attn.norm_q is not None: + query = attn.norm_q(query).to(dtype=dtype) + if attn.norm_k is not None: + key = attn.norm_k(key).to(dtype=dtype) + + # 4. Apply rotary positional embeddings to image tokens only + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + if batch_flag is None: + # Apply RoPE only to image tokens (after text tokens) + query[:, :, text_seq_length:, :] = apply_rotary_emb( + query[:, :, text_seq_length:, :], image_rotary_emb, use_real_unbind_dim=-2 + ) + key[:, :, text_seq_length:, :] = apply_rotary_emb( + key[:, :, text_seq_length:, :], image_rotary_emb, use_real_unbind_dim=-2 + ) + else: + # For packed batches, need to carefully apply RoPE to appropriate tokens + assert query.shape[0] == packing_batch_size + assert key.shape[0] == packing_batch_size + assert len(image_rotary_emb) == batch_size + + rope_idx = 0 + for idx in range(packing_batch_size): + offset = 0 + # Get text and image sequence lengths for samples in this packed batch + text_seq_length_bi = text_seq_length[batch_flag == idx] + latent_seq_length_bi = latent_seq_length[batch_flag == idx] + + # Apply RoPE to each image segment in the packed sequence + for tlen, llen in zip(text_seq_length_bi, latent_seq_length_bi): + mlen = tlen + llen + # Apply RoPE only to image tokens (after text tokens) + query[idx, :, offset + tlen : offset + mlen, :] = apply_rotary_emb( + query[idx, :, offset + tlen : offset + mlen, :], + image_rotary_emb[rope_idx], + use_real_unbind_dim=-2, + ) + key[idx, :, offset + tlen : offset + mlen, :] = apply_rotary_emb( + key[idx, :, offset + tlen : offset + mlen, :], + image_rotary_emb[rope_idx], + use_real_unbind_dim=-2, + ) + offset += mlen + rope_idx += 1 + + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + + # Reshape back: [batch, heads, seq_len, dim] -> [batch, seq_len, heads*dim] + hidden_states = hidden_states.transpose(1, 2).flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + # 5. Output projection - project attention output to model dimension + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + # Split the output back into text and image streams + if batch_flag is None: + # Simple split for non-packed case + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + else: + # For packed case: need to unpack, split text/image, then restore to original shapes + # First, unpad the sequence based on the packed sequence lengths + hidden_states_unpad = torch.nn.utils.rnn.unpad_sequence( + hidden_states, + lengths=torch.tensor(mixed_seq_length_packed), + batch_first=True, + ) + # Concatenate all unpadded sequences + hidden_states_flatten = torch.cat(hidden_states_unpad, dim=0) + # Split by original sample sequence lengths + hidden_states_unpack = torch.split(hidden_states_flatten, mixed_seq_length.tolist()) + assert len(hidden_states_unpack) == batch_size + + # Further split each sample's sequence into text and image parts + hidden_states_unpack = [ + torch.split(h, [tlen, llen]) + for h, tlen, llen in zip(hidden_states_unpack, text_seq_length, latent_seq_length) + ] + # Separate text and image sequences + encoder_hidden_states_unpad = [h[0] for h in hidden_states_unpack] + hidden_states_unpad = [h[1] for h in hidden_states_unpack] + + # Update the original tensors with the processed values, respecting the attention masks + for idx in range(batch_size): + # Place unpacked text tokens back in the encoder_hidden_states tensor + encoder_hidden_states[idx][text_attn_mask[idx] == 1] = encoder_hidden_states_unpad[idx] + # Place unpacked image tokens back in the latent_hidden_states tensor + latent_hidden_states[idx][latent_attn_mask[idx] == 1] = hidden_states_unpad[idx] + + # Update the output hidden states + hidden_states = latent_hidden_states + + return hidden_states, encoder_hidden_states + + +@maybe_allow_in_graph +class CogView4TransformerBlock(nn.Module): + def __init__( + self, + dim: int = 2560, + num_attention_heads: int = 64, + attention_head_dim: int = 40, + time_embed_dim: int = 512, + ) -> None: + super().__init__() + + # 1. Attention + self.norm1 = CogView4AdaLayerNormZero(time_embed_dim, dim) + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + out_dim=dim, + bias=True, + qk_norm="layer_norm", + elementwise_affine=False, + eps=1e-5, + processor=CogView4AttnProcessor(), + ) + + # 2. Feedforward + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + attention_mask: dict[str, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Timestep conditioning + ( + norm_hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + norm_encoder_hidden_states, + c_gate_msa, + c_shift_mlp, + c_scale_mlp, + c_gate_mlp, + ) = self.norm1(hidden_states, encoder_hidden_states, temb) + + # 2. Attention + if attention_kwargs is None: + attention_kwargs = {} + attn_hidden_states, attn_encoder_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + attention_mask=attention_mask, + **attention_kwargs, + ) + hidden_states = hidden_states + attn_hidden_states * gate_msa.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + attn_encoder_hidden_states * c_gate_msa.unsqueeze(1) + + # 3. Feedforward + norm_hidden_states = self.norm2(hidden_states) * (1 + scale_mlp.unsqueeze(1)) + shift_mlp.unsqueeze(1) + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) * ( + 1 + c_scale_mlp.unsqueeze(1) + ) + c_shift_mlp.unsqueeze(1) + + ff_output = self.ff(norm_hidden_states) + ff_output_context = self.ff(norm_encoder_hidden_states) + hidden_states = hidden_states + ff_output * gate_mlp.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + ff_output_context * c_gate_mlp.unsqueeze(1) + + return hidden_states, encoder_hidden_states + + +class CogView4RotaryPosEmbed(nn.Module): + def __init__(self, dim: int, patch_size: int, rope_axes_dim: tuple[int, int], theta: float = 10000.0) -> None: + super().__init__() + + self.dim = dim + self.patch_size = patch_size + self.rope_axes_dim = rope_axes_dim + self.theta = theta + + def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + batch_size, num_channels, height, width = hidden_states.shape + height, width = height // self.patch_size, width // self.patch_size + + dim_h, dim_w = self.dim // 2, self.dim // 2 + h_inv_freq = 1.0 / ( + self.theta ** (torch.arange(0, dim_h, 2, dtype=torch.float32)[: (dim_h // 2)].float() / dim_h) + ) + w_inv_freq = 1.0 / ( + self.theta ** (torch.arange(0, dim_w, 2, dtype=torch.float32)[: (dim_w // 2)].float() / dim_w) + ) + h_seq = torch.arange(self.rope_axes_dim[0]) + w_seq = torch.arange(self.rope_axes_dim[1]) + freqs_h = torch.outer(h_seq, h_inv_freq) + freqs_w = torch.outer(w_seq, w_inv_freq) + + h_idx = torch.arange(height, device=freqs_h.device) + w_idx = torch.arange(width, device=freqs_w.device) + inner_h_idx = h_idx * self.rope_axes_dim[0] // height + inner_w_idx = w_idx * self.rope_axes_dim[1] // width + + freqs_h = freqs_h[inner_h_idx] + freqs_w = freqs_w[inner_w_idx] + + # Create position matrices for height and width + # [height, 1, dim//4] and [1, width, dim//4] + freqs_h = freqs_h.unsqueeze(1) + freqs_w = freqs_w.unsqueeze(0) + # Broadcast freqs_h and freqs_w to [height, width, dim//4] + freqs_h = freqs_h.expand(height, width, -1) + freqs_w = freqs_w.expand(height, width, -1) + + # Concatenate along last dimension to get [height, width, dim//2] + freqs = torch.cat([freqs_h, freqs_w], dim=-1) + freqs = torch.cat([freqs, freqs], dim=-1) # [height, width, dim] + freqs = freqs.reshape(height * width, -1) + return (freqs.cos(), freqs.sin()) + + +class CogView4AdaLayerNormContinuous(nn.Module): + """ + CogView4-only final AdaLN: LN(x) -> Linear(cond) -> chunk -> affine. Matches Megatron: **no activation** before the + Linear on conditioning embedding. + """ + + def __init__( + self, + embedding_dim: int, + conditioning_embedding_dim: int, + elementwise_affine: bool = True, + eps: float = 1e-5, + bias: bool = True, + norm_type: str = "layer_norm", + ): + super().__init__() + self.linear = nn.Linear(conditioning_embedding_dim, embedding_dim * 2, bias=bias) + if norm_type == "layer_norm": + self.norm = LayerNorm(embedding_dim, eps, elementwise_affine, bias) + elif norm_type == "rms_norm": + self.norm = RMSNorm(embedding_dim, eps, elementwise_affine) + else: + raise ValueError(f"unknown norm_type {norm_type}") + + def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor: + # *** NO SiLU here *** + emb = self.linear(conditioning_embedding.to(x.dtype)) + scale, shift = torch.chunk(emb, 2, dim=1) + x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +class CogView4Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, CacheMixin): + r""" + Args: + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + num_layers (`int`, defaults to `30`): + The number of layers of Transformer blocks to use. + attention_head_dim (`int`, defaults to `40`): + The number of channels in each head. + num_attention_heads (`int`, defaults to `64`): + The number of heads to use for multi-head attention. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + time_embed_dim (`int`, defaults to `512`): + Output dimension of timestep embeddings. + condition_dim (`int`, defaults to `256`): + The embedding dimension of the input SDXL-style resolution conditions (original_size, target_size, + crop_coords). + pos_embed_max_size (`int`, defaults to `128`): + The maximum resolution of the positional embeddings, from which slices of shape `H x W` are taken and added + to input patched latents, where `H` and `W` are the latent height and width respectively. A value of 128 + means that the maximum supported height and width for image generation is `128 * vae_scale_factor * + patch_size => 128 * 8 * 2 => 2048`. + sample_size (`int`, defaults to `128`): + The base resolution of input latents. If height/width is not provided during generation, this value is used + to determine the resolution as `sample_size * vae_scale_factor => 128 * 8 => 1024` + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["CogView4TransformerBlock", "CogView4PatchEmbed", "CogView4PatchEmbed"] + _skip_layerwise_casting_patterns = ["patch_embed", "norm", "proj_out"] + + @register_to_config + def __init__( + self, + patch_size: int = 2, + in_channels: int = 16, + out_channels: int = 16, + num_layers: int = 30, + attention_head_dim: int = 40, + num_attention_heads: int = 64, + text_embed_dim: int = 4096, + time_embed_dim: int = 512, + condition_dim: int = 256, + pos_embed_max_size: int = 128, + sample_size: int = 128, + rope_axes_dim: tuple[int, int] = (256, 256), + ): + super().__init__() + + # CogView4 uses 3 additional SDXL-like conditions - original_size, target_size, crop_coords + # Each of these are sincos embeddings of shape 2 * condition_dim + pooled_projection_dim = 3 * 2 * condition_dim + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels + + # 1. RoPE + self.rope = CogView4RotaryPosEmbed(attention_head_dim, patch_size, rope_axes_dim, theta=10000.0) + + # 2. Patch & Text-timestep embedding + self.patch_embed = CogView4PatchEmbed(in_channels, inner_dim, patch_size, text_embed_dim) + + self.time_condition_embed = CogView3CombinedTimestepSizeEmbeddings( + embedding_dim=time_embed_dim, + condition_dim=condition_dim, + pooled_projection_dim=pooled_projection_dim, + timesteps_dim=inner_dim, + ) + + # 3. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + CogView4TransformerBlock(inner_dim, num_attention_heads, attention_head_dim, time_embed_dim) + for _ in range(num_layers) + ] + ) + + # 4. Output projection + self.norm_out = CogView4AdaLayerNormContinuous(inner_dim, time_embed_dim, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * out_channels, bias=True) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + original_size: torch.Tensor, + target_size: torch.Tensor, + crop_coords: torch.Tensor, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_channels, height, width = hidden_states.shape + + # 1. RoPE + if image_rotary_emb is None: + image_rotary_emb = self.rope(hidden_states) + + # 2. Patch & Timestep embeddings + p = self.config.patch_size + post_patch_height = height // p + post_patch_width = width // p + + hidden_states, encoder_hidden_states = self.patch_embed(hidden_states, encoder_hidden_states) + + temb = self.time_condition_embed(timestep, original_size, target_size, crop_coords, hidden_states.dtype) + temb = F.silu(temb) + + # 3. Transformer blocks + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + attention_mask, + attention_kwargs, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + attention_mask, + attention_kwargs, + ) + + # 4. Output norm & projection + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + hidden_states = hidden_states.reshape(batch_size, post_patch_height, post_patch_width, -1, p, p) + output = hidden_states.permute(0, 3, 1, 4, 2, 5).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cosmos.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cosmos.py new file mode 100644 index 0000000000000000000000000000000000000000..46746a19a6787a060d98c22abaf70a73c6cfda2b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_cosmos.py @@ -0,0 +1,812 @@ +# Copyright 2025 The NVIDIA Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin +from ...utils import is_torchvision_available +from ..attention import FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..embeddings import Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import RMSNorm + + +if is_torchvision_available(): + from torchvision import transforms + + +class CosmosPatchEmbed(nn.Module): + def __init__( + self, in_channels: int, out_channels: int, patch_size: tuple[int, int, int], bias: bool = True + ) -> None: + super().__init__() + self.patch_size = patch_size + + self.proj = nn.Linear(in_channels * patch_size[0] * patch_size[1] * patch_size[2], out_channels, bias=bias) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.patch_size + hidden_states = hidden_states.reshape( + batch_size, num_channels, num_frames // p_t, p_t, height // p_h, p_h, width // p_w, p_w + ) + hidden_states = hidden_states.permute(0, 2, 4, 6, 1, 3, 5, 7).flatten(4, 7) + hidden_states = self.proj(hidden_states) + return hidden_states + + +class CosmosTimestepEmbedding(nn.Module): + def __init__(self, in_features: int, out_features: int) -> None: + super().__init__() + self.linear_1 = nn.Linear(in_features, out_features, bias=False) + self.activation = nn.SiLU() + self.linear_2 = nn.Linear(out_features, 3 * out_features, bias=False) + + def forward(self, timesteps: torch.Tensor) -> torch.Tensor: + emb = self.linear_1(timesteps) + emb = self.activation(emb) + emb = self.linear_2(emb) + return emb + + +class CosmosEmbedding(nn.Module): + def __init__(self, embedding_dim: int, condition_dim: int) -> None: + super().__init__() + + self.time_proj = Timesteps(embedding_dim, flip_sin_to_cos=True, downscale_freq_shift=0.0) + self.t_embedder = CosmosTimestepEmbedding(embedding_dim, condition_dim) + self.norm = RMSNorm(embedding_dim, eps=1e-6, elementwise_affine=True) + + def forward(self, hidden_states: torch.Tensor, timestep: torch.LongTensor) -> torch.Tensor: + timesteps_proj = self.time_proj(timestep).type_as(hidden_states) + temb = self.t_embedder(timesteps_proj) + embedded_timestep = self.norm(timesteps_proj) + return temb, embedded_timestep + + +class CosmosAdaLayerNorm(nn.Module): + def __init__(self, in_features: int, hidden_features: int) -> None: + super().__init__() + self.embedding_dim = in_features + + self.activation = nn.SiLU() + self.norm = nn.LayerNorm(in_features, elementwise_affine=False, eps=1e-6) + self.linear_1 = nn.Linear(in_features, hidden_features, bias=False) + self.linear_2 = nn.Linear(hidden_features, 2 * in_features, bias=False) + + def forward( + self, hidden_states: torch.Tensor, embedded_timestep: torch.Tensor, temb: torch.Tensor | None = None + ) -> torch.Tensor: + embedded_timestep = self.activation(embedded_timestep) + embedded_timestep = self.linear_1(embedded_timestep) + embedded_timestep = self.linear_2(embedded_timestep) + + if temb is not None: + embedded_timestep = embedded_timestep + temb[..., : 2 * self.embedding_dim] + + shift, scale = embedded_timestep.chunk(2, dim=-1) + hidden_states = self.norm(hidden_states) + + if embedded_timestep.ndim == 2: + shift, scale = (x.unsqueeze(1) for x in (shift, scale)) + + hidden_states = hidden_states * (1 + scale) + shift + return hidden_states + + +class CosmosAdaLayerNormZero(nn.Module): + def __init__(self, in_features: int, hidden_features: int | None = None) -> None: + super().__init__() + + self.norm = nn.LayerNorm(in_features, elementwise_affine=False, eps=1e-6) + self.activation = nn.SiLU() + + if hidden_features is None: + self.linear_1 = nn.Identity() + else: + self.linear_1 = nn.Linear(in_features, hidden_features, bias=False) + + self.linear_2 = nn.Linear(hidden_features, 3 * in_features, bias=False) + + def forward( + self, + hidden_states: torch.Tensor, + embedded_timestep: torch.Tensor, + temb: torch.Tensor | None = None, + ) -> torch.Tensor: + embedded_timestep = self.activation(embedded_timestep) + embedded_timestep = self.linear_1(embedded_timestep) + embedded_timestep = self.linear_2(embedded_timestep) + + if temb is not None: + embedded_timestep = embedded_timestep + temb + + shift, scale, gate = embedded_timestep.chunk(3, dim=-1) + hidden_states = self.norm(hidden_states) + + if embedded_timestep.ndim == 2: + shift, scale, gate = (x.unsqueeze(1) for x in (shift, scale, gate)) + + hidden_states = hidden_states * (1 + scale) + shift + return hidden_states, gate + + +class CosmosAttnProcessor2_0: + def __init__(self): + if not hasattr(torch.nn.functional, "scaled_dot_product_attention"): + raise ImportError("CosmosAttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + # 1. QKV projections + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = query.unflatten(2, (attn.heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.heads, -1)).transpose(1, 2) + + # 2. QK normalization + query = attn.norm_q(query) + key = attn.norm_k(key) + + # 3. Apply RoPE + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query = apply_rotary_emb(query, image_rotary_emb, use_real=True, use_real_unbind_dim=-2) + key = apply_rotary_emb(key, image_rotary_emb, use_real=True, use_real_unbind_dim=-2) + + # 4. Prepare for GQA + if torch.onnx.is_in_onnx_export(): + query_idx = torch.tensor(query.size(3), device=query.device) + key_idx = torch.tensor(key.size(3), device=key.device) + value_idx = torch.tensor(value.size(3), device=value.device) + else: + query_idx = query.size(3) + key_idx = key.size(3) + value_idx = value.size(3) + key = key.repeat_interleave(query_idx // key_idx, dim=3) + value = value.repeat_interleave(query_idx // value_idx, dim=3) + + # 5. Attention + hidden_states = dispatch_attention_fn( + query.transpose(1, 2), + key.transpose(1, 2), + value.transpose(1, 2), + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + ) + hidden_states = hidden_states.flatten(2, 3).type_as(query) + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + + +class CosmosAttnProcessor2_5: + def __init__(self): + if not hasattr(torch.nn.functional, "scaled_dot_product_attention"): + raise ImportError("CosmosAttnProcessor2_5 requires PyTorch 2.0. Please upgrade PyTorch to 2.0 or newer.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: tuple[torch.Tensor, torch.Tensor], + attention_mask: tuple[torch.Tensor, torch.Tensor], + image_rotary_emb=None, + ) -> torch.Tensor: + if not isinstance(encoder_hidden_states, tuple): + raise ValueError("Expected encoder_hidden_states as (text_context, img_context) tuple.") + + text_context, img_context = encoder_hidden_states if encoder_hidden_states else (None, None) + text_mask, img_mask = attention_mask if attention_mask else (None, None) + + if text_context is None: + text_context = hidden_states + + query = attn.to_q(hidden_states) + key = attn.to_k(text_context) + value = attn.to_v(text_context) + + query = query.unflatten(2, (attn.heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.heads, -1)).transpose(1, 2) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query = apply_rotary_emb(query, image_rotary_emb, use_real=True, use_real_unbind_dim=-2) + key = apply_rotary_emb(key, image_rotary_emb, use_real=True, use_real_unbind_dim=-2) + + if torch.onnx.is_in_onnx_export(): + query_idx = torch.tensor(query.size(3), device=query.device) + key_idx = torch.tensor(key.size(3), device=key.device) + value_idx = torch.tensor(value.size(3), device=value.device) + else: + query_idx = query.size(3) + key_idx = key.size(3) + value_idx = value.size(3) + key = key.repeat_interleave(query_idx // key_idx, dim=3) + value = value.repeat_interleave(query_idx // value_idx, dim=3) + + attn_out = dispatch_attention_fn( + query.transpose(1, 2), + key.transpose(1, 2), + value.transpose(1, 2), + attn_mask=text_mask, + dropout_p=0.0, + is_causal=False, + ) + attn_out = attn_out.flatten(2, 3).type_as(query) + + if img_context is not None: + q_img = attn.q_img(hidden_states) + k_img = attn.k_img(img_context) + v_img = attn.v_img(img_context) + + batch_size = hidden_states.shape[0] + dim_head = attn.out_dim // attn.heads + + q_img = q_img.view(batch_size, -1, attn.heads, dim_head).transpose(1, 2) + k_img = k_img.view(batch_size, -1, attn.heads, dim_head).transpose(1, 2) + v_img = v_img.view(batch_size, -1, attn.heads, dim_head).transpose(1, 2) + + q_img = attn.q_img_norm(q_img) + k_img = attn.k_img_norm(k_img) + + q_img_idx = q_img.size(3) + k_img_idx = k_img.size(3) + v_img_idx = v_img.size(3) + k_img = k_img.repeat_interleave(q_img_idx // k_img_idx, dim=3) + v_img = v_img.repeat_interleave(q_img_idx // v_img_idx, dim=3) + + img_out = dispatch_attention_fn( + q_img.transpose(1, 2), + k_img.transpose(1, 2), + v_img.transpose(1, 2), + attn_mask=img_mask, + dropout_p=0.0, + is_causal=False, + ) + img_out = img_out.flatten(2, 3).type_as(q_img) + hidden_states = attn_out + img_out + else: + hidden_states = attn_out + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class CosmosAttention(Attention): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # add parameters for image q/k/v + inner_dim = self.heads * self.to_q.out_features // self.heads + self.q_img = nn.Linear(self.query_dim, inner_dim, bias=False) + self.k_img = nn.Linear(self.query_dim, inner_dim, bias=False) + self.v_img = nn.Linear(self.query_dim, inner_dim, bias=False) + self.q_img_norm = RMSNorm(self.to_q.out_features // self.heads, eps=1e-6, elementwise_affine=True) + self.k_img_norm = RMSNorm(self.to_k.out_features // self.heads, eps=1e-6, elementwise_affine=True) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: tuple[torch.Tensor, torch.Tensor], + attention_mask: torch.Tensor | None = None, + **cross_attention_kwargs, + ) -> torch.Tensor: + return super().forward( + hidden_states=hidden_states, + # NOTE: type-hint in base class can be ignored + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + **cross_attention_kwargs, + ) + + +class CosmosTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + cross_attention_dim: int, + mlp_ratio: float = 4.0, + adaln_lora_dim: int = 256, + qk_norm: str = "rms_norm", + out_bias: bool = False, + img_context: bool = False, + before_proj: bool = False, + after_proj: bool = False, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = CosmosAdaLayerNormZero(in_features=hidden_size, hidden_features=adaln_lora_dim) + self.img_context = img_context + self.attn1 = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + heads=num_attention_heads, + dim_head=attention_head_dim, + qk_norm=qk_norm, + elementwise_affine=True, + out_bias=out_bias, + processor=CosmosAttnProcessor2_0(), + ) + + self.norm2 = CosmosAdaLayerNormZero(in_features=hidden_size, hidden_features=adaln_lora_dim) + if img_context: + self.attn2 = CosmosAttention( + query_dim=hidden_size, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + qk_norm=qk_norm, + elementwise_affine=True, + out_bias=out_bias, + processor=CosmosAttnProcessor2_5(), + ) + else: + self.attn2 = Attention( + query_dim=hidden_size, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + qk_norm=qk_norm, + elementwise_affine=True, + out_bias=out_bias, + processor=CosmosAttnProcessor2_0(), + ) + + self.norm3 = CosmosAdaLayerNormZero(in_features=hidden_size, hidden_features=adaln_lora_dim) + self.ff = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu", bias=out_bias) + + # NOTE: zero conv for CosmosControlNet + self.before_proj = None + self.after_proj = None + if before_proj: + self.before_proj = nn.Linear(hidden_size, hidden_size) + if after_proj: + self.after_proj = nn.Linear(hidden_size, hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None | tuple[torch.Tensor | None, torch.Tensor | None], + embedded_timestep: torch.Tensor, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + extra_pos_emb: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + controlnet_residual: torch.Tensor | None = None, + latents: torch.Tensor | None = None, + block_idx: int | None = None, + ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: + if self.before_proj is not None: + hidden_states = self.before_proj(hidden_states) + latents + + if extra_pos_emb is not None: + hidden_states = hidden_states + extra_pos_emb + + # 1. Self Attention + norm_hidden_states, gate = self.norm1(hidden_states, embedded_timestep, temb) + attn_output = self.attn1(norm_hidden_states, image_rotary_emb=image_rotary_emb) + hidden_states = hidden_states + gate * attn_output + + # 2. Cross Attention + norm_hidden_states, gate = self.norm2(hidden_states, embedded_timestep, temb) + attn_output = self.attn2( + norm_hidden_states, encoder_hidden_states=encoder_hidden_states, attention_mask=attention_mask + ) + hidden_states = hidden_states + gate * attn_output + + # 3. Feed Forward + norm_hidden_states, gate = self.norm3(hidden_states, embedded_timestep, temb) + ff_output = self.ff(norm_hidden_states) + hidden_states = hidden_states + gate * ff_output + + if controlnet_residual is not None: + assert self.after_proj is None + # NOTE: this is assumed to be scaled by the controlnet + hidden_states += controlnet_residual + + if self.after_proj is not None: + assert controlnet_residual is None + hs_proj = self.after_proj(hidden_states) + return hidden_states, hs_proj + + return hidden_states + + +class CosmosRotaryPosEmbed(nn.Module): + def __init__( + self, + hidden_size: int, + max_size: tuple[int, int, int] = (128, 240, 240), + patch_size: tuple[int, int, int] = (1, 2, 2), + base_fps: int = 24, + rope_scale: tuple[float, float, float] = (2.0, 1.0, 1.0), + ) -> None: + super().__init__() + + self.max_size = [size // patch for size, patch in zip(max_size, patch_size)] + self.patch_size = patch_size + self.base_fps = base_fps + + self.dim_h = hidden_size // 6 * 2 + self.dim_w = hidden_size // 6 * 2 + self.dim_t = hidden_size - self.dim_h - self.dim_w + + self.h_ntk_factor = rope_scale[1] ** (self.dim_h / (self.dim_h - 2)) + self.w_ntk_factor = rope_scale[2] ** (self.dim_w / (self.dim_w - 2)) + self.t_ntk_factor = rope_scale[0] ** (self.dim_t / (self.dim_t - 2)) + + def forward(self, hidden_states: torch.Tensor, fps: int | None = None) -> tuple[torch.Tensor, torch.Tensor]: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + pe_size = [num_frames // self.patch_size[0], height // self.patch_size[1], width // self.patch_size[2]] + device = hidden_states.device + + h_theta = 10000.0 * self.h_ntk_factor + w_theta = 10000.0 * self.w_ntk_factor + t_theta = 10000.0 * self.t_ntk_factor + + seq = torch.arange(max(self.max_size), device=device, dtype=torch.float32) + dim_h_range = ( + torch.arange(0, self.dim_h, 2, device=device, dtype=torch.float32)[: (self.dim_h // 2)] / self.dim_h + ) + dim_w_range = ( + torch.arange(0, self.dim_w, 2, device=device, dtype=torch.float32)[: (self.dim_w // 2)] / self.dim_w + ) + dim_t_range = ( + torch.arange(0, self.dim_t, 2, device=device, dtype=torch.float32)[: (self.dim_t // 2)] / self.dim_t + ) + h_spatial_freqs = 1.0 / (h_theta**dim_h_range) + w_spatial_freqs = 1.0 / (w_theta**dim_w_range) + temporal_freqs = 1.0 / (t_theta**dim_t_range) + + emb_h = torch.outer(seq[: pe_size[1]], h_spatial_freqs)[None, :, None, :].repeat(pe_size[0], 1, pe_size[2], 1) + emb_w = torch.outer(seq[: pe_size[2]], w_spatial_freqs)[None, None, :, :].repeat(pe_size[0], pe_size[1], 1, 1) + + # Apply sequence scaling in temporal dimension + if fps is None: + # Images + emb_t = torch.outer(seq[: pe_size[0]], temporal_freqs) + else: + # Videos + emb_t = torch.outer(seq[: pe_size[0]] / fps * self.base_fps, temporal_freqs) + + emb_t = emb_t[:, None, None, :].repeat(1, pe_size[1], pe_size[2], 1) + freqs = torch.cat([emb_t, emb_h, emb_w] * 2, dim=-1).flatten(0, 2).float() + cos = torch.cos(freqs) + sin = torch.sin(freqs) + return cos, sin + + +class CosmosLearnablePositionalEmbed(nn.Module): + def __init__( + self, + hidden_size: int, + max_size: tuple[int, int, int], + patch_size: tuple[int, int, int], + eps: float = 1e-6, + ) -> None: + super().__init__() + + self.max_size = [size // patch for size, patch in zip(max_size, patch_size)] + self.patch_size = patch_size + self.eps = eps + + self.pos_emb_t = nn.Parameter(torch.zeros(self.max_size[0], hidden_size)) + self.pos_emb_h = nn.Parameter(torch.zeros(self.max_size[1], hidden_size)) + self.pos_emb_w = nn.Parameter(torch.zeros(self.max_size[2], hidden_size)) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + pe_size = [num_frames // self.patch_size[0], height // self.patch_size[1], width // self.patch_size[2]] + + emb_t = self.pos_emb_t[: pe_size[0]][None, :, None, None, :].repeat(batch_size, 1, pe_size[1], pe_size[2], 1) + emb_h = self.pos_emb_h[: pe_size[1]][None, None, :, None, :].repeat(batch_size, pe_size[0], 1, pe_size[2], 1) + emb_w = self.pos_emb_w[: pe_size[2]][None, None, None, :, :].repeat(batch_size, pe_size[0], pe_size[1], 1, 1) + emb = emb_t + emb_h + emb_w + emb = emb.flatten(1, 3) + + norm = torch.linalg.vector_norm(emb, dim=-1, keepdim=True, dtype=torch.float32) + norm = torch.add(self.eps, norm, alpha=np.sqrt(norm.numel() / emb.numel())) + return (emb / norm).type_as(hidden_states) + + +class CosmosTransformer3DModel(ModelMixin, ConfigMixin, FromOriginalModelMixin): + r""" + A Transformer model for video-like data used in [Cosmos](https://github.com/NVIDIA/Cosmos). + + Args: + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + num_attention_heads (`int`, defaults to `32`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each attention head. + num_layers (`int`, defaults to `28`): + The number of layers of transformer blocks to use. + mlp_ratio (`float`, defaults to `4.0`): + The ratio of the hidden layer size to the input size in the feedforward network. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + adaln_lora_dim (`int`, defaults to `256`): + The hidden dimension of the Adaptive LayerNorm LoRA layer. + max_size (`tuple[int, int, int]`, defaults to `(128, 240, 240)`): + The maximum size of the input latent tensors in the temporal, height, and width dimensions. + patch_size (`tuple[int, int, int]`, defaults to `(1, 2, 2)`): + The patch size to use for patchifying the input latent tensors in the temporal, height, and width + dimensions. + rope_scale (`tuple[float, float, float]`, defaults to `(2.0, 1.0, 1.0)`): + The scaling factor to use for RoPE in the temporal, height, and width dimensions. + concat_padding_mask (`bool`, defaults to `True`): + Whether to concatenate the padding mask to the input latent tensors. + extra_pos_embed_type (`str`, *optional*, defaults to `learnable`): + The type of extra positional embeddings to use. Can be one of `None` or `learnable`. + controlnet_block_every_n (`int`, *optional*): + Interval between transformer blocks that should receive control residuals (for example, `7` to inject after + every seventh block). Required for Cosmos Transfer2.5. + img_context_dim_in (`int`, *optional*): + The dimension of the input image context feature vector, i.e. it is the D in [B, N, D]. + img_context_num_tokens (`int`): + The number of tokens in the image context feature vector, i.e. it is the N in [B, N, D]. If + `img_context_dim_in` is not provided, then this parameter is ignored. + img_context_dim_out (`int`): + The output dimension of the image context projection layer. If `img_context_dim_in` is not provided, then + this parameter is ignored. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embed", "final_layer", "norm"] + _no_split_modules = ["CosmosTransformerBlock"] + _keep_in_fp32_modules = ["learnable_pos_embed"] + + @register_to_config + def __init__( + self, + in_channels: int = 16, + out_channels: int = 16, + num_attention_heads: int = 32, + attention_head_dim: int = 128, + num_layers: int = 28, + mlp_ratio: float = 4.0, + text_embed_dim: int = 1024, + adaln_lora_dim: int = 256, + max_size: tuple[int, int, int] = (128, 240, 240), + patch_size: tuple[int, int, int] = (1, 2, 2), + rope_scale: tuple[float, float, float] = (2.0, 1.0, 1.0), + concat_padding_mask: bool = True, + extra_pos_embed_type: str | None = "learnable", + use_crossattn_projection: bool = False, + crossattn_proj_in_channels: int = 1024, + encoder_hidden_states_channels: int = 1024, + controlnet_block_every_n: int | None = None, + img_context_dim_in: int | None = None, + img_context_num_tokens: int = 256, + img_context_dim_out: int = 2048, + ) -> None: + super().__init__() + hidden_size = num_attention_heads * attention_head_dim + + # 1. Patch Embedding + patch_embed_in_channels = in_channels + 1 if concat_padding_mask else in_channels + self.patch_embed = CosmosPatchEmbed(patch_embed_in_channels, hidden_size, patch_size, bias=False) + + # 2. Positional Embedding + self.rope = CosmosRotaryPosEmbed( + hidden_size=attention_head_dim, max_size=max_size, patch_size=patch_size, rope_scale=rope_scale + ) + + self.learnable_pos_embed = None + if extra_pos_embed_type == "learnable": + self.learnable_pos_embed = CosmosLearnablePositionalEmbed( + hidden_size=hidden_size, + max_size=max_size, + patch_size=patch_size, + ) + + # 3. Time Embedding + self.time_embed = CosmosEmbedding(hidden_size, hidden_size) + + # 4. Transformer Blocks + self.transformer_blocks = nn.ModuleList( + [ + CosmosTransformerBlock( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + cross_attention_dim=text_embed_dim, + mlp_ratio=mlp_ratio, + adaln_lora_dim=adaln_lora_dim, + qk_norm="rms_norm", + out_bias=False, + img_context=self.config.img_context_dim_in is not None and self.config.img_context_dim_in > 0, + ) + for _ in range(num_layers) + ] + ) + + # 5. Output norm & projection + self.norm_out = CosmosAdaLayerNorm(hidden_size, adaln_lora_dim) + self.proj_out = nn.Linear( + hidden_size, patch_size[0] * patch_size[1] * patch_size[2] * out_channels, bias=False + ) + + if self.config.use_crossattn_projection: + self.crossattn_proj = nn.Sequential( + nn.Linear(crossattn_proj_in_channels, encoder_hidden_states_channels, bias=True), + nn.GELU(), + ) + + self.gradient_checkpointing = False + + if self.config.img_context_dim_in: + self.img_context_proj = nn.Sequential( + nn.Linear(self.config.img_context_dim_in, self.config.img_context_dim_out, bias=True), + nn.GELU(), + ) + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + block_controlnet_hidden_states: list[torch.Tensor] | None = None, + attention_mask: torch.Tensor | None = None, + fps: int | None = None, + condition_mask: torch.Tensor | None = None, + padding_mask: torch.Tensor | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + + # 1. Concatenate padding mask if needed & prepare attention mask + if condition_mask is not None: + hidden_states = torch.cat([hidden_states, condition_mask], dim=1) + + if self.config.concat_padding_mask: + padding_mask_resized = transforms.functional.resize( + padding_mask, list(hidden_states.shape[-2:]), interpolation=transforms.InterpolationMode.NEAREST + ) + hidden_states = torch.cat( + [hidden_states, padding_mask_resized.unsqueeze(2).repeat(batch_size, 1, num_frames, 1, 1)], dim=1 + ) + + if attention_mask is not None: + attention_mask = attention_mask.unsqueeze(1).unsqueeze(1) # [B, 1, 1, S] + + # 2. Generate positional embeddings + image_rotary_emb = self.rope(hidden_states, fps=fps) + extra_pos_emb = self.learnable_pos_embed(hidden_states) if self.config.extra_pos_embed_type else None + + # 3. Patchify input + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + hidden_states = self.patch_embed(hidden_states) + hidden_states = hidden_states.flatten(1, 3) # [B, T, H, W, C] -> [B, THW, C] + + # 4. Timestep embeddings + if timestep.ndim == 1: + temb, embedded_timestep = self.time_embed(hidden_states, timestep) + elif timestep.ndim == 5: + assert timestep.shape == (batch_size, 1, num_frames, 1, 1), ( + f"Expected timestep to have shape [B, 1, T, 1, 1], but got {timestep.shape}" + ) + timestep = timestep.flatten() + temb, embedded_timestep = self.time_embed(hidden_states, timestep) + # We can do this because num_frames == post_patch_num_frames, as p_t is 1 + temb, embedded_timestep = ( + x.view(batch_size, post_patch_num_frames, 1, 1, -1) + .expand(-1, -1, post_patch_height, post_patch_width, -1) + .flatten(1, 3) + for x in (temb, embedded_timestep) + ) # [BT, C] -> [B, T, 1, 1, C] -> [B, T, H, W, C] -> [B, THW, C] + else: + raise ValueError(f"Expected timestep to have shape [B, 1, T, 1, 1] or [T], but got {timestep.shape}") + + # 5. Process encoder hidden states + text_context, img_context = ( + encoder_hidden_states if isinstance(encoder_hidden_states, tuple) else (encoder_hidden_states, None) + ) + if self.config.use_crossattn_projection: + text_context = self.crossattn_proj(text_context) + + if img_context is not None and self.config.img_context_dim_in: + img_context = self.img_context_proj(img_context) + + processed_encoder_hidden_states = ( + (text_context, img_context) if isinstance(encoder_hidden_states, tuple) else text_context + ) + + # 6. Build controlnet block index map + controlnet_block_index_map = {} + if block_controlnet_hidden_states is not None: + n_blocks = len(self.transformer_blocks) + controlnet_block_index_map = { + block_idx: block_controlnet_hidden_states[idx] + for idx, block_idx in list(enumerate(range(0, n_blocks, self.config.controlnet_block_every_n))) + } + + # 7. Transformer blocks + for block_idx, block in enumerate(self.transformer_blocks): + controlnet_residual = controlnet_block_index_map.get(block_idx) + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + processed_encoder_hidden_states, + embedded_timestep, + temb, + image_rotary_emb, + extra_pos_emb, + attention_mask, + controlnet_residual, + ) + else: + hidden_states = block( + hidden_states, + processed_encoder_hidden_states, + embedded_timestep, + temb, + image_rotary_emb, + extra_pos_emb, + attention_mask, + controlnet_residual, + ) + + # 8. Output norm & projection & unpatchify + hidden_states = self.norm_out(hidden_states, embedded_timestep, temb) + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.unflatten(2, (p_h, p_w, p_t, -1)) + hidden_states = hidden_states.unflatten(1, (post_patch_num_frames, post_patch_height, post_patch_width)) + # NOTE: The permutation order here is not the inverse operation of what happens when patching as usually expected. + # It might be a source of confusion to the reader, but this is correct + hidden_states = hidden_states.permute(0, 7, 1, 6, 2, 4, 3, 5) + hidden_states = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (hidden_states,) + + return Transformer2DModelOutput(sample=hidden_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_easyanimate.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_easyanimate.py new file mode 100644 index 0000000000000000000000000000000000000000..a665d420c2309f825a561822d4e96ae88d48c038 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_easyanimate.py @@ -0,0 +1,525 @@ +# Copyright 2025 The EasyAnimate team and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn.functional as F +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import Attention, FeedForward +from ..embeddings import TimestepEmbedding, Timesteps, get_3d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNorm, FP32LayerNorm, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class EasyAnimateLayerNormZero(nn.Module): + def __init__( + self, + conditioning_dim: int, + embedding_dim: int, + elementwise_affine: bool = True, + eps: float = 1e-5, + bias: bool = True, + norm_type: str = "fp32_layer_norm", + ) -> None: + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(conditioning_dim, 6 * embedding_dim, bias=bias) + + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=elementwise_affine, eps=eps) + elif norm_type == "fp32_layer_norm": + self.norm = FP32LayerNorm(embedding_dim, elementwise_affine=elementwise_affine, eps=eps) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + shift, scale, gate, enc_shift, enc_scale, enc_gate = self.linear(self.silu(temb)).chunk(6, dim=1) + hidden_states = self.norm(hidden_states) * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1) + encoder_hidden_states = self.norm(encoder_hidden_states) * (1 + enc_scale.unsqueeze(1)) + enc_shift.unsqueeze( + 1 + ) + return hidden_states, encoder_hidden_states, gate, enc_gate + + +class EasyAnimateRotaryPosEmbed(nn.Module): + def __init__(self, patch_size: int, rope_dim: list[int]) -> None: + super().__init__() + + self.patch_size = patch_size + self.rope_dim = rope_dim + + def get_resize_crop_region_for_grid(self, src, tgt_width, tgt_height): + tw = tgt_width + th = tgt_height + h, w = src + r = h / w + if r > (th / tw): + resize_height = th + resize_width = int(round(th / h * w)) + else: + resize_width = tw + resize_height = int(round(tw / w * h)) + + crop_top = int(round((th - resize_height) / 2.0)) + crop_left = int(round((tw - resize_width) / 2.0)) + + return (crop_top, crop_left), (crop_top + resize_height, crop_left + resize_width) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + bs, c, num_frames, grid_height, grid_width = hidden_states.size() + grid_height = grid_height // self.patch_size + grid_width = grid_width // self.patch_size + base_size_width = 90 // self.patch_size + base_size_height = 60 // self.patch_size + + grid_crops_coords = self.get_resize_crop_region_for_grid( + (grid_height, grid_width), base_size_width, base_size_height + ) + image_rotary_emb = get_3d_rotary_pos_embed( + self.rope_dim, + grid_crops_coords, + grid_size=(grid_height, grid_width), + temporal_size=hidden_states.size(2), + use_real=True, + ) + return image_rotary_emb + + +class EasyAnimateAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the EasyAnimateTransformer3DModel model. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "EasyAnimateAttnProcessor2_0 requires PyTorch 2.0 or above. To use it, please install PyTorch 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + if attn.add_q_proj is None and encoder_hidden_states is not None: + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + # 1. QKV projections + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)).transpose(1, 2) + key = key.unflatten(2, (attn.heads, -1)).transpose(1, 2) + value = value.unflatten(2, (attn.heads, -1)).transpose(1, 2) + + # 2. QK normalization + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # 3. Encoder condition QKV projection and normalization + if attn.add_q_proj is not None and encoder_hidden_states is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + encoder_query = encoder_query.unflatten(2, (attn.heads, -1)).transpose(1, 2) + encoder_key = encoder_key.unflatten(2, (attn.heads, -1)).transpose(1, 2) + encoder_value = encoder_value.unflatten(2, (attn.heads, -1)).transpose(1, 2) + + if attn.norm_added_q is not None: + encoder_query = attn.norm_added_q(encoder_query) + if attn.norm_added_k is not None: + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=2) + key = torch.cat([encoder_key, key], dim=2) + value = torch.cat([encoder_value, value], dim=2) + + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query[:, :, encoder_hidden_states.shape[1] :] = apply_rotary_emb( + query[:, :, encoder_hidden_states.shape[1] :], image_rotary_emb + ) + if not attn.is_cross_attention: + key[:, :, encoder_hidden_states.shape[1] :] = apply_rotary_emb( + key[:, :, encoder_hidden_states.shape[1] :], image_rotary_emb + ) + + # 5. Attention + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False + ) + hidden_states = hidden_states.transpose(1, 2).flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + # 6. Output projection + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = ( + hidden_states[:, : encoder_hidden_states.shape[1]], + hidden_states[:, encoder_hidden_states.shape[1] :], + ) + + if getattr(attn, "to_out", None) is not None: + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + if getattr(attn, "to_add_out", None) is not None: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + else: + if getattr(attn, "to_out", None) is not None: + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states, encoder_hidden_states + + +@maybe_allow_in_graph +class EasyAnimateTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + time_embed_dim: int, + dropout: float = 0.0, + activation_fn: str = "gelu-approximate", + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-6, + final_dropout: bool = True, + ff_inner_dim: int | None = None, + ff_bias: bool = True, + qk_norm: bool = True, + after_norm: bool = False, + norm_type: str = "fp32_layer_norm", + is_mmdit_block: bool = True, + ): + super().__init__() + + # Attention Part + self.norm1 = EasyAnimateLayerNormZero( + time_embed_dim, dim, norm_elementwise_affine, norm_eps, norm_type=norm_type, bias=True + ) + + self.attn1 = Attention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + qk_norm="layer_norm" if qk_norm else None, + eps=1e-6, + bias=True, + added_proj_bias=True, + added_kv_proj_dim=dim if is_mmdit_block else None, + context_pre_only=False if is_mmdit_block else None, + processor=EasyAnimateAttnProcessor2_0(), + ) + + # FFN Part + self.norm2 = EasyAnimateLayerNormZero( + time_embed_dim, dim, norm_elementwise_affine, norm_eps, norm_type=norm_type, bias=True + ) + self.ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + final_dropout=final_dropout, + inner_dim=ff_inner_dim, + bias=ff_bias, + ) + + self.txt_ff = None + if is_mmdit_block: + self.txt_ff = FeedForward( + dim, + dropout=dropout, + activation_fn=activation_fn, + final_dropout=final_dropout, + inner_dim=ff_inner_dim, + bias=ff_bias, + ) + + self.norm3 = None + if after_norm: + self.norm3 = FP32LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Attention + norm_hidden_states, norm_encoder_hidden_states, gate_msa, enc_gate_msa = self.norm1( + hidden_states, encoder_hidden_states, temb + ) + attn_hidden_states, attn_encoder_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states + gate_msa.unsqueeze(1) * attn_hidden_states + encoder_hidden_states = encoder_hidden_states + enc_gate_msa.unsqueeze(1) * attn_encoder_hidden_states + + # 2. Feed-forward + norm_hidden_states, norm_encoder_hidden_states, gate_ff, enc_gate_ff = self.norm2( + hidden_states, encoder_hidden_states, temb + ) + if self.norm3 is not None: + norm_hidden_states = self.norm3(self.ff(norm_hidden_states)) + if self.txt_ff is not None: + norm_encoder_hidden_states = self.norm3(self.txt_ff(norm_encoder_hidden_states)) + else: + norm_encoder_hidden_states = self.norm3(self.ff(norm_encoder_hidden_states)) + else: + norm_hidden_states = self.ff(norm_hidden_states) + if self.txt_ff is not None: + norm_encoder_hidden_states = self.txt_ff(norm_encoder_hidden_states) + else: + norm_encoder_hidden_states = self.ff(norm_encoder_hidden_states) + hidden_states = hidden_states + gate_ff.unsqueeze(1) * norm_hidden_states + encoder_hidden_states = encoder_hidden_states + enc_gate_ff.unsqueeze(1) * norm_encoder_hidden_states + return hidden_states, encoder_hidden_states + + +class EasyAnimateTransformer3DModel(ModelMixin, ConfigMixin): + """ + A Transformer model for video-like data in [EasyAnimate](https://github.com/aigc-apps/EasyAnimate). + + Parameters: + num_attention_heads (`int`, defaults to `48`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `16`): + The number of channels in the output. + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + sample_width (`int`, defaults to `90`): + The width of the input latents. + sample_height (`int`, defaults to `60`): + The height of the input latents. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + timestep_activation_fn (`str`, defaults to `"silu"`): + Activation function to use when generating the timestep embeddings. + num_layers (`int`, defaults to `30`): + The number of layers of Transformer blocks to use. + mmdit_layers (`int`, defaults to `1000`): + The number of layers of Multi Modal Transformer blocks to use. + dropout (`float`, defaults to `0.0`): + The dropout probability to use. + time_embed_dim (`int`, defaults to `512`): + Output dimension of timestep embeddings. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + norm_eps (`float`, defaults to `1e-5`): + The epsilon value to use in normalization layers. + norm_elementwise_affine (`bool`, defaults to `True`): + Whether to use elementwise affine in normalization layers. + flip_sin_to_cos (`bool`, defaults to `True`): + Whether to flip the sin to cos in the time embedding. + time_position_encoding_type (`str`, defaults to `3d_rope`): + Type of time position encoding. + after_norm (`bool`, defaults to `False`): + Flag to apply normalization after. + resize_inpaint_mask_directly (`bool`, defaults to `True`): + Flag to resize inpaint mask directly. + enable_text_attention_mask (`bool`, defaults to `True`): + Flag to enable text attention mask. + add_noise_in_inpaint_model (`bool`, defaults to `False`): + Flag to add noise in inpaint model. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["EasyAnimateTransformerBlock"] + _skip_layerwise_casting_patterns = ["^proj$", "norm", "^proj_out$"] + + @register_to_config + def __init__( + self, + num_attention_heads: int = 48, + attention_head_dim: int = 64, + in_channels: int | None = None, + out_channels: int | None = None, + patch_size: int | None = None, + sample_width: int = 90, + sample_height: int = 60, + activation_fn: str = "gelu-approximate", + timestep_activation_fn: str = "silu", + freq_shift: int = 0, + num_layers: int = 48, + mmdit_layers: int = 48, + dropout: float = 0.0, + time_embed_dim: int = 512, + add_norm_text_encoder: bool = False, + text_embed_dim: int = 3584, + text_embed_dim_t5: int = None, + norm_eps: float = 1e-5, + norm_elementwise_affine: bool = True, + flip_sin_to_cos: bool = True, + time_position_encoding_type: str = "3d_rope", + after_norm=False, + resize_inpaint_mask_directly: bool = True, + enable_text_attention_mask: bool = True, + add_noise_in_inpaint_model: bool = True, + ): + super().__init__() + inner_dim = num_attention_heads * attention_head_dim + + # 1. Timestep embedding + self.time_proj = Timesteps(inner_dim, flip_sin_to_cos, freq_shift) + self.time_embedding = TimestepEmbedding(inner_dim, time_embed_dim, timestep_activation_fn) + self.rope_embedding = EasyAnimateRotaryPosEmbed(patch_size, attention_head_dim) + + # 2. Patch embedding + self.proj = nn.Conv2d( + in_channels, inner_dim, kernel_size=(patch_size, patch_size), stride=patch_size, bias=True + ) + + # 3. Text refined embedding + self.text_proj = None + self.text_proj_t5 = None + if not add_norm_text_encoder: + self.text_proj = nn.Linear(text_embed_dim, inner_dim) + if text_embed_dim_t5 is not None: + self.text_proj_t5 = nn.Linear(text_embed_dim_t5, inner_dim) + else: + self.text_proj = nn.Sequential( + RMSNorm(text_embed_dim, 1e-6, elementwise_affine=True), nn.Linear(text_embed_dim, inner_dim) + ) + if text_embed_dim_t5 is not None: + self.text_proj_t5 = nn.Sequential( + RMSNorm(text_embed_dim, 1e-6, elementwise_affine=True), nn.Linear(text_embed_dim_t5, inner_dim) + ) + + # 4. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + EasyAnimateTransformerBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + time_embed_dim=time_embed_dim, + dropout=dropout, + activation_fn=activation_fn, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + after_norm=after_norm, + is_mmdit_block=True if _ < mmdit_layers else False, + ) + for _ in range(num_layers) + ] + ) + self.norm_final = nn.LayerNorm(inner_dim, norm_eps, norm_elementwise_affine) + + # 5. Output norm & projection + self.norm_out = AdaLayerNorm( + embedding_dim=time_embed_dim, + output_dim=2 * inner_dim, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + chunk_dim=1, + ) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * out_channels) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.Tensor, + timestep_cond: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_hidden_states_t5: torch.Tensor | None = None, + inpaint_latents: torch.Tensor | None = None, + control_latents: torch.Tensor | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, channels, video_length, height, width = hidden_states.size() + p = self.config.patch_size + post_patch_height = height // p + post_patch_width = width // p + + # 1. Time embedding + temb = self.time_proj(timestep).to(dtype=hidden_states.dtype) + temb = self.time_embedding(temb, timestep_cond) + image_rotary_emb = self.rope_embedding(hidden_states) + + # 2. Patch embedding + if inpaint_latents is not None: + hidden_states = torch.concat([hidden_states, inpaint_latents], 1) + if control_latents is not None: + hidden_states = torch.concat([hidden_states, control_latents], 1) + + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) # [B, C, F, H, W] -> [BF, C, H, W] + hidden_states = self.proj(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).permute( + 0, 2, 1, 3, 4 + ) # [BF, C, H, W] -> [B, F, C, H, W] + hidden_states = hidden_states.flatten(2, 4).transpose(1, 2) # [B, F, C, H, W] -> [B, FHW, C] + + # 3. Text embedding + encoder_hidden_states = self.text_proj(encoder_hidden_states) + if encoder_hidden_states_t5 is not None: + encoder_hidden_states_t5 = self.text_proj_t5(encoder_hidden_states_t5) + encoder_hidden_states = torch.cat([encoder_hidden_states, encoder_hidden_states_t5], dim=1).contiguous() + + # 4. Transformer blocks + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, temb, image_rotary_emb + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states, encoder_hidden_states, temb, image_rotary_emb + ) + + hidden_states = self.norm_final(hidden_states) + + # 5. Output norm & projection + hidden_states = self.norm_out(hidden_states, temb=temb) + hidden_states = self.proj_out(hidden_states) + + # 6. Unpatchify + p = self.config.patch_size + output = hidden_states.reshape(batch_size, video_length, post_patch_height, post_patch_width, channels, p, p) + output = output.permute(0, 4, 1, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_flux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_flux.py new file mode 100644 index 0000000000000000000000000000000000000000..78a77ebcfea9652358d99e4b973078300a64c13f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_flux.py @@ -0,0 +1,778 @@ +# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Any + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FluxTransformer2DLoadersMixin, FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import ( + CombinedTimestepGuidanceTextProjEmbeddings, + CombinedTimestepTextProjEmbeddings, + apply_rotary_emb, + get_1d_rotary_pos_embed, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_projections(attn: "FluxAttention", hidden_states, encoder_hidden_states=None): + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + encoder_query = encoder_key = encoder_value = None + if encoder_hidden_states is not None and attn.added_kv_proj_dim is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_fused_projections(attn: "FluxAttention", hidden_states, encoder_hidden_states=None): + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + + encoder_query = encoder_key = encoder_value = (None,) + if encoder_hidden_states is not None and hasattr(attn, "to_added_qkv"): + encoder_query, encoder_key, encoder_value = attn.to_added_qkv(encoder_hidden_states).chunk(3, dim=-1) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_qkv_projections(attn: "FluxAttention", hidden_states, encoder_hidden_states=None): + if attn.fused_projections: + return _get_fused_projections(attn, hidden_states, encoder_hidden_states) + return _get_projections(attn, hidden_states, encoder_hidden_states) + + +class FluxAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "FluxAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if attn.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + hidden_states = attn.to_out[0](hidden_states.contiguous()) + hidden_states = attn.to_out[1](hidden_states) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states.contiguous()) + + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class FluxIPAdapterAttnProcessor(torch.nn.Module): + """Flux Attention processor for IP-Adapter.""" + + _attention_backend = None + _parallel_config = None + + def __init__( + self, hidden_size: int, cross_attention_dim: int, num_tokens=(4,), scale=1.0, device=None, dtype=None + ): + super().__init__() + + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + f"{self.__class__.__name__} requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + self.hidden_size = hidden_size + self.cross_attention_dim = cross_attention_dim + + if not isinstance(num_tokens, (tuple, list)): + num_tokens = [num_tokens] + + if not isinstance(scale, list): + scale = [scale] * len(num_tokens) + if len(scale) != len(num_tokens): + raise ValueError("`scale` should be a list of integers with the same length as `num_tokens`.") + self.scale = scale + + self.to_k_ip = nn.ModuleList( + [ + nn.Linear(cross_attention_dim, hidden_size, bias=True, device=device, dtype=dtype) + for _ in range(len(num_tokens)) + ] + ) + self.to_v_ip = nn.ModuleList( + [ + nn.Linear(cross_attention_dim, hidden_size, bias=True, device=device, dtype=dtype) + for _ in range(len(num_tokens)) + ] + ) + + def __call__( + self, + attn: "FluxAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ip_hidden_states: list[torch.Tensor] | None = None, + ip_adapter_masks: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size = hidden_states.shape[0] + + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + ip_query = query + + if encoder_hidden_states is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + # IP-adapter + ip_attn_output = torch.zeros_like(hidden_states) + + for current_ip_hidden_states, scale, to_k_ip, to_v_ip in zip( + ip_hidden_states, self.scale, self.to_k_ip, self.to_v_ip + ): + ip_key = to_k_ip(current_ip_hidden_states) + ip_value = to_v_ip(current_ip_hidden_states) + + ip_key = ip_key.view(batch_size, -1, attn.heads, attn.head_dim) + ip_value = ip_value.view(batch_size, -1, attn.heads, attn.head_dim) + + current_ip_hidden_states = dispatch_attention_fn( + ip_query, + ip_key, + ip_value, + attn_mask=None, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + current_ip_hidden_states = current_ip_hidden_states.reshape(batch_size, -1, attn.heads * attn.head_dim) + current_ip_hidden_states = current_ip_hidden_states.to(ip_query.dtype) + ip_attn_output += scale * current_ip_hidden_states + + return hidden_states, encoder_hidden_states, ip_attn_output + else: + return hidden_states + + +class FluxAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = FluxAttnProcessor + _available_processors = [ + FluxAttnProcessor, + FluxIPAdapterAttnProcessor, + ] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + context_pre_only: bool | None = None, + pre_only: bool = False, + elementwise_affine: bool = True, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = out_dim if out_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.heads = out_dim // dim_head if out_dim is not None else heads + self.added_kv_proj_dim = added_kv_proj_dim + self.added_proj_bias = added_proj_bias + + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + + if not self.pre_only: + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if added_kv_proj_dim is not None: + self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps) + self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps) + self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {"ip_adapter_masks", "ip_hidden_states"} + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"joint_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +@maybe_allow_in_graph +class FluxSingleTransformerBlock(nn.Module): + def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, mlp_ratio: float = 4.0): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + + self.norm = AdaLayerNormZeroSingle(dim) + self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim) + + self.attn = FluxAttention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=FluxAttnProcessor(), + eps=1e-6, + pre_only=True, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_len = encoder_hidden_states.shape[1] + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + residual = hidden_states + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + joint_attention_kwargs = joint_attention_kwargs or {} + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + gate = gate.unsqueeze(1) + hidden_states = gate * self.proj_out(hidden_states) + hidden_states = residual + hidden_states + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + encoder_hidden_states, hidden_states = hidden_states[:, :text_seq_len], hidden_states[:, text_seq_len:] + return encoder_hidden_states, hidden_states + + +@maybe_allow_in_graph +class FluxTransformerBlock(nn.Module): + def __init__( + self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6 + ): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim) + self.norm1_context = AdaLayerNormZero(dim) + + self.attn = FluxAttention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=FluxAttnProcessor(), + eps=eps, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + joint_attention_kwargs = joint_attention_kwargs or {} + + # Attention. + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + if len(attention_outputs) == 2: + attn_output, context_attn_output = attention_outputs + elif len(attention_outputs) == 3: + attn_output, context_attn_output, ip_attn_output = attention_outputs + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + if len(attention_outputs) == 3: + hidden_states = hidden_states + ip_attn_output + + # Process attention outputs for the `encoder_hidden_states`. + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class FluxPosEmbed(nn.Module): + # modified from https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/modules/layers.py#L11 + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + is_npu = ids.device.type == "npu" + freqs_dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + for i in range(n_axes): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[:, i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +class FluxTransformer2DModel( + ModelMixin, + ConfigMixin, + PeftAdapterMixin, + FromOriginalModelMixin, + FluxTransformer2DLoadersMixin, + CacheMixin, + AttentionMixin, +): + """ + The Transformer model introduced in Flux. + + Reference: https://blackforestlabs.ai/announcing-black-forest-labs/ + + Args: + patch_size (`int`, defaults to `1`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `64`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `None`): + The number of channels in the output. If not specified, it defaults to `in_channels`. + num_layers (`int`, defaults to `19`): + The number of layers of dual stream DiT blocks to use. + num_single_layers (`int`, defaults to `38`): + The number of layers of single stream DiT blocks to use. + attention_head_dim (`int`, defaults to `128`): + The number of dimensions to use for each attention head. + num_attention_heads (`int`, defaults to `24`): + The number of attention heads to use. + joint_attention_dim (`int`, defaults to `4096`): + The number of dimensions to use for the joint attention (embedding/channel dimension of + `encoder_hidden_states`). + pooled_projection_dim (`int`, defaults to `768`): + The number of dimensions to use for the pooled projection. + guidance_embeds (`bool`, defaults to `False`): + Whether to use guidance embeddings for guidance-distilled variant of the model. + axes_dims_rope (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions to use for the rotary positional embeddings. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["FluxTransformerBlock", "FluxSingleTransformerBlock"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + _repeated_blocks = ["FluxTransformerBlock", "FluxSingleTransformerBlock"] + _cp_plan = { + "": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "img_ids": ContextParallelInput(split_dim=0, expected_dims=2, split_output=False), + "txt_ids": ContextParallelInput(split_dim=0, expected_dims=2, split_output=False), + }, + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + out_channels: int | None = None, + num_layers: int = 19, + num_single_layers: int = 38, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 4096, + pooled_projection_dim: int = 768, + guidance_embeds: bool = False, + axes_dims_rope: tuple[int, int, int] = (16, 56, 56), + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.pos_embed = FluxPosEmbed(theta=10000, axes_dim=axes_dims_rope) + + text_time_guidance_cls = ( + CombinedTimestepGuidanceTextProjEmbeddings if guidance_embeds else CombinedTimestepTextProjEmbeddings + ) + self.time_text_embed = text_time_guidance_cls( + embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim + ) + + self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim) + self.x_embedder = nn.Linear(in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + FluxTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + FluxSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_single_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + controlnet_block_samples=None, + controlnet_single_block_samples=None, + return_dict: bool = True, + controlnet_blocks_repeat: bool = False, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`FluxTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`): + Input `hidden_states`. + encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.Tensor` of shape `(batch_size, projection_dim)`): Embeddings projected + from the embeddings of input conditions. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + + hidden_states = self.x_embedder(hidden_states) + + timestep = timestep.to(hidden_states.dtype) * 1000 + if guidance is not None: + guidance = guidance.to(hidden_states.dtype) * 1000 + + temb = ( + self.time_text_embed(timestep, pooled_projections) + if guidance is None + else self.time_text_embed(timestep, guidance, pooled_projections) + ) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if txt_ids.ndim == 3: + logger.warning( + "Passing `txt_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + txt_ids = txt_ids[0] + if img_ids.ndim == 3: + logger.warning( + "Passing `img_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + img_ids = img_ids[0] + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + if joint_attention_kwargs is not None and "ip_adapter_image_embeds" in joint_attention_kwargs: + ip_adapter_image_embeds = joint_attention_kwargs.pop("ip_adapter_image_embeds") + ip_hidden_states = self.encoder_hid_proj(ip_adapter_image_embeds) + joint_attention_kwargs.update({"ip_hidden_states": ip_hidden_states}) + + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + joint_attention_kwargs, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + joint_attention_kwargs=joint_attention_kwargs, + ) + + # controlnet residual + if controlnet_block_samples is not None: + interval_control = len(self.transformer_blocks) / len(controlnet_block_samples) + interval_control = int(np.ceil(interval_control)) + # For Xlabs ControlNet. + if controlnet_blocks_repeat: + hidden_states = ( + hidden_states + controlnet_block_samples[index_block % len(controlnet_block_samples)] + ) + else: + hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control] + + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + joint_attention_kwargs, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + joint_attention_kwargs=joint_attention_kwargs, + ) + + # controlnet residual + if controlnet_single_block_samples is not None: + interval_control = len(self.single_transformer_blocks) / len(controlnet_single_block_samples) + interval_control = int(np.ceil(interval_control)) + hidden_states = hidden_states + controlnet_single_block_samples[index_block // interval_control] + + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_flux2.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_flux2.py new file mode 100644 index 0000000000000000000000000000000000000000..f77498c74fc1a3f5da8962a35c3dd31926a7a033 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_flux2.py @@ -0,0 +1,907 @@ +# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FluxTransformer2DLoadersMixin, FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import ( + TimestepEmbedding, + Timesteps, + apply_rotary_emb, + get_1d_rotary_pos_embed, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_projections(attn: "Flux2Attention", hidden_states, encoder_hidden_states=None): + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + encoder_query = encoder_key = encoder_value = None + if encoder_hidden_states is not None and attn.added_kv_proj_dim is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_fused_projections(attn: "Flux2Attention", hidden_states, encoder_hidden_states=None): + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + + encoder_query = encoder_key = encoder_value = (None,) + if encoder_hidden_states is not None and hasattr(attn, "to_added_qkv"): + encoder_query, encoder_key, encoder_value = attn.to_added_qkv(encoder_hidden_states).chunk(3, dim=-1) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_qkv_projections(attn: "Flux2Attention", hidden_states, encoder_hidden_states=None): + if attn.fused_projections: + return _get_fused_projections(attn, hidden_states, encoder_hidden_states) + return _get_projections(attn, hidden_states, encoder_hidden_states) + + +class Flux2SwiGLU(nn.Module): + """ + Flux 2 uses a SwiGLU-style activation in the transformer feedforward sub-blocks, but with the linear projection + layer fused into the first linear layer of the FF sub-block. Thus, this module has no trainable parameters. + """ + + def __init__(self): + super().__init__() + self.gate_fn = nn.SiLU() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x1, x2 = x.chunk(2, dim=-1) + x = self.gate_fn(x1) * x2 + return x + + +class Flux2FeedForward(nn.Module): + def __init__( + self, + dim: int, + dim_out: int | None = None, + mult: float = 3.0, + inner_dim: int | None = None, + bias: bool = False, + ): + super().__init__() + if inner_dim is None: + inner_dim = int(dim * mult) + dim_out = dim_out or dim + + # Flux2SwiGLU will reduce the dimension by half + self.linear_in = nn.Linear(dim, inner_dim * 2, bias=bias) + self.act_fn = Flux2SwiGLU() + self.linear_out = nn.Linear(inner_dim, dim_out, bias=bias) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.linear_in(x) + x = self.act_fn(x) + x = self.linear_out(x) + return x + + +class Flux2AttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "Flux2Attention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if attn.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class Flux2Attention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = Flux2AttnProcessor + _available_processors = [Flux2AttnProcessor] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + elementwise_affine: bool = True, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.out_dim = out_dim if out_dim is not None else query_dim + self.heads = out_dim // dim_head if out_dim is not None else heads + + self.use_bias = bias + self.dropout = dropout + + self.added_kv_proj_dim = added_kv_proj_dim + self.added_proj_bias = added_proj_bias + + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + + # QK Norm + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if added_kv_proj_dim is not None: + self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps) + self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps) + self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"joint_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +class Flux2ParallelSelfAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "Flux2ParallelSelfAttention", + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + # Parallel in (QKV + MLP in) projection + hidden_states = attn.to_qkv_mlp_proj(hidden_states) + qkv, mlp_hidden_states = torch.split( + hidden_states, [3 * attn.inner_dim, attn.mlp_hidden_dim * attn.mlp_mult_factor], dim=-1 + ) + + # Handle the attention logic + query, key, value = qkv.chunk(3, dim=-1) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + # Handle the feedforward (FF) logic + mlp_hidden_states = attn.mlp_act_fn(mlp_hidden_states) + + # Concatenate and parallel output projection + hidden_states = torch.cat([hidden_states, mlp_hidden_states], dim=-1) + hidden_states = attn.to_out(hidden_states) + + return hidden_states + + +class Flux2ParallelSelfAttention(torch.nn.Module, AttentionModuleMixin): + """ + Flux 2 parallel self-attention for the Flux 2 single-stream transformer blocks. + + This implements a parallel transformer block, where the attention QKV projections are fused to the feedforward (FF) + input projections, and the attention output projections are fused to the FF output projections. See the [ViT-22B + paper](https://arxiv.org/abs/2302.05442) for a visual depiction of this type of transformer block. + """ + + _default_processor_cls = Flux2ParallelSelfAttnProcessor + _available_processors = [Flux2ParallelSelfAttnProcessor] + # Does not support QKV fusion as the QKV projections are always fused + _supports_qkv_fusion = False + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + elementwise_affine: bool = True, + mlp_ratio: float = 4.0, + mlp_mult_factor: int = 2, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.out_dim = out_dim if out_dim is not None else query_dim + self.heads = out_dim // dim_head if out_dim is not None else heads + + self.use_bias = bias + self.dropout = dropout + + self.mlp_ratio = mlp_ratio + self.mlp_hidden_dim = int(query_dim * self.mlp_ratio) + self.mlp_mult_factor = mlp_mult_factor + + # Fused QKV projections + MLP input projection + self.to_qkv_mlp_proj = torch.nn.Linear( + self.query_dim, self.inner_dim * 3 + self.mlp_hidden_dim * self.mlp_mult_factor, bias=bias + ) + self.mlp_act_fn = Flux2SwiGLU() + + # QK Norm + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + + # Fused attention output projection + MLP output projection + self.to_out = torch.nn.Linear(self.inner_dim + self.mlp_hidden_dim, self.out_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"joint_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +class Flux2SingleTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float = 3.0, + eps: float = 1e-6, + bias: bool = False, + ): + super().__init__() + + self.norm = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + + # Note that the MLP in/out linear layers are fused with the attention QKV/out projections, respectively; this + # is often called a "parallel" transformer block. See the [ViT-22B paper](https://arxiv.org/abs/2302.05442) + # for a visual depiction of this type of transformer block. + self.attn = Flux2ParallelSelfAttention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=bias, + out_bias=bias, + eps=eps, + mlp_ratio=mlp_ratio, + mlp_mult_factor=2, + processor=Flux2ParallelSelfAttnProcessor(), + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None, + temb_mod: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + split_hidden_states: bool = False, + text_seq_len: int | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # If encoder_hidden_states is None, hidden_states is assumed to have encoder_hidden_states already + # concatenated + if encoder_hidden_states is not None: + text_seq_len = encoder_hidden_states.shape[1] + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + mod_shift, mod_scale, mod_gate = Flux2Modulation.split(temb_mod, 1)[0] + + norm_hidden_states = self.norm(hidden_states) + norm_hidden_states = (1 + mod_scale) * norm_hidden_states + mod_shift + + joint_attention_kwargs = joint_attention_kwargs or {} + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + hidden_states = hidden_states + mod_gate * attn_output + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + if split_hidden_states: + encoder_hidden_states, hidden_states = hidden_states[:, :text_seq_len], hidden_states[:, text_seq_len:] + return encoder_hidden_states, hidden_states + else: + return hidden_states + + +class Flux2TransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float = 3.0, + eps: float = 1e-6, + bias: bool = False, + ): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + + self.norm1 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + self.norm1_context = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + + self.attn = Flux2Attention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=bias, + added_proj_bias=bias, + out_bias=bias, + eps=eps, + processor=Flux2AttnProcessor(), + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + self.ff = Flux2FeedForward(dim=dim, dim_out=dim, mult=mlp_ratio, bias=bias) + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + self.ff_context = Flux2FeedForward(dim=dim, dim_out=dim, mult=mlp_ratio, bias=bias) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb_mod_img: torch.Tensor, + temb_mod_txt: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + joint_attention_kwargs = joint_attention_kwargs or {} + + # Modulation parameters shape: [1, 1, self.dim] + (shift_msa, scale_msa, gate_msa), (shift_mlp, scale_mlp, gate_mlp) = Flux2Modulation.split(temb_mod_img, 2) + (c_shift_msa, c_scale_msa, c_gate_msa), (c_shift_mlp, c_scale_mlp, c_gate_mlp) = Flux2Modulation.split( + temb_mod_txt, 2 + ) + + # Img stream + norm_hidden_states = self.norm1(hidden_states) + norm_hidden_states = (1 + scale_msa) * norm_hidden_states + shift_msa + + # Conditioning txt stream + norm_encoder_hidden_states = self.norm1_context(encoder_hidden_states) + norm_encoder_hidden_states = (1 + c_scale_msa) * norm_encoder_hidden_states + c_shift_msa + + # Attention on concatenated img + txt stream + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + attn_output, context_attn_output = attention_outputs + + # Process attention outputs for the image stream (`hidden_states`). + attn_output = gate_msa * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp + + ff_output = self.ff(norm_hidden_states) + hidden_states = hidden_states + gate_mlp * ff_output + + # Process attention outputs for the text stream (`encoder_hidden_states`). + context_attn_output = c_gate_msa * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp) + c_shift_mlp + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class Flux2PosEmbed(nn.Module): + # modified from https://github.com/black-forest-labs/flux/blob/c00d7c60b085fce8058b9df845e036090873f2ce/src/flux/modules/layers.py#L11 + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + # Expected ids shape: [S, len(self.axes_dim)] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + is_npu = ids.device.type == "npu" + freqs_dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + # Unlike Flux 1, loop over len(self.axes_dim) rather than ids.shape[-1] + for i in range(len(self.axes_dim)): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[..., i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +class Flux2TimestepGuidanceEmbeddings(nn.Module): + def __init__( + self, + in_channels: int = 256, + embedding_dim: int = 6144, + bias: bool = False, + guidance_embeds: bool = True, + ): + super().__init__() + + self.time_proj = Timesteps(num_channels=in_channels, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding( + in_channels=in_channels, time_embed_dim=embedding_dim, sample_proj_bias=bias + ) + + if guidance_embeds: + self.guidance_embedder = TimestepEmbedding( + in_channels=in_channels, time_embed_dim=embedding_dim, sample_proj_bias=bias + ) + else: + self.guidance_embedder = None + + def forward(self, timestep: torch.Tensor, guidance: torch.Tensor) -> torch.Tensor: + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(timestep.dtype)) # (N, D) + + if guidance is not None and self.guidance_embedder is not None: + guidance_proj = self.time_proj(guidance) + guidance_emb = self.guidance_embedder(guidance_proj.to(guidance.dtype)) # (N, D) + time_guidance_emb = timesteps_emb + guidance_emb + return time_guidance_emb + else: + return timesteps_emb + + +class Flux2Modulation(nn.Module): + def __init__(self, dim: int, mod_param_sets: int = 2, bias: bool = False): + super().__init__() + self.mod_param_sets = mod_param_sets + + self.linear = nn.Linear(dim, dim * 3 * self.mod_param_sets, bias=bias) + self.act_fn = nn.SiLU() + + def forward(self, temb: torch.Tensor) -> torch.Tensor: + mod = self.act_fn(temb) + mod = self.linear(mod) + return mod + + @staticmethod + # split inside the transformer blocks, to avoid passing tuples into checkpoints https://github.com/huggingface/diffusers/issues/12776 + def split(mod: torch.Tensor, mod_param_sets: int) -> tuple[tuple[torch.Tensor, torch.Tensor, torch.Tensor], ...]: + if mod.ndim == 2: + mod = mod.unsqueeze(1) + mod_params = torch.chunk(mod, 3 * mod_param_sets, dim=-1) + # Return tuple of 3-tuples of modulation params shift/scale/gate + return tuple(mod_params[3 * i : 3 * (i + 1)] for i in range(mod_param_sets)) + + +class Flux2Transformer2DModel( + ModelMixin, + ConfigMixin, + PeftAdapterMixin, + FromOriginalModelMixin, + FluxTransformer2DLoadersMixin, + CacheMixin, + AttentionMixin, +): + """ + The Transformer model introduced in Flux 2. + + Reference: https://blackforestlabs.ai/announcing-black-forest-labs/ + + Args: + patch_size (`int`, defaults to `1`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `128`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `None`): + The number of channels in the output. If not specified, it defaults to `in_channels`. + num_layers (`int`, defaults to `8`): + The number of layers of dual stream DiT blocks to use. + num_single_layers (`int`, defaults to `48`): + The number of layers of single stream DiT blocks to use. + attention_head_dim (`int`, defaults to `128`): + The number of dimensions to use for each attention head. + num_attention_heads (`int`, defaults to `48`): + The number of attention heads to use. + joint_attention_dim (`int`, defaults to `15360`): + The number of dimensions to use for the joint attention (embedding/channel dimension of + `encoder_hidden_states`). + pooled_projection_dim (`int`, defaults to `768`): + The number of dimensions to use for the pooled projection. + guidance_embeds (`bool`, defaults to `True`): + Whether to use guidance embeddings for guidance-distilled variant of the model. + axes_dims_rope (`tuple[int]`, defaults to `(32, 32, 32, 32)`): + The dimensions to use for the rotary positional embeddings. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["Flux2TransformerBlock", "Flux2SingleTransformerBlock"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + _repeated_blocks = ["Flux2TransformerBlock", "Flux2SingleTransformerBlock"] + _cp_plan = { + "": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "img_ids": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "txt_ids": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + }, + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 128, + out_channels: int | None = None, + num_layers: int = 8, + num_single_layers: int = 48, + attention_head_dim: int = 128, + num_attention_heads: int = 48, + joint_attention_dim: int = 15360, + timestep_guidance_channels: int = 256, + mlp_ratio: float = 3.0, + axes_dims_rope: tuple[int, ...] = (32, 32, 32, 32), + rope_theta: int = 2000, + eps: float = 1e-6, + guidance_embeds: bool = True, + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + # 1. Sinusoidal positional embedding for RoPE on image and text tokens + self.pos_embed = Flux2PosEmbed(theta=rope_theta, axes_dim=axes_dims_rope) + + # 2. Combined timestep + guidance embedding + self.time_guidance_embed = Flux2TimestepGuidanceEmbeddings( + in_channels=timestep_guidance_channels, + embedding_dim=self.inner_dim, + bias=False, + guidance_embeds=guidance_embeds, + ) + + # 3. Modulation (double stream and single stream blocks share modulation parameters, resp.) + # Two sets of shift/scale/gate modulation parameters for the double stream attn and FF sub-blocks + self.double_stream_modulation_img = Flux2Modulation(self.inner_dim, mod_param_sets=2, bias=False) + self.double_stream_modulation_txt = Flux2Modulation(self.inner_dim, mod_param_sets=2, bias=False) + # Only one set of modulation parameters as the attn and FF sub-blocks are run in parallel for single stream + self.single_stream_modulation = Flux2Modulation(self.inner_dim, mod_param_sets=1, bias=False) + + # 4. Input projections + self.x_embedder = nn.Linear(in_channels, self.inner_dim, bias=False) + self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim, bias=False) + + # 5. Double Stream Transformer Blocks + self.transformer_blocks = nn.ModuleList( + [ + Flux2TransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + mlp_ratio=mlp_ratio, + eps=eps, + bias=False, + ) + for _ in range(num_layers) + ] + ) + + # 6. Single Stream Transformer Blocks + self.single_transformer_blocks = nn.ModuleList( + [ + Flux2SingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + mlp_ratio=mlp_ratio, + eps=eps, + bias=False, + ) + for _ in range(num_single_layers) + ] + ) + + # 7. Output layers + self.norm_out = AdaLayerNormContinuous( + self.inner_dim, self.inner_dim, elementwise_affine=False, eps=eps, bias=False + ) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=False) + + self.gradient_checkpointing = False + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`FluxTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`): + Input `hidden_states`. + encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + # 0. Handle input arguments + + num_txt_tokens = encoder_hidden_states.shape[1] + + # 1. Calculate timestep embedding and modulation parameters + timestep = timestep.to(hidden_states.dtype) * 1000 + + if guidance is not None: + guidance = guidance.to(hidden_states.dtype) * 1000 + + temb = self.time_guidance_embed(timestep, guidance) + + double_stream_mod_img = self.double_stream_modulation_img(temb) + double_stream_mod_txt = self.double_stream_modulation_txt(temb) + single_stream_mod = self.single_stream_modulation(temb) + + # 2. Input projection for image (hidden_states) and conditioning text (encoder_hidden_states) + hidden_states = self.x_embedder(hidden_states) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + # 3. Calculate RoPE embeddings from image and text tokens + # NOTE: the below logic means that we can't support batched inference with images of different resolutions or + # text prompts of differents lengths. Is this a use case we want to support? + if img_ids.ndim == 3: + img_ids = img_ids[0] + if txt_ids.ndim == 3: + txt_ids = txt_ids[0] + + image_rotary_emb = self.pos_embed(img_ids) + text_rotary_emb = self.pos_embed(txt_ids) + concat_rotary_emb = ( + torch.cat([text_rotary_emb[0], image_rotary_emb[0]], dim=0), + torch.cat([text_rotary_emb[1], image_rotary_emb[1]], dim=0), + ) + + # 4. Double Stream Transformer Blocks + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + double_stream_mod_img, + double_stream_mod_txt, + concat_rotary_emb, + joint_attention_kwargs, + ) + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb_mod_img=double_stream_mod_img, + temb_mod_txt=double_stream_mod_txt, + image_rotary_emb=concat_rotary_emb, + joint_attention_kwargs=joint_attention_kwargs, + ) + # Concatenate text and image streams for single-block inference + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + # 5. Single Stream Transformer Blocks + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + None, + single_stream_mod, + concat_rotary_emb, + joint_attention_kwargs, + ) + else: + hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=None, + temb_mod=single_stream_mod, + image_rotary_emb=concat_rotary_emb, + joint_attention_kwargs=joint_attention_kwargs, + ) + # Remove text tokens from concatenated stream + hidden_states = hidden_states[:, num_txt_tokens:, ...] + + # 6. Output layers + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_glm_image.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_glm_image.py new file mode 100644 index 0000000000000000000000000000000000000000..8413b24fef455bed99e8724ad8f153c6bcb76264 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_glm_image.py @@ -0,0 +1,668 @@ +# Copyright 2025 The CogView team, Tsinghua University & ZhipuAI and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import LayerNorm, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class GlmImageCombinedTimestepSizeEmbeddings(nn.Module): + def __init__(self, embedding_dim: int, condition_dim: int, pooled_projection_dim: int, timesteps_dim: int = 256): + super().__init__() + + self.time_proj = Timesteps(num_channels=timesteps_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.condition_proj = Timesteps(num_channels=condition_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=timesteps_dim, time_embed_dim=embedding_dim) + self.condition_embedder = PixArtAlphaTextProjection(pooled_projection_dim, embedding_dim, act_fn="silu") + + def forward( + self, + timestep: torch.Tensor, + target_size: torch.Tensor, + crop_coords: torch.Tensor, + hidden_dtype: torch.dtype, + ) -> torch.Tensor: + timesteps_proj = self.time_proj(timestep) + + crop_coords_proj = self.condition_proj(crop_coords.flatten()).view(crop_coords.size(0), -1) + target_size_proj = self.condition_proj(target_size.flatten()).view(target_size.size(0), -1) + + # (B, 2 * condition_dim) + condition_proj = torch.cat([crop_coords_proj, target_size_proj], dim=1) + + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (B, embedding_dim) + condition_emb = self.condition_embedder(condition_proj.to(dtype=hidden_dtype)) # (B, embedding_dim) + + conditioning = timesteps_emb + condition_emb + conditioning = F.silu(conditioning) + + return conditioning + + +class GlmImageImageProjector(nn.Module): + def __init__( + self, + in_channels: int = 16, + hidden_size: int = 2560, + patch_size: int = 2, + ): + super().__init__() + self.patch_size = patch_size + + self.proj = nn.Linear(in_channels * patch_size**2, hidden_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, channel, height, width = hidden_states.shape + post_patch_height = height // self.patch_size + post_patch_width = width // self.patch_size + + hidden_states = hidden_states.reshape( + batch_size, channel, post_patch_height, self.patch_size, post_patch_width, self.patch_size + ) + hidden_states = hidden_states.permute(0, 2, 4, 1, 3, 5).flatten(3, 5).flatten(1, 2) + hidden_states = self.proj(hidden_states) + + return hidden_states + + +class GlmImageAdaLayerNormZero(nn.Module): + def __init__(self, embedding_dim: int, dim: int) -> None: + super().__init__() + + self.norm = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.norm_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.linear = nn.Linear(embedding_dim, 12 * dim, bias=True) + + def forward( + self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor]: + dtype = hidden_states.dtype + norm_hidden_states = self.norm(hidden_states).to(dtype=dtype) + norm_encoder_hidden_states = self.norm_context(encoder_hidden_states).to(dtype=dtype) + + emb = self.linear(temb) + ( + shift_msa, + c_shift_msa, + scale_msa, + c_scale_msa, + gate_msa, + c_gate_msa, + shift_mlp, + c_shift_mlp, + scale_mlp, + c_scale_mlp, + gate_mlp, + c_gate_mlp, + ) = emb.chunk(12, dim=1) + + hidden_states = norm_hidden_states * (1 + scale_msa.unsqueeze(1)) + shift_msa.unsqueeze(1) + encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_msa.unsqueeze(1)) + c_shift_msa.unsqueeze(1) + + return ( + hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + encoder_hidden_states, + c_gate_msa, + c_shift_mlp, + c_scale_mlp, + c_gate_mlp, + ) + + +class GlmImageLayerKVCache: + """KV cache for GlmImage model. + Supports per-sample caching for batch processing where each sample may have different condition images. + """ + + def __init__(self): + self.k_caches: list[torch.Tensor | None] = [] + self.v_caches: list[torch.Tensor | None] = [] + self.mode: str | None = None # "write", "read", "skip" + self.current_sample_idx: int = 0 # Current sample index for writing + + def store(self, k: torch.Tensor, v: torch.Tensor): + """Store KV cache for the current sample.""" + # k, v shape: (1, seq_len, num_heads, head_dim) + if len(self.k_caches) <= self.current_sample_idx: + # First time storing for this sample + self.k_caches.append(k) + self.v_caches.append(v) + else: + # Append to existing cache for this sample (multiple condition images) + self.k_caches[self.current_sample_idx] = torch.cat([self.k_caches[self.current_sample_idx], k], dim=1) + self.v_caches[self.current_sample_idx] = torch.cat([self.v_caches[self.current_sample_idx], v], dim=1) + + def get(self, k: torch.Tensor, v: torch.Tensor): + """Get combined KV cache for all samples in the batch. + + Args: + k: Current key tensor, shape (batch_size, seq_len, num_heads, head_dim) + v: Current value tensor, shape (batch_size, seq_len, num_heads, head_dim) + Returns: + Combined key and value tensors with cached values prepended. + """ + batch_size = k.shape[0] + num_cached_samples = len(self.k_caches) + if num_cached_samples == 0: + return k, v + if num_cached_samples == 1: + # Single cache, expand for all batch samples (shared condition images) + k_cache_expanded = self.k_caches[0].expand(batch_size, -1, -1, -1) + v_cache_expanded = self.v_caches[0].expand(batch_size, -1, -1, -1) + elif num_cached_samples == batch_size: + # Per-sample cache, concatenate along batch dimension + k_cache_expanded = torch.cat(self.k_caches, dim=0) + v_cache_expanded = torch.cat(self.v_caches, dim=0) + else: + # Mismatch: try to handle by repeating the caches + # This handles cases like num_images_per_prompt > 1 + repeat_factor = batch_size // num_cached_samples + if batch_size % num_cached_samples == 0: + k_cache_list = [] + v_cache_list = [] + for i in range(num_cached_samples): + k_cache_list.append(self.k_caches[i].expand(repeat_factor, -1, -1, -1)) + v_cache_list.append(self.v_caches[i].expand(repeat_factor, -1, -1, -1)) + k_cache_expanded = torch.cat(k_cache_list, dim=0) + v_cache_expanded = torch.cat(v_cache_list, dim=0) + else: + raise ValueError( + f"Cannot match {num_cached_samples} cached samples to batch size {batch_size}. " + f"Batch size must be a multiple of the number of cached samples." + ) + + k_combined = torch.cat([k_cache_expanded, k], dim=1) + v_combined = torch.cat([v_cache_expanded, v], dim=1) + return k_combined, v_combined + + def clear(self): + self.k_caches = [] + self.v_caches = [] + self.mode = None + self.current_sample_idx = 0 + + def next_sample(self): + """Move to the next sample for writing.""" + self.current_sample_idx += 1 + + +class GlmImageKVCache: + """Container for all layers' KV caches. + Supports per-sample caching for batch processing where each sample may have different condition images. + """ + + def __init__(self, num_layers: int): + self.num_layers = num_layers + self.caches = [GlmImageLayerKVCache() for _ in range(num_layers)] + + def __getitem__(self, layer_idx: int) -> GlmImageLayerKVCache: + return self.caches[layer_idx] + + def set_mode(self, mode: str): + if mode is not None and mode not in ["write", "read", "skip"]: + raise ValueError(f"Invalid mode: {mode}, must be one of 'write', 'read', 'skip'") + for cache in self.caches: + cache.mode = mode + + def next_sample(self): + """Move to the next sample for writing. Call this after processing + all condition images for one batch sample.""" + for cache in self.caches: + cache.next_sample() + + def clear(self): + for cache in self.caches: + cache.clear() + + +class GlmImageAttnProcessor: + """ + Processor for implementing scaled dot-product attention for the GlmImage model. It applies a rotary embedding on + query and key vectors, but does not include spatial normalization. + + The processor supports passing an attention mask for text tokens. The attention mask should have shape (batch_size, + text_seq_length) where 1 indicates a non-padded token and 0 indicates a padded token. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("GlmImageAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + kv_cache: GlmImageLayerKVCache | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + dtype = encoder_hidden_states.dtype + + batch_size, text_seq_length, embed_dim = encoder_hidden_states.shape + batch_size, image_seq_length, embed_dim = hidden_states.shape + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + # 1. QKV projections + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + # 2. QK normalization + if attn.norm_q is not None: + query = attn.norm_q(query).to(dtype=dtype) + if attn.norm_k is not None: + key = attn.norm_k(key).to(dtype=dtype) + + # 3. Rotational positional embeddings applied to latent stream + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query[:, text_seq_length:, :, :] = apply_rotary_emb( + query[:, text_seq_length:, :, :], image_rotary_emb, sequence_dim=1, use_real_unbind_dim=-2 + ) + key[:, text_seq_length:, :, :] = apply_rotary_emb( + key[:, text_seq_length:, :, :], image_rotary_emb, sequence_dim=1, use_real_unbind_dim=-2 + ) + + if kv_cache is not None: + if kv_cache.mode == "write": + kv_cache.store(key, value) + elif kv_cache.mode == "read": + key, value = kv_cache.get(key, value) + elif kv_cache.mode == "skip": + pass + + # 4. Attention + if attention_mask is not None: + text_attn_mask = attention_mask + assert text_attn_mask.dim() == 2, "the shape of text_attn_mask should be (batch_size, text_seq_length)" + text_attn_mask = text_attn_mask.float().to(query.device) + mix_attn_mask = torch.ones((batch_size, text_seq_length + image_seq_length), device=query.device) + mix_attn_mask[:, :text_seq_length] = text_attn_mask + mix_attn_mask = mix_attn_mask.unsqueeze(2) + attn_mask_matrix = mix_attn_mask @ mix_attn_mask.transpose(1, 2) + attention_mask = (attn_mask_matrix > 0).unsqueeze(1).to(query.dtype) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + # 5. Output projection + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + return hidden_states, encoder_hidden_states + + +@maybe_allow_in_graph +class GlmImageTransformerBlock(nn.Module): + def __init__( + self, + dim: int = 2560, + num_attention_heads: int = 64, + attention_head_dim: int = 40, + time_embed_dim: int = 512, + ) -> None: + super().__init__() + + # 1. Attention + self.norm1 = GlmImageAdaLayerNormZero(time_embed_dim, dim) + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + out_dim=dim, + bias=True, + qk_norm="layer_norm", + elementwise_affine=False, + eps=1e-5, + processor=GlmImageAttnProcessor(), + ) + + # 2. Feedforward + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-5) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + attention_mask: dict[str, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + kv_cache: GlmImageLayerKVCache | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Timestep conditioning + ( + norm_hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + norm_encoder_hidden_states, + c_gate_msa, + c_shift_mlp, + c_scale_mlp, + c_gate_mlp, + ) = self.norm1(hidden_states, encoder_hidden_states, temb) + + # 2. Attention + attention_kwargs = attention_kwargs or {} + + attn_hidden_states, attn_encoder_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + attention_mask=attention_mask, + kv_cache=kv_cache, + **attention_kwargs, + ) + hidden_states = hidden_states + attn_hidden_states * gate_msa.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + attn_encoder_hidden_states * c_gate_msa.unsqueeze(1) + + # 3. Feedforward + norm_hidden_states = self.norm2(hidden_states) * (1 + scale_mlp.unsqueeze(1)) + shift_mlp.unsqueeze(1) + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) * ( + 1 + c_scale_mlp.unsqueeze(1) + ) + c_shift_mlp.unsqueeze(1) + + ff_output = self.ff(norm_hidden_states) + ff_output_context = self.ff(norm_encoder_hidden_states) + hidden_states = hidden_states + ff_output * gate_mlp.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + ff_output_context * c_gate_mlp.unsqueeze(1) + + return hidden_states, encoder_hidden_states + + +class GlmImageRotaryPosEmbed(nn.Module): + def __init__(self, dim: int, patch_size: int, theta: float = 10000.0) -> None: + super().__init__() + + self.dim = dim + self.patch_size = patch_size + self.theta = theta + + def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + batch_size, num_channels, height, width = hidden_states.shape + height, width = height // self.patch_size, width // self.patch_size + + dim_h, dim_w = self.dim // 2, self.dim // 2 + h_inv_freq = 1.0 / ( + self.theta ** (torch.arange(0, dim_h, 2, dtype=torch.float32)[: (dim_h // 2)].float() / dim_h) + ) + w_inv_freq = 1.0 / ( + self.theta ** (torch.arange(0, dim_w, 2, dtype=torch.float32)[: (dim_w // 2)].float() / dim_w) + ) + h_seq = torch.arange(height) + w_seq = torch.arange(width) + freqs_h = torch.outer(h_seq, h_inv_freq) + freqs_w = torch.outer(w_seq, w_inv_freq) + + # Create position matrices for height and width + # [height, 1, dim//4] and [1, width, dim//4] + freqs_h = freqs_h.unsqueeze(1) + freqs_w = freqs_w.unsqueeze(0) + # Broadcast freqs_h and freqs_w to [height, width, dim//4] + freqs_h = freqs_h.expand(height, width, -1) + freqs_w = freqs_w.expand(height, width, -1) + + # Concatenate along last dimension to get [height, width, dim//2] + freqs = torch.cat([freqs_h, freqs_w], dim=-1) + freqs = torch.cat([freqs, freqs], dim=-1) # [height, width, dim] + freqs = freqs.reshape(height * width, -1) + return (freqs.cos(), freqs.sin()) + + +class GlmImageAdaLayerNormContinuous(nn.Module): + """ + GlmImage-only final AdaLN: LN(x) -> Linear(cond) -> chunk -> affine. Matches Megatron: **no activation** before the + Linear on conditioning embedding. + """ + + def __init__( + self, + embedding_dim: int, + conditioning_embedding_dim: int, + elementwise_affine: bool = True, + eps: float = 1e-5, + bias: bool = True, + norm_type: str = "layer_norm", + ): + super().__init__() + self.linear = nn.Linear(conditioning_embedding_dim, embedding_dim * 2, bias=bias) + if norm_type == "layer_norm": + self.norm = LayerNorm(embedding_dim, eps, elementwise_affine, bias) + elif norm_type == "rms_norm": + self.norm = RMSNorm(embedding_dim, eps, elementwise_affine) + else: + raise ValueError(f"unknown norm_type {norm_type}") + + def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor: + # *** NO SiLU here *** + emb = self.linear(conditioning_embedding.to(x.dtype)) + scale, shift = torch.chunk(emb, 2, dim=1) + x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +class GlmImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, CacheMixin): + r""" + Args: + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + num_layers (`int`, defaults to `30`): + The number of layers of Transformer blocks to use. + attention_head_dim (`int`, defaults to `40`): + The number of channels in each head. + num_attention_heads (`int`, defaults to `64`): + The number of heads to use for multi-head attention. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_embed_dim (`int`, defaults to `1472`): + Input dimension of text embeddings from the text encoder. + time_embed_dim (`int`, defaults to `512`): + Output dimension of timestep embeddings. + condition_dim (`int`, defaults to `256`): + The embedding dimension of the input SDXL-style resolution conditions (original_size, target_size, + crop_coords). + pos_embed_max_size (`int`, defaults to `128`): + The maximum resolution of the positional embeddings, from which slices of shape `H x W` are taken and added + to input patched latents, where `H` and `W` are the latent height and width respectively. A value of 128 + means that the maximum supported height and width for image generation is `128 * vae_scale_factor * + patch_size => 128 * 8 * 2 => 2048`. + sample_size (`int`, defaults to `128`): + The base resolution of input latents. If height/width is not provided during generation, this value is used + to determine the resolution as `sample_size * vae_scale_factor => 128 * 8 => 1024` + """ + + _supports_gradient_checkpointing = True + _no_split_modules = [ + "GlmImageTransformerBlock", + "GlmImageImageProjector", + "GlmImageImageProjector", + ] + _skip_layerwise_casting_patterns = ["patch_embed", "norm", "proj_out"] + _skip_keys = ["kv_caches"] + + @register_to_config + def __init__( + self, + patch_size: int = 2, + in_channels: int = 16, + out_channels: int = 16, + num_layers: int = 30, + attention_head_dim: int = 40, + num_attention_heads: int = 64, + text_embed_dim: int = 1472, + time_embed_dim: int = 512, + condition_dim: int = 256, + prior_vq_quantizer_codebook_size: int = 16384, + ): + super().__init__() + + # GlmImage uses 2 additional SDXL-like conditions - target_size, crop_coords + # Each of these are sincos embeddings of shape 2 * condition_dim + pooled_projection_dim = 2 * 2 * condition_dim + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels + + # 1. RoPE + self.rope = GlmImageRotaryPosEmbed(attention_head_dim, patch_size, theta=10000.0) + + # 2. Patch & Text-timestep embedding + self.image_projector = GlmImageImageProjector(in_channels, inner_dim, patch_size) + self.glyph_projector = FeedForward(text_embed_dim, inner_dim, inner_dim=inner_dim, activation_fn="gelu") + self.prior_token_embedding = nn.Embedding(prior_vq_quantizer_codebook_size, inner_dim) + self.prior_projector = FeedForward(inner_dim, inner_dim, inner_dim=inner_dim, activation_fn="linear-silu") + + self.time_condition_embed = GlmImageCombinedTimestepSizeEmbeddings( + embedding_dim=time_embed_dim, + condition_dim=condition_dim, + pooled_projection_dim=pooled_projection_dim, + timesteps_dim=time_embed_dim, + ) + + # 3. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + GlmImageTransformerBlock(inner_dim, num_attention_heads, attention_head_dim, time_embed_dim) + for _ in range(num_layers) + ] + ) + + # 4. Output projection + self.norm_out = GlmImageAdaLayerNormContinuous(inner_dim, time_embed_dim, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * out_channels, bias=True) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + prior_token_id: torch.Tensor, + prior_token_drop: torch.Tensor, + timestep: torch.LongTensor, + target_size: torch.Tensor, + crop_coords: torch.Tensor, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + attention_mask: torch.Tensor | None = None, + kv_caches: GlmImageKVCache | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_channels, height, width = hidden_states.shape + + # 1. RoPE + if image_rotary_emb is None: + image_rotary_emb = self.rope(hidden_states) + + # 2. Patch & Timestep embeddings + p = self.config.patch_size + post_patch_height = height // p + post_patch_width = width // p + + hidden_states = self.image_projector(hidden_states) + encoder_hidden_states = self.glyph_projector(encoder_hidden_states) + prior_embedding = self.prior_token_embedding(prior_token_id) + prior_embedding[prior_token_drop] *= 0.0 + prior_hidden_states = self.prior_projector(prior_embedding) + + hidden_states = hidden_states + prior_hidden_states + + temb = self.time_condition_embed(timestep, target_size, crop_coords, hidden_states.dtype) + + # 3. Transformer blocks + for idx, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + attention_mask, + attention_kwargs, + kv_caches[idx] if kv_caches is not None else None, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + attention_mask, + attention_kwargs, + kv_cache=kv_caches[idx] if kv_caches is not None else None, + ) + + # 4. Output norm & projection + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + hidden_states = hidden_states.reshape(batch_size, post_patch_height, post_patch_width, -1, p, p) + + # Rearrange tensor from (B, H_p, W_p, C, p, p) to (B, C, H_p * p, W_p * p) + output = hidden_states.permute(0, 3, 1, 4, 2, 5).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_helios.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_helios.py new file mode 100644 index 0000000000000000000000000000000000000000..9f3ef047d98db5b0ec7131bfa87ca3f8e3167137 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_helios.py @@ -0,0 +1,814 @@ +# Copyright 2025 The Helios Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from functools import lru_cache +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def pad_for_3d_conv(x, kernel_size): + b, c, t, h, w = x.shape + pt, ph, pw = kernel_size + pad_t = (pt - (t % pt)) % pt + pad_h = (ph - (h % ph)) % ph + pad_w = (pw - (w % pw)) % pw + return torch.nn.functional.pad(x, (0, pad_w, 0, pad_h, 0, pad_t), mode="replicate") + + +def center_down_sample_3d(x, kernel_size): + return torch.nn.functional.avg_pool3d(x, kernel_size, stride=kernel_size) + + +def apply_rotary_emb_transposed( + hidden_states: torch.Tensor, + freqs_cis: torch.Tensor, +): + x_1, x_2 = hidden_states.unflatten(-1, (-1, 2)).unbind(-1) + cos, sin = freqs_cis.unsqueeze(-2).chunk(2, dim=-1) + out = torch.empty_like(hidden_states) + out[..., 0::2] = x_1 * cos[..., 0::2] - x_2 * sin[..., 1::2] + out[..., 1::2] = x_1 * sin[..., 1::2] + x_2 * cos[..., 0::2] + return out.type_as(hidden_states) + + +def _get_qkv_projections(attn: "HeliosAttention", hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor): + # encoder_hidden_states is only passed for cross-attention + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + if attn.fused_projections: + if not attn.is_cross_attention: + # In self-attention layers, we can fuse the entire QKV projection into a single linear + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + else: + # In cross-attention layers, we can only fuse the KV projections into a single linear + query = attn.to_q(hidden_states) + key, value = attn.to_kv(encoder_hidden_states).chunk(2, dim=-1) + else: + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + return query, key, value + + +class HeliosOutputNorm(nn.Module): + def __init__(self, dim: int, eps: float = 1e-6, elementwise_affine: bool = False): + super().__init__() + self.scale_shift_table = nn.Parameter(torch.randn(1, 2, dim) / dim**0.5) + self.norm = FP32LayerNorm(dim, eps, elementwise_affine=False) + + def forward(self, hidden_states: torch.Tensor, temb: torch.Tensor, original_context_length: int): + temb = temb[:, -original_context_length:, :] + shift, scale = (self.scale_shift_table.unsqueeze(0).to(temb.device) + temb.unsqueeze(2)).chunk(2, dim=2) + shift, scale = shift.squeeze(2).to(hidden_states.device), scale.squeeze(2).to(hidden_states.device) + hidden_states = hidden_states[:, -original_context_length:, :] + hidden_states = (self.norm(hidden_states.float()) * (1 + scale) + shift).type_as(hidden_states) + return hidden_states + + +class HeliosAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "HeliosAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or higher." + ) + + def __call__( + self, + attn: "HeliosAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + original_context_length: int = None, + ) -> torch.Tensor: + query, key, value = _get_qkv_projections(attn, hidden_states, encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + if rotary_emb is not None: + query = apply_rotary_emb_transposed(query, rotary_emb) + key = apply_rotary_emb_transposed(key, rotary_emb) + + if not attn.is_cross_attention and attn.is_amplify_history: + history_seq_len = hidden_states.shape[1] - original_context_length + + if history_seq_len > 0: + scale_key = 1.0 + torch.sigmoid(attn.history_key_scale) * (attn.max_scale - 1.0) + if attn.history_scale_mode == "per_head": + scale_key = scale_key.view(1, 1, -1, 1) + key = torch.cat([key[:, :history_seq_len] * scale_key, key[:, history_seq_len:]], dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12909 + parallel_config=(self._parallel_config if encoder_hidden_states is None else None), + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class HeliosAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = HeliosAttnProcessor + _available_processors = [HeliosAttnProcessor] + + def __init__( + self, + dim: int, + heads: int = 8, + dim_head: int = 64, + eps: float = 1e-5, + dropout: float = 0.0, + added_kv_proj_dim: int | None = None, + cross_attention_dim_head: int | None = None, + processor=None, + is_cross_attention=None, + is_amplify_history=False, + history_scale_mode="per_head", # [scalar, per_head] + ): + super().__init__() + + self.inner_dim = dim_head * heads + self.heads = heads + self.added_kv_proj_dim = added_kv_proj_dim + self.cross_attention_dim_head = cross_attention_dim_head + self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads + + self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=True) + self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_out = torch.nn.ModuleList( + [ + torch.nn.Linear(self.inner_dim, dim, bias=True), + torch.nn.Dropout(dropout), + ] + ) + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + self.norm_k = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + + self.add_k_proj = self.add_v_proj = None + if added_kv_proj_dim is not None: + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps) + + if is_cross_attention is not None: + self.is_cross_attention = is_cross_attention + else: + self.is_cross_attention = cross_attention_dim_head is not None + + self.set_processor(processor) + + self.is_amplify_history = is_amplify_history + if is_amplify_history: + if history_scale_mode == "scalar": + self.history_key_scale = nn.Parameter(torch.ones(1)) + elif history_scale_mode == "per_head": + self.history_key_scale = nn.Parameter(torch.ones(heads)) + else: + raise ValueError(f"Unknown history_scale_mode: {history_scale_mode}") + self.history_scale_mode = history_scale_mode + self.max_scale = 10.0 + + def fuse_projections(self): + if getattr(self, "fused_projections", False): + return + + if not self.is_cross_attention: + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_qkv = nn.Linear(in_features, out_features, bias=True) + self.to_qkv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + else: + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_kv = nn.Linear(in_features, out_features, bias=True) + self.to_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + if self.added_kv_proj_dim is not None: + concatenated_weights = torch.cat([self.add_k_proj.weight.data, self.add_v_proj.weight.data]) + concatenated_bias = torch.cat([self.add_k_proj.bias.data, self.add_v_proj.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_added_kv = nn.Linear(in_features, out_features, bias=True) + self.to_added_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + self.fused_projections = True + + @torch.no_grad() + def unfuse_projections(self): + if not getattr(self, "fused_projections", False): + return + + if hasattr(self, "to_qkv"): + delattr(self, "to_qkv") + if hasattr(self, "to_kv"): + delattr(self, "to_kv") + if hasattr(self, "to_added_kv"): + delattr(self, "to_added_kv") + + self.fused_projections = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + original_context_length: int = None, + **kwargs, + ) -> torch.Tensor: + return self.processor( + self, + hidden_states, + encoder_hidden_states, + attention_mask, + rotary_emb, + original_context_length, + **kwargs, + ) + + +class HeliosTimeTextEmbedding(nn.Module): + def __init__( + self, + dim: int, + time_freq_dim: int, + time_proj_dim: int, + text_embed_dim: int, + ): + super().__init__() + + self.timesteps_proj = Timesteps(num_channels=time_freq_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.time_embedder = TimestepEmbedding(in_channels=time_freq_dim, time_embed_dim=dim) + self.act_fn = nn.SiLU() + self.time_proj = nn.Linear(dim, time_proj_dim) + self.text_embedder = PixArtAlphaTextProjection(text_embed_dim, dim, act_fn="gelu_tanh") + + def forward( + self, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + is_return_encoder_hidden_states: bool = True, + ): + timestep = self.timesteps_proj(timestep) + + time_embedder_dtype = next(iter(self.time_embedder.parameters())).dtype + if timestep.dtype != time_embedder_dtype and time_embedder_dtype != torch.int8: + timestep = timestep.to(time_embedder_dtype) + temb = self.time_embedder(timestep).type_as(encoder_hidden_states) + timestep_proj = self.time_proj(self.act_fn(temb)) + + if encoder_hidden_states is not None and is_return_encoder_hidden_states: + encoder_hidden_states = self.text_embedder(encoder_hidden_states) + + return temb, timestep_proj, encoder_hidden_states + + +class HeliosRotaryPosEmbed(nn.Module): + def __init__(self, rope_dim, theta): + super().__init__() + self.DT, self.DY, self.DX = rope_dim + self.theta = theta + self.register_buffer("freqs_base_t", self._get_freqs_base(self.DT), persistent=False) + self.register_buffer("freqs_base_y", self._get_freqs_base(self.DY), persistent=False) + self.register_buffer("freqs_base_x", self._get_freqs_base(self.DX), persistent=False) + + def _get_freqs_base(self, dim): + return 1.0 / (self.theta ** (torch.arange(0, dim, 2, dtype=torch.float32)[: (dim // 2)] / dim)) + + @torch.no_grad() + def get_frequency_batched(self, freqs_base, pos): + freqs = torch.einsum("d,bthw->dbthw", freqs_base, pos) + freqs = freqs.repeat_interleave(2, dim=0) + return freqs.cos(), freqs.sin() + + @torch.no_grad() + @lru_cache(maxsize=32) + def _get_spatial_meshgrid(self, height, width, device_str): + device = torch.device(device_str) + grid_y_coords = torch.arange(height, device=device, dtype=torch.float32) + grid_x_coords = torch.arange(width, device=device, dtype=torch.float32) + grid_y, grid_x = torch.meshgrid(grid_y_coords, grid_x_coords, indexing="ij") + return grid_y, grid_x + + @torch.no_grad() + def forward(self, frame_indices, height, width, device): + batch_size = frame_indices.shape[0] + num_frames = frame_indices.shape[1] + + frame_indices = frame_indices.to(device=device, dtype=torch.float32) + grid_y, grid_x = self._get_spatial_meshgrid(height, width, str(device)) + + grid_t = frame_indices[:, :, None, None].expand(batch_size, num_frames, height, width) + grid_y_batch = grid_y[None, None, :, :].expand(batch_size, num_frames, -1, -1) + grid_x_batch = grid_x[None, None, :, :].expand(batch_size, num_frames, -1, -1) + + freqs_cos_t, freqs_sin_t = self.get_frequency_batched(self.freqs_base_t, grid_t) + freqs_cos_y, freqs_sin_y = self.get_frequency_batched(self.freqs_base_y, grid_y_batch) + freqs_cos_x, freqs_sin_x = self.get_frequency_batched(self.freqs_base_x, grid_x_batch) + + result = torch.cat([freqs_cos_t, freqs_cos_y, freqs_cos_x, freqs_sin_t, freqs_sin_y, freqs_sin_x], dim=0) + + return result.permute(1, 0, 2, 3, 4) + + +@maybe_allow_in_graph +class HeliosTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + ffn_dim: int, + num_heads: int, + qk_norm: str = "rms_norm_across_heads", + cross_attn_norm: bool = False, + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + guidance_cross_attn: bool = False, + is_amplify_history: bool = False, + history_scale_mode: str = "per_head", # [scalar, per_head] + ): + super().__init__() + + # 1. Self-attention + self.norm1 = FP32LayerNorm(dim, eps, elementwise_affine=False) + self.attn1 = HeliosAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + cross_attention_dim_head=None, + processor=HeliosAttnProcessor(), + is_amplify_history=is_amplify_history, + history_scale_mode=history_scale_mode, + ) + + # 2. Cross-attention + self.attn2 = HeliosAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + cross_attention_dim_head=dim // num_heads, + processor=HeliosAttnProcessor(), + ) + self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + + # 3. Feed-forward + self.ffn = FeedForward(dim, inner_dim=ffn_dim, activation_fn="gelu-approximate") + self.norm3 = FP32LayerNorm(dim, eps, elementwise_affine=False) + + self.scale_shift_table = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + # 4. Guidance cross-attention + self.guidance_cross_attn = guidance_cross_attn + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + rotary_emb: torch.Tensor, + original_context_length: int = None, + ) -> torch.Tensor: + if temb.ndim == 4: + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table.unsqueeze(0) + temb.float() + ).chunk(6, dim=2) + # batch_size, seq_len, 1, inner_dim + shift_msa = shift_msa.squeeze(2) + scale_msa = scale_msa.squeeze(2) + gate_msa = gate_msa.squeeze(2) + c_shift_msa = c_shift_msa.squeeze(2) + c_scale_msa = c_scale_msa.squeeze(2) + c_gate_msa = c_gate_msa.squeeze(2) + else: + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table + temb.float() + ).chunk(6, dim=1) + + # 1. Self-attention + norm_hidden_states = (self.norm1(hidden_states.float()) * (1 + scale_msa) + shift_msa).type_as(hidden_states) + attn_output = self.attn1( + norm_hidden_states, + None, + None, + rotary_emb, + original_context_length, + ) + hidden_states = (hidden_states.float() + attn_output * gate_msa).type_as(hidden_states) + + # 2. Cross-attention + if self.guidance_cross_attn: + history_seq_len = hidden_states.shape[1] - original_context_length + + history_hidden_states, hidden_states = torch.split( + hidden_states, [history_seq_len, original_context_length], dim=1 + ) + norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states) + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states, + None, + None, + original_context_length, + ) + hidden_states = hidden_states + attn_output + hidden_states = torch.cat([history_hidden_states, hidden_states], dim=1) + else: + norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states) + attn_output = self.attn2( + norm_hidden_states, + encoder_hidden_states, + None, + None, + original_context_length, + ) + hidden_states = hidden_states + attn_output + + # 3. Feed-forward + norm_hidden_states = (self.norm3(hidden_states.float()) * (1 + c_scale_msa) + c_shift_msa).type_as( + hidden_states + ) + ff_output = self.ffn(norm_hidden_states) + hidden_states = (hidden_states.float() + ff_output.float() * c_gate_msa).type_as(hidden_states) + + return hidden_states + + +class HeliosTransformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in the Helios model. + + Args: + patch_size (`tuple[int]`, defaults to `(1, 2, 2)`): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch). + num_attention_heads (`int`, defaults to `40`): + Fixed length for text embeddings. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_dim (`int`, defaults to `512`): + Input dimension for text embeddings. + freq_dim (`int`, defaults to `256`): + Dimension for sinusoidal time embeddings. + ffn_dim (`int`, defaults to `13824`): + Intermediate dimension in feed-forward network. + num_layers (`int`, defaults to `40`): + The number of layers of transformer blocks to use. + window_size (`tuple[int]`, defaults to `(-1, -1)`): + Window size for local attention (-1 indicates global attention). + cross_attn_norm (`bool`, defaults to `True`): + Enable cross-attention normalization. + qk_norm (`bool`, defaults to `True`): + Enable query/key normalization. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + add_img_emb (`bool`, defaults to `False`): + Whether to use img_emb. + added_kv_proj_dim (`int`, *optional*, defaults to `None`): + The number of channels to use for the added key and value projections. If `None`, no projection is used. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = [ + "patch_embedding", + "patch_short", + "patch_mid", + "patch_long", + "condition_embedder", + "norm", + ] + _no_split_modules = ["HeliosTransformerBlock", "HeliosOutputNorm"] + _keep_in_fp32_modules = [ + "time_embedder", + "scale_shift_table", + "norm1", + "norm2", + "norm3", + "history_key_scale", + ] + _keys_to_ignore_on_load_unexpected = ["norm_added_q"] + _repeated_blocks = ["HeliosTransformerBlock"] + _cp_plan = { + "blocks.0": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + }, + "blocks.*": { + "temb": ContextParallelInput(split_dim=1, expected_dims=4, split_output=False), + "rotary_emb": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + }, + "blocks.39": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + patch_size: tuple[int, ...] = (1, 2, 2), + num_attention_heads: int = 40, + attention_head_dim: int = 128, + in_channels: int = 16, + out_channels: int = 16, + text_dim: int = 4096, + freq_dim: int = 256, + ffn_dim: int = 13824, + num_layers: int = 40, + cross_attn_norm: bool = True, + qk_norm: str | None = "rms_norm_across_heads", + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + rope_dim: tuple[int, ...] = (44, 42, 42), + rope_theta: float = 10000.0, + guidance_cross_attn: bool = True, + zero_history_timestep: bool = True, + has_multi_term_memory_patch: bool = True, + is_amplify_history: bool = False, + history_scale_mode: str = "per_head", # [scalar, per_head] + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Patch & position embedding + self.rope = HeliosRotaryPosEmbed(rope_dim=rope_dim, theta=rope_theta) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Initial Multi Term Memory Patch + self.zero_history_timestep = zero_history_timestep + if has_multi_term_memory_patch: + self.patch_short = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + self.patch_mid = nn.Conv3d( + in_channels, + inner_dim, + kernel_size=tuple(2 * p for p in patch_size), + stride=tuple(2 * p for p in patch_size), + ) + self.patch_long = nn.Conv3d( + in_channels, + inner_dim, + kernel_size=tuple(4 * p for p in patch_size), + stride=tuple(4 * p for p in patch_size), + ) + + # 3. Condition embeddings + self.condition_embedder = HeliosTimeTextEmbedding( + dim=inner_dim, + time_freq_dim=freq_dim, + time_proj_dim=inner_dim * 6, + text_embed_dim=text_dim, + ) + + # 4. Transformer blocks + self.blocks = nn.ModuleList( + [ + HeliosTransformerBlock( + inner_dim, + ffn_dim, + num_attention_heads, + qk_norm, + cross_attn_norm, + eps, + added_kv_proj_dim, + guidance_cross_attn=guidance_cross_attn, + is_amplify_history=is_amplify_history, + history_scale_mode=history_scale_mode, + ) + for _ in range(num_layers) + ] + ) + + # 5. Output norm & projection + self.norm_out = HeliosOutputNorm(inner_dim, eps, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels * math.prod(patch_size)) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + # ------------ Stage 1 ------------ + indices_hidden_states=None, + indices_latents_history_short=None, + indices_latents_history_mid=None, + indices_latents_history_long=None, + latents_history_short=None, + latents_history_mid=None, + latents_history_long=None, + return_dict: bool = True, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor | dict[str, torch.Tensor]: + # 1. Input + batch_size = hidden_states.shape[0] + p_t, p_h, p_w = self.config.patch_size + + # 2. Process noisy latents + hidden_states = self.patch_embedding(hidden_states) + _, _, post_patch_num_frames, post_patch_height, post_patch_width = hidden_states.shape + + if indices_hidden_states is None: + indices_hidden_states = torch.arange(0, post_patch_num_frames).unsqueeze(0).expand(batch_size, -1) + + hidden_states = hidden_states.flatten(2).transpose(1, 2) + rotary_emb = self.rope( + frame_indices=indices_hidden_states, + height=post_patch_height, + width=post_patch_width, + device=hidden_states.device, + ) + rotary_emb = rotary_emb.flatten(2).transpose(1, 2) + original_context_length = hidden_states.shape[1] + + # 3. Process short history latents + if latents_history_short is not None and indices_latents_history_short is not None: + latents_history_short = self.patch_short(latents_history_short) + _, _, _, H1, W1 = latents_history_short.shape + latents_history_short = latents_history_short.flatten(2).transpose(1, 2) + + rotary_emb_history_short = self.rope( + frame_indices=indices_latents_history_short, + height=H1, + width=W1, + device=latents_history_short.device, + ) + rotary_emb_history_short = rotary_emb_history_short.flatten(2).transpose(1, 2) + + hidden_states = torch.cat([latents_history_short, hidden_states], dim=1) + rotary_emb = torch.cat([rotary_emb_history_short, rotary_emb], dim=1) + + # 4. Process mid history latents + if latents_history_mid is not None and indices_latents_history_mid is not None: + latents_history_mid = pad_for_3d_conv(latents_history_mid, (2, 4, 4)) + latents_history_mid = self.patch_mid(latents_history_mid) + latents_history_mid = latents_history_mid.flatten(2).transpose(1, 2) + + rotary_emb_history_mid = self.rope( + frame_indices=indices_latents_history_mid, + height=H1, + width=W1, + device=latents_history_mid.device, + ) + rotary_emb_history_mid = pad_for_3d_conv(rotary_emb_history_mid, (2, 2, 2)) + rotary_emb_history_mid = center_down_sample_3d(rotary_emb_history_mid, (2, 2, 2)) + rotary_emb_history_mid = rotary_emb_history_mid.flatten(2).transpose(1, 2) + + hidden_states = torch.cat([latents_history_mid, hidden_states], dim=1) + rotary_emb = torch.cat([rotary_emb_history_mid, rotary_emb], dim=1) + + # 5. Process long history latents + if latents_history_long is not None and indices_latents_history_long is not None: + latents_history_long = pad_for_3d_conv(latents_history_long, (4, 8, 8)) + latents_history_long = self.patch_long(latents_history_long) + latents_history_long = latents_history_long.flatten(2).transpose(1, 2) + + rotary_emb_history_long = self.rope( + frame_indices=indices_latents_history_long, + height=H1, + width=W1, + device=latents_history_long.device, + ) + rotary_emb_history_long = pad_for_3d_conv(rotary_emb_history_long, (4, 4, 4)) + rotary_emb_history_long = center_down_sample_3d(rotary_emb_history_long, (4, 4, 4)) + rotary_emb_history_long = rotary_emb_history_long.flatten(2).transpose(1, 2) + + hidden_states = torch.cat([latents_history_long, hidden_states], dim=1) + rotary_emb = torch.cat([rotary_emb_history_long, rotary_emb], dim=1) + + history_context_length = hidden_states.shape[1] - original_context_length + + if indices_hidden_states is not None and self.zero_history_timestep: + timestep_t0 = torch.zeros((1), dtype=timestep.dtype, device=timestep.device) + temb_t0, timestep_proj_t0, _ = self.condition_embedder( + timestep_t0, encoder_hidden_states, is_return_encoder_hidden_states=False + ) + temb_t0 = temb_t0.unsqueeze(1).expand(batch_size, history_context_length, -1) + timestep_proj_t0 = ( + timestep_proj_t0.unflatten(-1, (6, -1)) + .view(1, 6, 1, -1) + .expand(batch_size, -1, history_context_length, -1) + ) + + temb, timestep_proj, encoder_hidden_states = self.condition_embedder(timestep, encoder_hidden_states) + timestep_proj = timestep_proj.unflatten(-1, (6, -1)) + + if indices_hidden_states is not None and not self.zero_history_timestep: + main_repeat_size = hidden_states.shape[1] + else: + main_repeat_size = original_context_length + temb = temb.view(batch_size, 1, -1).expand(batch_size, main_repeat_size, -1) + timestep_proj = timestep_proj.view(batch_size, 6, 1, -1).expand(batch_size, 6, main_repeat_size, -1) + + if indices_hidden_states is not None and self.zero_history_timestep: + temb = torch.cat([temb_t0, temb], dim=1) + timestep_proj = torch.cat([timestep_proj_t0, timestep_proj], dim=2) + + if timestep_proj.ndim == 4: + timestep_proj = timestep_proj.permute(0, 2, 1, 3) + + # 6. Transformer blocks + hidden_states = hidden_states.contiguous() + encoder_hidden_states = encoder_hidden_states.contiguous() + rotary_emb = rotary_emb.contiguous() + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.blocks: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + timestep_proj, + rotary_emb, + original_context_length, + ) + else: + for block in self.blocks: + hidden_states = block( + hidden_states, + encoder_hidden_states, + timestep_proj, + rotary_emb, + original_context_length, + ) + + # 7. Normalization + hidden_states = self.norm_out(hidden_states, temb, original_context_length) + hidden_states = self.proj_out(hidden_states) + + # 8. Unpatchify + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hidream_image.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hidream_image.py new file mode 100644 index 0000000000000000000000000000000000000000..57c0a16eee6f1a7ad3cf21fd7fb1ba66452f1281 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hidream_image.py @@ -0,0 +1,924 @@ +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...models.modeling_outputs import Transformer2DModelOutput +from ...models.modeling_utils import ModelMixin +from ...utils import apply_lora_scale, deprecate, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import Attention +from ..embeddings import TimestepEmbedding, Timesteps + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class HiDreamImageFeedForwardSwiGLU(nn.Module): + def __init__( + self, + dim: int, + hidden_dim: int, + multiple_of: int = 256, + ffn_dim_multiplier: float | None = None, + ): + super().__init__() + hidden_dim = int(2 * hidden_dim / 3) + # custom dim factor multiplier + if ffn_dim_multiplier is not None: + hidden_dim = int(ffn_dim_multiplier * hidden_dim) + hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) + + self.w1 = nn.Linear(dim, hidden_dim, bias=False) + self.w2 = nn.Linear(hidden_dim, dim, bias=False) + self.w3 = nn.Linear(dim, hidden_dim, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.w2(torch.nn.functional.silu(self.w1(x)) * self.w3(x)) + + +class HiDreamImagePooledEmbed(nn.Module): + def __init__(self, text_emb_dim, hidden_size): + super().__init__() + self.pooled_embedder = TimestepEmbedding(in_channels=text_emb_dim, time_embed_dim=hidden_size) + + def forward(self, pooled_embed: torch.Tensor) -> torch.Tensor: + return self.pooled_embedder(pooled_embed) + + +class HiDreamImageTimestepEmbed(nn.Module): + def __init__(self, hidden_size, frequency_embedding_size=256): + super().__init__() + self.time_proj = Timesteps(num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=frequency_embedding_size, time_embed_dim=hidden_size) + + def forward(self, timesteps: torch.Tensor, wdtype: torch.dtype | None = None) -> torch.Tensor: + t_emb = self.time_proj(timesteps).to(dtype=wdtype) + t_emb = self.timestep_embedder(t_emb) + return t_emb + + +class HiDreamImageOutEmbed(nn.Module): + def __init__(self, hidden_size, patch_size, out_channels): + super().__init__() + self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True) + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True)) + + def forward(self, hidden_states: torch.Tensor, temb: torch.Tensor) -> torch.Tensor: + shift, scale = self.adaLN_modulation(temb).chunk(2, dim=1) + hidden_states = self.norm_final(hidden_states) * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1) + hidden_states = self.linear(hidden_states) + return hidden_states + + +class HiDreamImagePatchEmbed(nn.Module): + def __init__( + self, + patch_size=2, + in_channels=4, + out_channels=1024, + ): + super().__init__() + self.patch_size = patch_size + self.out_channels = out_channels + self.proj = nn.Linear(in_channels * patch_size * patch_size, out_channels, bias=True) + + def forward(self, latent) -> torch.Tensor: + latent = self.proj(latent) + return latent + + +def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: + assert dim % 2 == 0, "The dimension must be even." + + is_mps = pos.device.type == "mps" + is_npu = pos.device.type == "npu" + + dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + + scale = torch.arange(0, dim, 2, dtype=dtype, device=pos.device) / dim + omega = 1.0 / (theta**scale) + + batch_size, seq_length = pos.shape + out = torch.einsum("...n,d->...nd", pos, omega) + cos_out = torch.cos(out) + sin_out = torch.sin(out) + + stacked_out = torch.stack([cos_out, -sin_out, sin_out, cos_out], dim=-1) + out = stacked_out.view(batch_size, -1, dim // 2, 2, 2) + return out.float() + + +class HiDreamImageEmbedND(nn.Module): + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + emb = torch.cat( + [rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(n_axes)], + dim=-3, + ) + return emb.unsqueeze(2) + + +def apply_rope(xq: torch.Tensor, xk: torch.Tensor, freqs_cis: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) + xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) + xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] + xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] + return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) + + +@maybe_allow_in_graph +class HiDreamAttention(Attention): + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + upcast_attention: bool = False, + upcast_softmax: bool = False, + scale_qk: bool = True, + eps: float = 1e-5, + processor=None, + out_dim: int = None, + single: bool = False, + ): + super(Attention, self).__init__() + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.out_dim = out_dim if out_dim is not None else query_dim + + self.scale_qk = scale_qk + self.scale = dim_head**-0.5 if self.scale_qk else 1.0 + + self.heads = out_dim // dim_head if out_dim is not None else heads + self.sliceable_head_dim = heads + self.single = single + + self.to_q = nn.Linear(query_dim, self.inner_dim) + self.to_k = nn.Linear(self.inner_dim, self.inner_dim) + self.to_v = nn.Linear(self.inner_dim, self.inner_dim) + self.to_out = nn.Linear(self.inner_dim, self.out_dim) + self.q_rms_norm = nn.RMSNorm(self.inner_dim, eps) + self.k_rms_norm = nn.RMSNorm(self.inner_dim, eps) + + if not single: + self.to_q_t = nn.Linear(query_dim, self.inner_dim) + self.to_k_t = nn.Linear(self.inner_dim, self.inner_dim) + self.to_v_t = nn.Linear(self.inner_dim, self.inner_dim) + self.to_out_t = nn.Linear(self.inner_dim, self.out_dim) + self.q_rms_norm_t = nn.RMSNorm(self.inner_dim, eps) + self.k_rms_norm_t = nn.RMSNorm(self.inner_dim, eps) + + self.set_processor(processor) + + def forward( + self, + norm_hidden_states: torch.Tensor, + hidden_states_masks: torch.Tensor = None, + norm_encoder_hidden_states: torch.Tensor = None, + image_rotary_emb: torch.Tensor = None, + ) -> torch.Tensor: + return self.processor( + self, + hidden_states=norm_hidden_states, + hidden_states_masks=hidden_states_masks, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) + + +class HiDreamAttnProcessor: + """Attention processor used typically in processing the SD3-like self-attention projections.""" + + def __call__( + self, + attn: HiDreamAttention, + hidden_states: torch.Tensor, + hidden_states_masks: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor = None, + *args, + **kwargs, + ) -> torch.Tensor: + dtype = hidden_states.dtype + batch_size = hidden_states.shape[0] + + query_i = attn.q_rms_norm(attn.to_q(hidden_states)).to(dtype=dtype) + key_i = attn.k_rms_norm(attn.to_k(hidden_states)).to(dtype=dtype) + value_i = attn.to_v(hidden_states) + + inner_dim = key_i.shape[-1] + head_dim = inner_dim // attn.heads + + query_i = query_i.view(batch_size, -1, attn.heads, head_dim) + key_i = key_i.view(batch_size, -1, attn.heads, head_dim) + value_i = value_i.view(batch_size, -1, attn.heads, head_dim) + if hidden_states_masks is not None: + key_i = key_i * hidden_states_masks.view(batch_size, -1, 1, 1) + + if not attn.single: + query_t = attn.q_rms_norm_t(attn.to_q_t(encoder_hidden_states)).to(dtype=dtype) + key_t = attn.k_rms_norm_t(attn.to_k_t(encoder_hidden_states)).to(dtype=dtype) + value_t = attn.to_v_t(encoder_hidden_states) + + query_t = query_t.view(batch_size, -1, attn.heads, head_dim) + key_t = key_t.view(batch_size, -1, attn.heads, head_dim) + value_t = value_t.view(batch_size, -1, attn.heads, head_dim) + + num_image_tokens = query_i.shape[1] + num_text_tokens = query_t.shape[1] + query = torch.cat([query_i, query_t], dim=1) + key = torch.cat([key_i, key_t], dim=1) + value = torch.cat([value_i, value_t], dim=1) + else: + query = query_i + key = key_i + value = value_i + + if query.shape[-1] == image_rotary_emb.shape[-3] * 2: + query, key = apply_rope(query, key, image_rotary_emb) + + else: + query_1, query_2 = query.chunk(2, dim=-1) + key_1, key_2 = key.chunk(2, dim=-1) + query_1, key_1 = apply_rope(query_1, key_1, image_rotary_emb) + query = torch.cat([query_1, query_2], dim=-1) + key = torch.cat([key_1, key_2], dim=-1) + + hidden_states = F.scaled_dot_product_attention( + query.transpose(1, 2), key.transpose(1, 2), value.transpose(1, 2), dropout_p=0.0, is_causal=False + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + if not attn.single: + hidden_states_i, hidden_states_t = torch.split(hidden_states, [num_image_tokens, num_text_tokens], dim=1) + hidden_states_i = attn.to_out(hidden_states_i) + hidden_states_t = attn.to_out_t(hidden_states_t) + return hidden_states_i, hidden_states_t + else: + hidden_states = attn.to_out(hidden_states) + return hidden_states + + +# Modified from https://github.com/deepseek-ai/DeepSeek-V3/blob/main/inference/model.py +class MoEGate(nn.Module): + def __init__( + self, + embed_dim, + num_routed_experts=4, + num_activated_experts=2, + aux_loss_alpha=0.01, + _force_inference_output=False, + ): + super().__init__() + self.top_k = num_activated_experts + self.n_routed_experts = num_routed_experts + + self.scoring_func = "softmax" + self.alpha = aux_loss_alpha + self.seq_aux = False + + # topk selection algorithm + self.norm_topk_prob = False + self.gating_dim = embed_dim + self.weight = nn.Parameter(torch.randn(self.n_routed_experts, self.gating_dim) / embed_dim**0.5) + + self._force_inference_output = _force_inference_output + + def forward(self, hidden_states): + bsz, seq_len, h = hidden_states.shape + ### compute gating score + hidden_states = hidden_states.view(-1, h) + logits = F.linear(hidden_states, self.weight, None) + if self.scoring_func == "softmax": + scores = logits.softmax(dim=-1) + else: + raise NotImplementedError(f"insupportable scoring function for MoE gating: {self.scoring_func}") + + ### select top-k experts + topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=False) + + ### norm gate to sum 1 + if self.top_k > 1 and self.norm_topk_prob: + denominator = topk_weight.sum(dim=-1, keepdim=True) + 1e-20 + topk_weight = topk_weight / denominator + + ### expert-level computation auxiliary loss + if self.training and self.alpha > 0.0 and not self._force_inference_output: + scores_for_aux = scores + aux_topk = self.top_k + # always compute aux loss based on the naive greedy topk method + topk_idx_for_aux_loss = topk_idx.view(bsz, -1) + if self.seq_aux: + scores_for_seq_aux = scores_for_aux.view(bsz, seq_len, -1) + ce = torch.zeros(bsz, self.n_routed_experts, device=hidden_states.device) + ce.scatter_add_( + 1, topk_idx_for_aux_loss, torch.ones(bsz, seq_len * aux_topk, device=hidden_states.device) + ).div_(seq_len * aux_topk / self.n_routed_experts) + aux_loss = (ce * scores_for_seq_aux.mean(dim=1)).sum(dim=1).mean() * self.alpha + else: + mask_ce = F.one_hot(topk_idx_for_aux_loss.view(-1), num_classes=self.n_routed_experts) + ce = mask_ce.float().mean(0) + + Pi = scores_for_aux.mean(0) + fi = ce * self.n_routed_experts + aux_loss = (Pi * fi).sum() * self.alpha + else: + aux_loss = None + return topk_idx, topk_weight, aux_loss + + +# Modified from https://github.com/deepseek-ai/DeepSeek-V3/blob/main/inference/model.py +class MOEFeedForwardSwiGLU(nn.Module): + def __init__( + self, + dim: int, + hidden_dim: int, + num_routed_experts: int, + num_activated_experts: int, + _force_inference_output: bool = False, + ): + super().__init__() + self.shared_experts = HiDreamImageFeedForwardSwiGLU(dim, hidden_dim // 2) + self.experts = nn.ModuleList( + [HiDreamImageFeedForwardSwiGLU(dim, hidden_dim) for i in range(num_routed_experts)] + ) + self._force_inference_output = _force_inference_output + self.gate = MoEGate( + embed_dim=dim, + num_routed_experts=num_routed_experts, + num_activated_experts=num_activated_experts, + _force_inference_output=_force_inference_output, + ) + self.num_activated_experts = num_activated_experts + + def forward(self, x): + wtype = x.dtype + identity = x + orig_shape = x.shape + topk_idx, topk_weight, aux_loss = self.gate(x) + x = x.view(-1, x.shape[-1]) + flat_topk_idx = topk_idx.view(-1) + if self.training and not self._force_inference_output: + x = x.repeat_interleave(self.num_activated_experts, dim=0) + y = torch.empty_like(x, dtype=wtype) + for i, expert in enumerate(self.experts): + y[flat_topk_idx == i] = expert(x[flat_topk_idx == i]).to(dtype=wtype) + y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1) + y = y.view(*orig_shape).to(dtype=wtype) + # y = AddAuxiliaryLoss.apply(y, aux_loss) + else: + y = self.moe_infer(x, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape) + y = y + self.shared_experts(identity) + return y + + @torch.no_grad() + def moe_infer(self, x, flat_expert_indices, flat_expert_weights): + expert_cache = torch.zeros_like(x) + idxs = flat_expert_indices.argsort() + tokens_per_expert = flat_expert_indices.bincount().cpu().numpy().cumsum(0) + token_idxs = idxs // self.num_activated_experts + for i, end_idx in enumerate(tokens_per_expert): + start_idx = 0 if i == 0 else tokens_per_expert[i - 1] + if start_idx == end_idx: + continue + expert = self.experts[i] + exp_token_idx = token_idxs[start_idx:end_idx] + expert_tokens = x[exp_token_idx] + expert_out = expert(expert_tokens) + expert_out.mul_(flat_expert_weights[idxs[start_idx:end_idx]]) + + # for fp16 and other dtype + expert_cache = expert_cache.to(expert_out.dtype) + expert_cache.scatter_reduce_(0, exp_token_idx.view(-1, 1).repeat(1, x.shape[-1]), expert_out, reduce="sum") + return expert_cache + + +class TextProjection(nn.Module): + def __init__(self, in_features, hidden_size): + super().__init__() + self.linear = nn.Linear(in_features=in_features, out_features=hidden_size, bias=False) + + def forward(self, caption): + hidden_states = self.linear(caption) + return hidden_states + + +@maybe_allow_in_graph +class HiDreamImageSingleTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + num_routed_experts: int = 4, + num_activated_experts: int = 2, + _force_inference_output: bool = False, + ): + super().__init__() + self.num_attention_heads = num_attention_heads + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(dim, 6 * dim, bias=True)) + + # 1. Attention + self.norm1_i = nn.LayerNorm(dim, eps=1e-06, elementwise_affine=False) + self.attn1 = HiDreamAttention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + processor=HiDreamAttnProcessor(), + single=True, + ) + + # 3. Feed-forward + self.norm3_i = nn.LayerNorm(dim, eps=1e-06, elementwise_affine=False) + if num_routed_experts > 0: + self.ff_i = MOEFeedForwardSwiGLU( + dim=dim, + hidden_dim=4 * dim, + num_routed_experts=num_routed_experts, + num_activated_experts=num_activated_experts, + _force_inference_output=_force_inference_output, + ) + else: + self.ff_i = HiDreamImageFeedForwardSwiGLU(dim=dim, hidden_dim=4 * dim) + + def forward( + self, + hidden_states: torch.Tensor, + hidden_states_masks: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor = None, + ) -> torch.Tensor: + wtype = hidden_states.dtype + shift_msa_i, scale_msa_i, gate_msa_i, shift_mlp_i, scale_mlp_i, gate_mlp_i = self.adaLN_modulation(temb)[ + :, None + ].chunk(6, dim=-1) + + # 1. MM-Attention + norm_hidden_states = self.norm1_i(hidden_states).to(dtype=wtype) + norm_hidden_states = norm_hidden_states * (1 + scale_msa_i) + shift_msa_i + attn_output_i = self.attn1( + norm_hidden_states, + hidden_states_masks, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = gate_msa_i * attn_output_i + hidden_states + + # 2. Feed-forward + norm_hidden_states = self.norm3_i(hidden_states).to(dtype=wtype) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp_i) + shift_mlp_i + ff_output_i = gate_mlp_i * self.ff_i(norm_hidden_states.to(dtype=wtype)) + hidden_states = ff_output_i + hidden_states + return hidden_states + + +@maybe_allow_in_graph +class HiDreamImageTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + num_routed_experts: int = 4, + num_activated_experts: int = 2, + _force_inference_output: bool = False, + ): + super().__init__() + self.num_attention_heads = num_attention_heads + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(dim, 12 * dim, bias=True)) + + # 1. Attention + self.norm1_i = nn.LayerNorm(dim, eps=1e-06, elementwise_affine=False) + self.norm1_t = nn.LayerNorm(dim, eps=1e-06, elementwise_affine=False) + self.attn1 = HiDreamAttention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + processor=HiDreamAttnProcessor(), + single=False, + ) + + # 3. Feed-forward + self.norm3_i = nn.LayerNorm(dim, eps=1e-06, elementwise_affine=False) + if num_routed_experts > 0: + self.ff_i = MOEFeedForwardSwiGLU( + dim=dim, + hidden_dim=4 * dim, + num_routed_experts=num_routed_experts, + num_activated_experts=num_activated_experts, + _force_inference_output=_force_inference_output, + ) + else: + self.ff_i = HiDreamImageFeedForwardSwiGLU(dim=dim, hidden_dim=4 * dim) + self.norm3_t = nn.LayerNorm(dim, eps=1e-06, elementwise_affine=False) + self.ff_t = HiDreamImageFeedForwardSwiGLU(dim=dim, hidden_dim=4 * dim) + + def forward( + self, + hidden_states: torch.Tensor, + hidden_states_masks: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + wtype = hidden_states.dtype + ( + shift_msa_i, + scale_msa_i, + gate_msa_i, + shift_mlp_i, + scale_mlp_i, + gate_mlp_i, + shift_msa_t, + scale_msa_t, + gate_msa_t, + shift_mlp_t, + scale_mlp_t, + gate_mlp_t, + ) = self.adaLN_modulation(temb)[:, None].chunk(12, dim=-1) + + # 1. MM-Attention + norm_hidden_states = self.norm1_i(hidden_states).to(dtype=wtype) + norm_hidden_states = norm_hidden_states * (1 + scale_msa_i) + shift_msa_i + norm_encoder_hidden_states = self.norm1_t(encoder_hidden_states).to(dtype=wtype) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + scale_msa_t) + shift_msa_t + + attn_output_i, attn_output_t = self.attn1( + norm_hidden_states, + hidden_states_masks, + norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + ) + + hidden_states = gate_msa_i * attn_output_i + hidden_states + encoder_hidden_states = gate_msa_t * attn_output_t + encoder_hidden_states + + # 2. Feed-forward + norm_hidden_states = self.norm3_i(hidden_states).to(dtype=wtype) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp_i) + shift_mlp_i + norm_encoder_hidden_states = self.norm3_t(encoder_hidden_states).to(dtype=wtype) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + scale_mlp_t) + shift_mlp_t + + ff_output_i = gate_mlp_i * self.ff_i(norm_hidden_states) + ff_output_t = gate_mlp_t * self.ff_t(norm_encoder_hidden_states) + hidden_states = ff_output_i + hidden_states + encoder_hidden_states = ff_output_t + encoder_hidden_states + return hidden_states, encoder_hidden_states + + +class HiDreamBlock(nn.Module): + def __init__(self, block: HiDreamImageTransformerBlock | HiDreamImageSingleTransformerBlock): + super().__init__() + self.block = block + + def forward( + self, + hidden_states: torch.Tensor, + hidden_states_masks: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + temb: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor = None, + ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]: + return self.block( + hidden_states=hidden_states, + hidden_states_masks=hidden_states_masks, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + +class HiDreamImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + _supports_gradient_checkpointing = True + _no_split_modules = ["HiDreamImageTransformerBlock", "HiDreamImageSingleTransformerBlock"] + + @register_to_config + def __init__( + self, + patch_size: int | None = None, + in_channels: int = 64, + out_channels: int | None = None, + num_layers: int = 16, + num_single_layers: int = 32, + attention_head_dim: int = 128, + num_attention_heads: int = 20, + caption_channels: list[int] = None, + text_emb_dim: int = 2048, + num_routed_experts: int = 4, + num_activated_experts: int = 2, + axes_dims_rope: tuple[int, int] = (32, 32), + max_resolution: tuple[int, int] = (128, 128), + llama_layers: list[int] = None, + force_inference_output: bool = False, + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.t_embedder = HiDreamImageTimestepEmbed(self.inner_dim) + self.p_embedder = HiDreamImagePooledEmbed(text_emb_dim, self.inner_dim) + self.x_embedder = HiDreamImagePatchEmbed( + patch_size=patch_size, + in_channels=in_channels, + out_channels=self.inner_dim, + ) + self.pe_embedder = HiDreamImageEmbedND(theta=10000, axes_dim=axes_dims_rope) + + self.double_stream_blocks = nn.ModuleList( + [ + HiDreamBlock( + HiDreamImageTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + num_routed_experts=num_routed_experts, + num_activated_experts=num_activated_experts, + _force_inference_output=force_inference_output, + ) + ) + for _ in range(num_layers) + ] + ) + + self.single_stream_blocks = nn.ModuleList( + [ + HiDreamBlock( + HiDreamImageSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + num_routed_experts=num_routed_experts, + num_activated_experts=num_activated_experts, + _force_inference_output=force_inference_output, + ) + ) + for _ in range(num_single_layers) + ] + ) + + self.final_layer = HiDreamImageOutEmbed(self.inner_dim, patch_size, self.out_channels) + + caption_channels = [caption_channels[1]] * (num_layers + num_single_layers) + [caption_channels[0]] + caption_projection = [] + for caption_channel in caption_channels: + caption_projection.append(TextProjection(in_features=caption_channel, hidden_size=self.inner_dim)) + self.caption_projection = nn.ModuleList(caption_projection) + self.max_seq = max_resolution[0] * max_resolution[1] // (patch_size * patch_size) + + self.gradient_checkpointing = False + + def unpatchify(self, x: torch.Tensor, img_sizes: list[tuple[int, int]], is_training: bool) -> list[torch.Tensor]: + if is_training and not self.config.force_inference_output: + B, S, F = x.shape + C = F // (self.config.patch_size * self.config.patch_size) + x = ( + x.reshape(B, S, self.config.patch_size, self.config.patch_size, C) + .permute(0, 4, 1, 2, 3) + .reshape(B, C, S, self.config.patch_size * self.config.patch_size) + ) + else: + x_arr = [] + p1 = self.config.patch_size + p2 = self.config.patch_size + for i, img_size in enumerate(img_sizes): + pH, pW = img_size + t = x[i, : pH * pW].reshape(1, pH, pW, -1) + F_token = t.shape[-1] + C = F_token // (p1 * p2) + t = t.reshape(1, pH, pW, p1, p2, C) + t = t.permute(0, 5, 1, 3, 2, 4) + t = t.reshape(1, C, pH * p1, pW * p2) + x_arr.append(t) + x = torch.cat(x_arr, dim=0) + return x + + def patchify(self, hidden_states): + batch_size, channels, height, width = hidden_states.shape + patch_size = self.config.patch_size + patch_height, patch_width = height // patch_size, width // patch_size + device = hidden_states.device + dtype = hidden_states.dtype + + # create img_sizes + img_sizes = torch.tensor([patch_height, patch_width], dtype=torch.int64, device=device).reshape(-1) + img_sizes = img_sizes.unsqueeze(0).repeat(batch_size, 1) + + # create hidden_states_masks + if hidden_states.shape[-2] != hidden_states.shape[-1]: + hidden_states_masks = torch.zeros((batch_size, self.max_seq), dtype=dtype, device=device) + hidden_states_masks[:, : patch_height * patch_width] = 1.0 + else: + hidden_states_masks = None + + # create img_ids + img_ids = torch.zeros(patch_height, patch_width, 3, device=device) + row_indices = torch.arange(patch_height, device=device)[:, None] + col_indices = torch.arange(patch_width, device=device)[None, :] + img_ids[..., 1] = img_ids[..., 1] + row_indices + img_ids[..., 2] = img_ids[..., 2] + col_indices + img_ids = img_ids.reshape(patch_height * patch_width, -1) + + if hidden_states.shape[-2] != hidden_states.shape[-1]: + # Handle non-square latents + img_ids_pad = torch.zeros(self.max_seq, 3, device=device) + img_ids_pad[: patch_height * patch_width, :] = img_ids + img_ids = img_ids_pad.unsqueeze(0).repeat(batch_size, 1, 1) + else: + img_ids = img_ids.unsqueeze(0).repeat(batch_size, 1, 1) + + # patchify hidden_states + if hidden_states.shape[-2] != hidden_states.shape[-1]: + # Handle non-square latents + out = torch.zeros( + (batch_size, channels, self.max_seq, patch_size * patch_size), + dtype=dtype, + device=device, + ) + hidden_states = hidden_states.reshape( + batch_size, channels, patch_height, patch_size, patch_width, patch_size + ) + hidden_states = hidden_states.permute(0, 1, 2, 4, 3, 5) + hidden_states = hidden_states.reshape( + batch_size, channels, patch_height * patch_width, patch_size * patch_size + ) + out[:, :, 0 : patch_height * patch_width] = hidden_states + hidden_states = out + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape( + batch_size, self.max_seq, patch_size * patch_size * channels + ) + + else: + # Handle square latents + hidden_states = hidden_states.reshape( + batch_size, channels, patch_height, patch_size, patch_width, patch_size + ) + hidden_states = hidden_states.permute(0, 2, 4, 3, 5, 1) + hidden_states = hidden_states.reshape( + batch_size, patch_height * patch_width, patch_size * patch_size * channels + ) + + return hidden_states, hidden_states_masks, img_sizes, img_ids + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timesteps: torch.LongTensor = None, + encoder_hidden_states_t5: torch.Tensor = None, + encoder_hidden_states_llama3: torch.Tensor = None, + pooled_embeds: torch.Tensor = None, + img_ids: torch.Tensor | None = None, + img_sizes: list[tuple[int, int]] | None = None, + hidden_states_masks: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + **kwargs, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + encoder_hidden_states = kwargs.get("encoder_hidden_states", None) + + if encoder_hidden_states is not None: + deprecation_message = "The `encoder_hidden_states` argument is deprecated. Please use `encoder_hidden_states_t5` and `encoder_hidden_states_llama3` instead." + deprecate("encoder_hidden_states", "0.35.0", deprecation_message) + encoder_hidden_states_t5 = encoder_hidden_states[0] + encoder_hidden_states_llama3 = encoder_hidden_states[1] + + if img_ids is not None and img_sizes is not None and hidden_states_masks is None: + deprecation_message = ( + "Passing `img_ids` and `img_sizes` with unpachified `hidden_states` is deprecated and will be ignored." + ) + deprecate("img_ids", "0.35.0", deprecation_message) + + if hidden_states_masks is not None and (img_ids is None or img_sizes is None): + raise ValueError("if `hidden_states_masks` is passed, `img_ids` and `img_sizes` must also be passed.") + elif hidden_states_masks is not None and hidden_states.ndim != 3: + raise ValueError( + "if `hidden_states_masks` is passed, `hidden_states` must be a 3D tensors with shape (batch_size, patch_height * patch_width, patch_size * patch_size * channels)" + ) + + # spatial forward + batch_size = hidden_states.shape[0] + hidden_states_type = hidden_states.dtype + + # Patchify the input + if hidden_states_masks is None: + hidden_states, hidden_states_masks, img_sizes, img_ids = self.patchify(hidden_states) + + # Embed the hidden states + hidden_states = self.x_embedder(hidden_states) + + # 0. time + timesteps = self.t_embedder(timesteps, hidden_states_type) + p_embedder = self.p_embedder(pooled_embeds) + temb = timesteps + p_embedder + + encoder_hidden_states = [encoder_hidden_states_llama3[k] for k in self.config.llama_layers] + + if self.caption_projection is not None: + new_encoder_hidden_states = [] + for i, enc_hidden_state in enumerate(encoder_hidden_states): + enc_hidden_state = self.caption_projection[i](enc_hidden_state) + enc_hidden_state = enc_hidden_state.view(batch_size, -1, hidden_states.shape[-1]) + new_encoder_hidden_states.append(enc_hidden_state) + encoder_hidden_states = new_encoder_hidden_states + encoder_hidden_states_t5 = self.caption_projection[-1](encoder_hidden_states_t5) + encoder_hidden_states_t5 = encoder_hidden_states_t5.view(batch_size, -1, hidden_states.shape[-1]) + encoder_hidden_states.append(encoder_hidden_states_t5) + + txt_ids = torch.zeros( + batch_size, + encoder_hidden_states[-1].shape[1] + + encoder_hidden_states[-2].shape[1] + + encoder_hidden_states[0].shape[1], + 3, + device=img_ids.device, + dtype=img_ids.dtype, + ) + ids = torch.cat((img_ids, txt_ids), dim=1) + image_rotary_emb = self.pe_embedder(ids) + + # 2. Blocks + block_id = 0 + initial_encoder_hidden_states = torch.cat([encoder_hidden_states[-1], encoder_hidden_states[-2]], dim=1) + initial_encoder_hidden_states_seq_len = initial_encoder_hidden_states.shape[1] + for bid, block in enumerate(self.double_stream_blocks): + cur_llama31_encoder_hidden_states = encoder_hidden_states[block_id] + cur_encoder_hidden_states = torch.cat( + [initial_encoder_hidden_states, cur_llama31_encoder_hidden_states], dim=1 + ) + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, initial_encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + hidden_states_masks, + cur_encoder_hidden_states, + temb, + image_rotary_emb, + ) + else: + hidden_states, initial_encoder_hidden_states = block( + hidden_states=hidden_states, + hidden_states_masks=hidden_states_masks, + encoder_hidden_states=cur_encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + initial_encoder_hidden_states = initial_encoder_hidden_states[:, :initial_encoder_hidden_states_seq_len] + block_id += 1 + + image_tokens_seq_len = hidden_states.shape[1] + hidden_states = torch.cat([hidden_states, initial_encoder_hidden_states], dim=1) + hidden_states_seq_len = hidden_states.shape[1] + if hidden_states_masks is not None: + encoder_attention_mask_ones = torch.ones( + (batch_size, initial_encoder_hidden_states.shape[1] + cur_llama31_encoder_hidden_states.shape[1]), + device=hidden_states_masks.device, + dtype=hidden_states_masks.dtype, + ) + hidden_states_masks = torch.cat([hidden_states_masks, encoder_attention_mask_ones], dim=1) + + for bid, block in enumerate(self.single_stream_blocks): + cur_llama31_encoder_hidden_states = encoder_hidden_states[block_id] + hidden_states = torch.cat([hidden_states, cur_llama31_encoder_hidden_states], dim=1) + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + hidden_states_masks, + None, + temb, + image_rotary_emb, + ) + else: + hidden_states = block( + hidden_states=hidden_states, + hidden_states_masks=hidden_states_masks, + encoder_hidden_states=None, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states[:, :hidden_states_seq_len] + block_id += 1 + + hidden_states = hidden_states[:, :image_tokens_seq_len, ...] + output = self.final_layer(hidden_states, temb) + output = self.unpatchify(output, img_sizes, self.training) + if hidden_states_masks is not None: + hidden_states_masks = hidden_states_masks[:, :image_tokens_seq_len] + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video.py new file mode 100644 index 0000000000000000000000000000000000000000..aab593c936430e25893f17765b252cc2937c195b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video.py @@ -0,0 +1,1096 @@ +# Copyright 2025 The Hunyuan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from diffusers.loaders import FromOriginalModelMixin + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..cache_utils import CacheMixin +from ..embeddings import ( + CombinedTimestepTextProjEmbeddings, + PixArtAlphaTextProjection, + TimestepEmbedding, + Timesteps, + get_1d_rotary_pos_embed, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle, FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class HunyuanVideoAttnProcessor2_0: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "HunyuanVideoAttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + if attn.add_q_proj is None and encoder_hidden_states is not None: + hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1) + + # 1. QKV projections + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + # 2. QK normalization + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # 3. Rotational positional embeddings applied to latent stream + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + if attn.add_q_proj is None and encoder_hidden_states is not None: + query = torch.cat( + [ + apply_rotary_emb( + query[:, : -encoder_hidden_states.shape[1]], + image_rotary_emb, + sequence_dim=1, + ), + query[:, -encoder_hidden_states.shape[1] :], + ], + dim=1, + ) + key = torch.cat( + [ + apply_rotary_emb( + key[:, : -encoder_hidden_states.shape[1]], + image_rotary_emb, + sequence_dim=1, + ), + key[:, -encoder_hidden_states.shape[1] :], + ], + dim=1, + ) + else: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + # 4. Encoder condition QKV projection and normalization + if attn.add_q_proj is not None and encoder_hidden_states is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + encoder_query = encoder_query.unflatten(2, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(2, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(2, (attn.heads, -1)) + + if attn.norm_added_q is not None: + encoder_query = attn.norm_added_q(encoder_query) + if attn.norm_added_k is not None: + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([query, encoder_query], dim=1) + key = torch.cat([key, encoder_key], dim=1) + value = torch.cat([value, encoder_value], dim=1) + + # 5. Attention + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + # 6. Output projection + if encoder_hidden_states is not None: + hidden_states, encoder_hidden_states = ( + hidden_states[:, : -encoder_hidden_states.shape[1]], + hidden_states[:, -encoder_hidden_states.shape[1] :], + ) + + if getattr(attn, "to_out", None) is not None: + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + if getattr(attn, "to_add_out", None) is not None: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + + +class HunyuanVideoPatchEmbed(nn.Module): + def __init__( + self, + patch_size: int | tuple[int, int, int] = 16, + in_chans: int = 3, + embed_dim: int = 768, + ) -> None: + super().__init__() + + patch_size = (patch_size, patch_size, patch_size) if isinstance(patch_size, int) else patch_size + self.proj = nn.Conv3d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.proj(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) # BCFHW -> BNC + return hidden_states + + +class HunyuanVideoAdaNorm(nn.Module): + def __init__(self, in_features: int, out_features: int | None = None) -> None: + super().__init__() + + out_features = out_features or 2 * in_features + self.linear = nn.Linear(in_features, out_features) + self.nonlinearity = nn.SiLU() + + def forward( + self, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + temb = self.linear(self.nonlinearity(temb)) + gate_msa, gate_mlp = temb.chunk(2, dim=1) + gate_msa, gate_mlp = gate_msa.unsqueeze(1), gate_mlp.unsqueeze(1) + return gate_msa, gate_mlp + + +class HunyuanVideoTokenReplaceAdaLayerNormZero(nn.Module): + def __init__(self, embedding_dim: int, norm_type: str = "layer_norm", bias: bool = True): + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 6 * embedding_dim, bias=bias) + + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + elif norm_type == "fp32_layer_norm": + self.norm = FP32LayerNorm(embedding_dim, elementwise_affine=False, bias=False) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, + hidden_states: torch.Tensor, + emb: torch.Tensor, + token_replace_emb: torch.Tensor, + first_frame_num_tokens: int, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + emb = self.linear(self.silu(emb)) + token_replace_emb = self.linear(self.silu(token_replace_emb)) + + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = emb.chunk(6, dim=1) + tr_shift_msa, tr_scale_msa, tr_gate_msa, tr_shift_mlp, tr_scale_mlp, tr_gate_mlp = token_replace_emb.chunk( + 6, dim=1 + ) + + norm_hidden_states = self.norm(hidden_states) + hidden_states_zero = ( + norm_hidden_states[:, :first_frame_num_tokens] * (1 + tr_scale_msa[:, None]) + tr_shift_msa[:, None] + ) + hidden_states_orig = ( + norm_hidden_states[:, first_frame_num_tokens:] * (1 + scale_msa[:, None]) + shift_msa[:, None] + ) + hidden_states = torch.cat([hidden_states_zero, hidden_states_orig], dim=1) + + return ( + hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + tr_gate_msa, + tr_shift_mlp, + tr_scale_mlp, + tr_gate_mlp, + ) + + +class HunyuanVideoTokenReplaceAdaLayerNormZeroSingle(nn.Module): + def __init__(self, embedding_dim: int, norm_type: str = "layer_norm", bias: bool = True): + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 3 * embedding_dim, bias=bias) + + if norm_type == "layer_norm": + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False, eps=1e-6) + else: + raise ValueError( + f"Unsupported `norm_type` ({norm_type}) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'." + ) + + def forward( + self, + hidden_states: torch.Tensor, + emb: torch.Tensor, + token_replace_emb: torch.Tensor, + first_frame_num_tokens: int, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + emb = self.linear(self.silu(emb)) + token_replace_emb = self.linear(self.silu(token_replace_emb)) + + shift_msa, scale_msa, gate_msa = emb.chunk(3, dim=1) + tr_shift_msa, tr_scale_msa, tr_gate_msa = token_replace_emb.chunk(3, dim=1) + + norm_hidden_states = self.norm(hidden_states) + hidden_states_zero = ( + norm_hidden_states[:, :first_frame_num_tokens] * (1 + tr_scale_msa[:, None]) + tr_shift_msa[:, None] + ) + hidden_states_orig = ( + norm_hidden_states[:, first_frame_num_tokens:] * (1 + scale_msa[:, None]) + shift_msa[:, None] + ) + hidden_states = torch.cat([hidden_states_zero, hidden_states_orig], dim=1) + + return hidden_states, gate_msa, tr_gate_msa + + +class HunyuanVideoConditionEmbedding(nn.Module): + def __init__( + self, + embedding_dim: int, + pooled_projection_dim: int, + guidance_embeds: bool, + image_condition_type: str | None = None, + ): + super().__init__() + + self.image_condition_type = image_condition_type + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + self.text_embedder = PixArtAlphaTextProjection(pooled_projection_dim, embedding_dim, act_fn="silu") + + self.guidance_embedder = None + if guidance_embeds: + self.guidance_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + def forward( + self, timestep: torch.Tensor, pooled_projection: torch.Tensor, guidance: torch.Tensor | None = None + ) -> tuple[torch.Tensor, torch.Tensor]: + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=pooled_projection.dtype)) # (N, D) + pooled_projections = self.text_embedder(pooled_projection) + + token_replace_emb = None + if self.image_condition_type == "token_replace": + token_replace_timestep = torch.zeros_like(timestep) + token_replace_proj = self.time_proj(token_replace_timestep) + token_replace_emb = self.timestep_embedder(token_replace_proj.to(dtype=pooled_projection.dtype)) + token_replace_emb = token_replace_emb + pooled_projections + + if self.guidance_embedder is not None: + guidance_proj = self.time_proj(guidance) + guidance_emb = self.guidance_embedder(guidance_proj.to(dtype=pooled_projection.dtype)) + conditioning = timesteps_emb + guidance_emb + pooled_projections + else: + conditioning = timesteps_emb + pooled_projections + return conditioning, token_replace_emb + + +class HunyuanVideoIndividualTokenRefinerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_width_ratio: str = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_width_ratio, activation_fn="linear-silu", dropout=mlp_drop_rate) + + self.norm_out = HunyuanVideoAdaNorm(hidden_size, 2 * hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + norm_hidden_states = self.norm1(hidden_states) + + attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=None, + attention_mask=attention_mask, + ) + + gate_msa, gate_mlp = self.norm_out(temb) + hidden_states = hidden_states + attn_output * gate_msa + + ff_output = self.ff(self.norm2(hidden_states)) + hidden_states = hidden_states + ff_output * gate_mlp + + return hidden_states + + +class HunyuanVideoIndividualTokenRefiner(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + num_layers: int, + mlp_width_ratio: float = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + self.refiner_blocks = nn.ModuleList( + [ + HunyuanVideoIndividualTokenRefinerBlock( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + mlp_width_ratio=mlp_width_ratio, + mlp_drop_rate=mlp_drop_rate, + attention_bias=attention_bias, + ) + for _ in range(num_layers) + ] + ) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + ) -> None: + self_attn_mask = None + if attention_mask is not None: + batch_size = attention_mask.shape[0] + seq_len = attention_mask.shape[1] + attention_mask = attention_mask.to(hidden_states.device).bool() + self_attn_mask_1 = attention_mask.view(batch_size, 1, 1, seq_len).repeat(1, 1, seq_len, 1) + self_attn_mask_2 = self_attn_mask_1.transpose(2, 3) + self_attn_mask = (self_attn_mask_1 & self_attn_mask_2).bool() + self_attn_mask[:, :, :, 0] = True + + for block in self.refiner_blocks: + hidden_states = block(hidden_states, temb, self_attn_mask) + + return hidden_states + + +class HunyuanVideoTokenRefiner(nn.Module): + def __init__( + self, + in_channels: int, + num_attention_heads: int, + attention_head_dim: int, + num_layers: int, + mlp_ratio: float = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.time_text_embed = CombinedTimestepTextProjEmbeddings( + embedding_dim=hidden_size, pooled_projection_dim=in_channels + ) + self.proj_in = nn.Linear(in_channels, hidden_size, bias=True) + self.token_refiner = HunyuanVideoIndividualTokenRefiner( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + num_layers=num_layers, + mlp_width_ratio=mlp_ratio, + mlp_drop_rate=mlp_drop_rate, + attention_bias=attention_bias, + ) + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + attention_mask: torch.LongTensor | None = None, + ) -> torch.Tensor: + if attention_mask is None: + pooled_projections = hidden_states.mean(dim=1) + else: + original_dtype = hidden_states.dtype + mask_float = attention_mask.float().unsqueeze(-1) + pooled_projections = (hidden_states * mask_float).sum(dim=1) / mask_float.sum(dim=1) + pooled_projections = pooled_projections.to(original_dtype) + + temb = self.time_text_embed(timestep, pooled_projections) + hidden_states = self.proj_in(hidden_states) + hidden_states = self.token_refiner(hidden_states, temb, attention_mask) + + return hidden_states + + +class HunyuanVideoRotaryPosEmbed(nn.Module): + def __init__(self, patch_size: int, patch_size_t: int, rope_dim: list[int], theta: float = 256.0) -> None: + super().__init__() + + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.rope_dim = rope_dim + self.theta = theta + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + rope_sizes = [num_frames // self.patch_size_t, height // self.patch_size, width // self.patch_size] + + axes_grids = [] + for i in range(3): + # Note: The following line diverges from original behaviour. We create the grid on the device, whereas + # original implementation creates it on CPU and then moves it to device. This results in numerical + # differences in layerwise debugging outputs, but visually it is the same. + grid = torch.arange(0, rope_sizes[i], device=hidden_states.device, dtype=torch.float32) + axes_grids.append(grid) + grid = torch.meshgrid(*axes_grids, indexing="ij") # [W, H, T] + grid = torch.stack(grid, dim=0) # [3, W, H, T] + + freqs = [] + for i in range(3): + freq = get_1d_rotary_pos_embed(self.rope_dim[i], grid[i].reshape(-1), self.theta, use_real=True) + freqs.append(freq) + + freqs_cos = torch.cat([f[0] for f in freqs], dim=1) # (W * H * T, D / 2) + freqs_sin = torch.cat([f[1] for f in freqs], dim=1) # (W * H * T, D / 2) + return freqs_cos, freqs_sin + + +class HunyuanVideoSingleTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float = 4.0, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + mlp_dim = int(hidden_size * mlp_ratio) + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + bias=True, + processor=HunyuanVideoAttnProcessor2_0(), + qk_norm=qk_norm, + eps=1e-6, + pre_only=True, + ) + + self.norm = AdaLayerNormZeroSingle(hidden_size, norm_type="layer_norm") + self.proj_mlp = nn.Linear(hidden_size, mlp_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(hidden_size + mlp_dim, hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + *args, + **kwargs, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_length = encoder_hidden_states.shape[1] + hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1) + + residual = hidden_states + + # 1. Input normalization + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + + norm_hidden_states, norm_encoder_hidden_states = ( + norm_hidden_states[:, :-text_seq_length, :], + norm_hidden_states[:, -text_seq_length:, :], + ) + + # 2. Attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + attn_output = torch.cat([attn_output, context_attn_output], dim=1) + + # 3. Modulation and residual connection + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + hidden_states = gate.unsqueeze(1) * self.proj_out(hidden_states) + hidden_states = hidden_states + residual + + hidden_states, encoder_hidden_states = ( + hidden_states[:, :-text_seq_length, :], + hidden_states[:, -text_seq_length:, :], + ) + return hidden_states, encoder_hidden_states + + +class HunyuanVideoTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + self.norm1_context = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + added_kv_proj_dim=hidden_size, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + context_pre_only=False, + bias=True, + processor=HunyuanVideoAttnProcessor2_0(), + qk_norm=qk_norm, + eps=1e-6, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + freqs_cis: tuple[torch.Tensor, torch.Tensor] | None = None, + *args, + **kwargs, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Input normalization + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + + # 2. Joint attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=freqs_cis, + ) + + # 3. Modulation and residual connection + hidden_states = hidden_states + attn_output * gate_msa.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + context_attn_output * c_gate_msa.unsqueeze(1) + + norm_hidden_states = self.norm2(hidden_states) + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + # 4. Feed-forward + ff_output = self.ff(norm_hidden_states) + context_ff_output = self.ff_context(norm_encoder_hidden_states) + + hidden_states = hidden_states + gate_mlp.unsqueeze(1) * ff_output + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + + return hidden_states, encoder_hidden_states + + +class HunyuanVideoTokenReplaceSingleTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float = 4.0, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + mlp_dim = int(hidden_size * mlp_ratio) + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + bias=True, + processor=HunyuanVideoAttnProcessor2_0(), + qk_norm=qk_norm, + eps=1e-6, + pre_only=True, + ) + + self.norm = HunyuanVideoTokenReplaceAdaLayerNormZeroSingle(hidden_size, norm_type="layer_norm") + self.proj_mlp = nn.Linear(hidden_size, mlp_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(hidden_size + mlp_dim, hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + token_replace_emb: torch.Tensor = None, + num_tokens: int = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_length = encoder_hidden_states.shape[1] + hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1) + + residual = hidden_states + + # 1. Input normalization + norm_hidden_states, gate, tr_gate = self.norm(hidden_states, temb, token_replace_emb, num_tokens) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + + norm_hidden_states, norm_encoder_hidden_states = ( + norm_hidden_states[:, :-text_seq_length, :], + norm_hidden_states[:, -text_seq_length:, :], + ) + + # 2. Attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + attn_output = torch.cat([attn_output, context_attn_output], dim=1) + + # 3. Modulation and residual connection + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + + proj_output = self.proj_out(hidden_states) + hidden_states_zero = proj_output[:, :num_tokens] * tr_gate.unsqueeze(1) + hidden_states_orig = proj_output[:, num_tokens:] * gate.unsqueeze(1) + hidden_states = torch.cat([hidden_states_zero, hidden_states_orig], dim=1) + hidden_states = hidden_states + residual + + hidden_states, encoder_hidden_states = ( + hidden_states[:, :-text_seq_length, :], + hidden_states[:, -text_seq_length:, :], + ) + return hidden_states, encoder_hidden_states + + +class HunyuanVideoTokenReplaceTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = HunyuanVideoTokenReplaceAdaLayerNormZero(hidden_size, norm_type="layer_norm") + self.norm1_context = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + added_kv_proj_dim=hidden_size, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + context_pre_only=False, + bias=True, + processor=HunyuanVideoAttnProcessor2_0(), + qk_norm=qk_norm, + eps=1e-6, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + freqs_cis: tuple[torch.Tensor, torch.Tensor] | None = None, + token_replace_emb: torch.Tensor = None, + num_tokens: int = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Input normalization + ( + norm_hidden_states, + gate_msa, + shift_mlp, + scale_mlp, + gate_mlp, + tr_gate_msa, + tr_shift_mlp, + tr_scale_mlp, + tr_gate_mlp, + ) = self.norm1(hidden_states, temb, token_replace_emb, num_tokens) + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + + # 2. Joint attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=freqs_cis, + ) + + # 3. Modulation and residual connection + hidden_states_zero = hidden_states[:, :num_tokens] + attn_output[:, :num_tokens] * tr_gate_msa.unsqueeze(1) + hidden_states_orig = hidden_states[:, num_tokens:] + attn_output[:, num_tokens:] * gate_msa.unsqueeze(1) + hidden_states = torch.cat([hidden_states_zero, hidden_states_orig], dim=1) + encoder_hidden_states = encoder_hidden_states + context_attn_output * c_gate_msa.unsqueeze(1) + + norm_hidden_states = self.norm2(hidden_states) + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + + hidden_states_zero = norm_hidden_states[:, :num_tokens] * (1 + tr_scale_mlp[:, None]) + tr_shift_mlp[:, None] + hidden_states_orig = norm_hidden_states[:, num_tokens:] * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + norm_hidden_states = torch.cat([hidden_states_zero, hidden_states_orig], dim=1) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + # 4. Feed-forward + ff_output = self.ff(norm_hidden_states) + context_ff_output = self.ff_context(norm_encoder_hidden_states) + + hidden_states_zero = hidden_states[:, :num_tokens] + ff_output[:, :num_tokens] * tr_gate_mlp.unsqueeze(1) + hidden_states_orig = hidden_states[:, num_tokens:] + ff_output[:, num_tokens:] * gate_mlp.unsqueeze(1) + hidden_states = torch.cat([hidden_states_zero, hidden_states_orig], dim=1) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + + return hidden_states, encoder_hidden_states + + +class HunyuanVideoTransformer3DModel( + ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin +): + r""" + A Transformer model for video-like data used in [HunyuanVideo](https://huggingface.co/tencent/HunyuanVideo). + + Args: + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + num_attention_heads (`int`, defaults to `24`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + num_layers (`int`, defaults to `20`): + The number of layers of dual-stream blocks to use. + num_single_layers (`int`, defaults to `40`): + The number of layers of single-stream blocks to use. + num_refiner_layers (`int`, defaults to `2`): + The number of layers of refiner blocks to use. + mlp_ratio (`float`, defaults to `4.0`): + The ratio of the hidden layer size to the input size in the feedforward network. + patch_size (`int`, defaults to `2`): + The size of the spatial patches to use in the patch embedding layer. + patch_size_t (`int`, defaults to `1`): + The size of the tmeporal patches to use in the patch embedding layer. + qk_norm (`str`, defaults to `rms_norm`): + The normalization to use for the query and key projections in the attention layers. + guidance_embeds (`bool`, defaults to `True`): + Whether to use guidance embeddings in the model. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + pooled_projection_dim (`int`, defaults to `768`): + The dimension of the pooled projection of the text embeddings. + rope_theta (`float`, defaults to `256.0`): + The value of theta to use in the RoPE layer. + rope_axes_dim (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions of the axes to use in the RoPE layer. + image_condition_type (`str`, *optional*, defaults to `None`): + The type of image conditioning to use. If `None`, no image conditioning is used. If `latent_concat`, the + image is concatenated to the latent stream. If `token_replace`, the image is used to replace first-frame + tokens in the latent stream and apply conditioning. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["x_embedder", "context_embedder", "norm"] + _no_split_modules = [ + "HunyuanVideoTransformerBlock", + "HunyuanVideoSingleTransformerBlock", + "HunyuanVideoPatchEmbed", + "HunyuanVideoTokenRefiner", + ] + _repeated_blocks = [ + "HunyuanVideoTransformerBlock", + "HunyuanVideoSingleTransformerBlock", + "HunyuanVideoPatchEmbed", + "HunyuanVideoTokenRefiner", + ] + + @register_to_config + def __init__( + self, + in_channels: int = 16, + out_channels: int = 16, + num_attention_heads: int = 24, + attention_head_dim: int = 128, + num_layers: int = 20, + num_single_layers: int = 40, + num_refiner_layers: int = 2, + mlp_ratio: float = 4.0, + patch_size: int = 2, + patch_size_t: int = 1, + qk_norm: str = "rms_norm", + guidance_embeds: bool = True, + text_embed_dim: int = 4096, + pooled_projection_dim: int = 768, + rope_theta: float = 256.0, + rope_axes_dim: tuple[int, ...] = (16, 56, 56), + image_condition_type: str | None = None, + ) -> None: + super().__init__() + + supported_image_condition_types = ["latent_concat", "token_replace"] + if image_condition_type is not None and image_condition_type not in supported_image_condition_types: + raise ValueError( + f"Invalid `image_condition_type` ({image_condition_type}). Supported ones are: {supported_image_condition_types}" + ) + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Latent and condition embedders + self.x_embedder = HunyuanVideoPatchEmbed((patch_size_t, patch_size, patch_size), in_channels, inner_dim) + self.context_embedder = HunyuanVideoTokenRefiner( + text_embed_dim, num_attention_heads, attention_head_dim, num_layers=num_refiner_layers + ) + + self.time_text_embed = HunyuanVideoConditionEmbedding( + inner_dim, pooled_projection_dim, guidance_embeds, image_condition_type + ) + + # 2. RoPE + self.rope = HunyuanVideoRotaryPosEmbed(patch_size, patch_size_t, rope_axes_dim, rope_theta) + + # 3. Dual stream transformer blocks + if image_condition_type == "token_replace": + self.transformer_blocks = nn.ModuleList( + [ + HunyuanVideoTokenReplaceTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_layers) + ] + ) + else: + self.transformer_blocks = nn.ModuleList( + [ + HunyuanVideoTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_layers) + ] + ) + + # 4. Single stream transformer blocks + if image_condition_type == "token_replace": + self.single_transformer_blocks = nn.ModuleList( + [ + HunyuanVideoTokenReplaceSingleTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_single_layers) + ] + ) + else: + self.single_transformer_blocks = nn.ModuleList( + [ + HunyuanVideoSingleTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_single_layers) + ] + ) + + # 5. Output projection + self.norm_out = AdaLayerNormContinuous(inner_dim, inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(inner_dim, patch_size_t * patch_size * patch_size * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_attention_mask: torch.Tensor, + pooled_projections: torch.Tensor, + guidance: torch.Tensor = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p, p_t = self.config.patch_size, self.config.patch_size_t + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p + post_patch_width = width // p + first_frame_num_tokens = 1 * post_patch_height * post_patch_width + + # 1. RoPE + image_rotary_emb = self.rope(hidden_states) + + # 2. Conditional embeddings + temb, token_replace_emb = self.time_text_embed(timestep, pooled_projections, guidance) + + hidden_states = self.x_embedder(hidden_states) + encoder_hidden_states = self.context_embedder(encoder_hidden_states, timestep, encoder_attention_mask) + + # 3. Attention mask preparation + latent_sequence_length = hidden_states.shape[1] + condition_sequence_length = encoder_hidden_states.shape[1] + sequence_length = latent_sequence_length + condition_sequence_length + attention_mask = torch.ones( + batch_size, sequence_length, device=hidden_states.device, dtype=torch.bool + ) # [B, N] + effective_condition_sequence_length = encoder_attention_mask.sum(dim=1, dtype=torch.int) # [B,] + effective_sequence_length = latent_sequence_length + effective_condition_sequence_length + indices = torch.arange(sequence_length, device=hidden_states.device).unsqueeze(0) # [1, N] + mask_indices = indices >= effective_sequence_length.unsqueeze(1) # [B, N] + attention_mask = attention_mask.masked_fill(mask_indices, False) + attention_mask = attention_mask.unsqueeze(1).unsqueeze(1) # [B, 1, 1, N] + + # 4. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + attention_mask, + image_rotary_emb, + token_replace_emb, + first_frame_num_tokens, + ) + + for block in self.single_transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + attention_mask, + image_rotary_emb, + token_replace_emb, + first_frame_num_tokens, + ) + + else: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + attention_mask, + image_rotary_emb, + token_replace_emb, + first_frame_num_tokens, + ) + + for block in self.single_transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + attention_mask, + image_rotary_emb, + token_replace_emb, + first_frame_num_tokens, + ) + + # 5. Output projection + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, -1, p_t, p, p + ) + hidden_states = hidden_states.permute(0, 4, 1, 5, 2, 6, 3, 7) + hidden_states = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (hidden_states,) + + return Transformer2DModelOutput(sample=hidden_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video15.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video15.py new file mode 100644 index 0000000000000000000000000000000000000000..222b0791d650fdd466628a676a4a5fa731da0375 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video15.py @@ -0,0 +1,775 @@ +# Copyright 2025 The Hunyuan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from diffusers.loaders import FromOriginalModelMixin + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..cache_utils import CacheMixin +from ..embeddings import ( + CombinedTimestepTextProjEmbeddings, + TimestepEmbedding, + Timesteps, + get_1d_rotary_pos_embed, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class HunyuanVideo15AttnProcessor2_0: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "HunyuanVideo15AttnProcessor2_0 requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + # 1. QKV projections + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + # 2. QK normalization + query = attn.norm_q(query) + key = attn.norm_k(key) + + # 3. Rotational positional embeddings applied to latent stream + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + # 4. Encoder condition QKV projection and normalization + if encoder_hidden_states is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + encoder_query = encoder_query.unflatten(2, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(2, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(2, (attn.heads, -1)) + + if attn.norm_added_q is not None: + encoder_query = attn.norm_added_q(encoder_query) + if attn.norm_added_k is not None: + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([query, encoder_query], dim=1) + key = torch.cat([key, encoder_key], dim=1) + value = torch.cat([value, encoder_value], dim=1) + + batch_size, seq_len, heads, dim = query.shape + attention_mask = F.pad(attention_mask, (seq_len - attention_mask.shape[1], 0), value=True) + attention_mask = attention_mask.bool() + self_attn_mask_1 = attention_mask.view(batch_size, 1, 1, seq_len).repeat(1, 1, seq_len, 1) + self_attn_mask_2 = self_attn_mask_1.transpose(2, 3) + attention_mask = (self_attn_mask_1 & self_attn_mask_2).bool() + + # 5. Attention + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + # 6. Output projection + if encoder_hidden_states is not None: + hidden_states, encoder_hidden_states = ( + hidden_states[:, : -encoder_hidden_states.shape[1]], + hidden_states[:, -encoder_hidden_states.shape[1] :], + ) + + if getattr(attn, "to_out", None) is not None: + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + if getattr(attn, "to_add_out", None) is not None: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + + +class HunyuanVideo15PatchEmbed(nn.Module): + def __init__( + self, + patch_size: int | tuple[int, int, int] = 16, + in_chans: int = 3, + embed_dim: int = 768, + ) -> None: + super().__init__() + + patch_size = (patch_size, patch_size, patch_size) if isinstance(patch_size, int) else patch_size + self.proj = nn.Conv3d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.proj(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) # BCFHW -> BNC + return hidden_states + + +class HunyuanVideo15AdaNorm(nn.Module): + def __init__(self, in_features: int, out_features: int | None = None) -> None: + super().__init__() + + out_features = out_features or 2 * in_features + self.linear = nn.Linear(in_features, out_features) + self.nonlinearity = nn.SiLU() + + def forward( + self, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + temb = self.linear(self.nonlinearity(temb)) + gate_msa, gate_mlp = temb.chunk(2, dim=1) + gate_msa, gate_mlp = gate_msa.unsqueeze(1), gate_mlp.unsqueeze(1) + return gate_msa, gate_mlp + + +class HunyuanVideo15TimeEmbedding(nn.Module): + r""" + Time embedding for HunyuanVideo 1.5. + + Supports standard timestep embedding and optional reference timestep embedding for MeanFlow-based super-resolution + models. + + Args: + embedding_dim (`int`): + The dimension of the output embedding. + """ + + def __init__(self, embedding_dim: int, use_meanflow: bool = False): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.use_meanflow = use_meanflow + self.time_proj_r = None + self.timestep_embedder_r = None + if use_meanflow: + self.time_proj_r = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder_r = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + def forward( + self, + timestep: torch.Tensor, + timestep_r: torch.Tensor | None = None, + ) -> torch.Tensor: + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=timestep.dtype)) + + if timestep_r is not None: + timesteps_proj_r = self.time_proj_r(timestep_r) + timesteps_emb_r = self.timestep_embedder_r(timesteps_proj_r.to(dtype=timestep.dtype)) + timesteps_emb = timesteps_emb + timesteps_emb_r + + return timesteps_emb + + +class HunyuanVideo15IndividualTokenRefinerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_width_ratio: str = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_width_ratio, activation_fn="linear-silu", dropout=mlp_drop_rate) + + self.norm_out = HunyuanVideo15AdaNorm(hidden_size, 2 * hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + norm_hidden_states = self.norm1(hidden_states) + + attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=None, + attention_mask=attention_mask, + ) + + gate_msa, gate_mlp = self.norm_out(temb) + hidden_states = hidden_states + attn_output * gate_msa + + ff_output = self.ff(self.norm2(hidden_states)) + hidden_states = hidden_states + ff_output * gate_mlp + + return hidden_states + + +class HunyuanVideo15IndividualTokenRefiner(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + num_layers: int, + mlp_width_ratio: float = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + self.refiner_blocks = nn.ModuleList( + [ + HunyuanVideo15IndividualTokenRefinerBlock( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + mlp_width_ratio=mlp_width_ratio, + mlp_drop_rate=mlp_drop_rate, + attention_bias=attention_bias, + ) + for _ in range(num_layers) + ] + ) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + ) -> None: + self_attn_mask = None + if attention_mask is not None: + batch_size = attention_mask.shape[0] + seq_len = attention_mask.shape[1] + attention_mask = attention_mask.to(hidden_states.device).bool() + self_attn_mask_1 = attention_mask.view(batch_size, 1, 1, seq_len).repeat(1, 1, seq_len, 1) + self_attn_mask_2 = self_attn_mask_1.transpose(2, 3) + self_attn_mask = (self_attn_mask_1 & self_attn_mask_2).bool() + + for block in self.refiner_blocks: + hidden_states = block(hidden_states, temb, self_attn_mask) + + return hidden_states + + +class HunyuanVideo15TokenRefiner(nn.Module): + def __init__( + self, + in_channels: int, + num_attention_heads: int, + attention_head_dim: int, + num_layers: int, + mlp_ratio: float = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.time_text_embed = CombinedTimestepTextProjEmbeddings( + embedding_dim=hidden_size, pooled_projection_dim=in_channels + ) + self.proj_in = nn.Linear(in_channels, hidden_size, bias=True) + self.token_refiner = HunyuanVideo15IndividualTokenRefiner( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + num_layers=num_layers, + mlp_width_ratio=mlp_ratio, + mlp_drop_rate=mlp_drop_rate, + attention_bias=attention_bias, + ) + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + attention_mask: torch.LongTensor | None = None, + ) -> torch.Tensor: + if attention_mask is None: + pooled_projections = hidden_states.mean(dim=1) + else: + original_dtype = hidden_states.dtype + mask_float = attention_mask.float().unsqueeze(-1) + pooled_projections = (hidden_states * mask_float).sum(dim=1) / mask_float.sum(dim=1) + pooled_projections = pooled_projections.to(original_dtype) + + temb = self.time_text_embed(timestep, pooled_projections) + hidden_states = self.proj_in(hidden_states) + hidden_states = self.token_refiner(hidden_states, temb, attention_mask) + + return hidden_states + + +class HunyuanVideo15RotaryPosEmbed(nn.Module): + def __init__(self, patch_size: int, patch_size_t: int, rope_dim: list[int], theta: float = 256.0) -> None: + super().__init__() + + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.rope_dim = rope_dim + self.theta = theta + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + rope_sizes = [num_frames // self.patch_size_t, height // self.patch_size, width // self.patch_size] + + axes_grids = [] + for i in range(len(rope_sizes)): + # Note: The following line diverges from original behaviour. We create the grid on the device, whereas + # original implementation creates it on CPU and then moves it to device. This results in numerical + # differences in layerwise debugging outputs, but visually it is the same. + grid = torch.arange(0, rope_sizes[i], device=hidden_states.device, dtype=torch.float32) + axes_grids.append(grid) + grid = torch.meshgrid(*axes_grids, indexing="ij") # [W, H, T] + grid = torch.stack(grid, dim=0) # [3, W, H, T] + + freqs = [] + for i in range(3): + freq = get_1d_rotary_pos_embed(self.rope_dim[i], grid[i].reshape(-1), self.theta, use_real=True) + freqs.append(freq) + + freqs_cos = torch.cat([f[0] for f in freqs], dim=1) # (W * H * T, D / 2) + freqs_sin = torch.cat([f[1] for f in freqs], dim=1) # (W * H * T, D / 2) + return freqs_cos, freqs_sin + + +class HunyuanVideo15ByT5TextProjection(nn.Module): + def __init__(self, in_features: int, hidden_size: int, out_features: int): + super().__init__() + self.norm = nn.LayerNorm(in_features) + self.linear_1 = nn.Linear(in_features, hidden_size) + self.linear_2 = nn.Linear(hidden_size, hidden_size) + self.linear_3 = nn.Linear(hidden_size, out_features) + self.act_fn = nn.GELU() + + def forward(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.norm(encoder_hidden_states) + hidden_states = self.linear_1(hidden_states) + hidden_states = self.act_fn(hidden_states) + hidden_states = self.linear_2(hidden_states) + hidden_states = self.act_fn(hidden_states) + hidden_states = self.linear_3(hidden_states) + return hidden_states + + +class HunyuanVideo15ImageProjection(nn.Module): + def __init__(self, in_channels: int, hidden_size: int): + super().__init__() + self.norm_in = nn.LayerNorm(in_channels) + self.linear_1 = nn.Linear(in_channels, in_channels) + self.act_fn = nn.GELU() + self.linear_2 = nn.Linear(in_channels, hidden_size) + self.norm_out = nn.LayerNorm(hidden_size) + + def forward(self, image_embeds: torch.Tensor) -> torch.Tensor: + hidden_states = self.norm_in(image_embeds) + hidden_states = self.linear_1(hidden_states) + hidden_states = self.act_fn(hidden_states) + hidden_states = self.linear_2(hidden_states) + hidden_states = self.norm_out(hidden_states) + return hidden_states + + +class HunyuanVideo15TransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + self.norm1_context = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + added_kv_proj_dim=hidden_size, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + context_pre_only=False, + bias=True, + processor=HunyuanVideo15AttnProcessor2_0(), + qk_norm=qk_norm, + eps=1e-6, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + freqs_cis: tuple[torch.Tensor, torch.Tensor] | None = None, + *args, + **kwargs, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Input normalization + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + + # 2. Joint attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=freqs_cis, + ) + + # 3. Modulation and residual connection + hidden_states = hidden_states + attn_output * gate_msa.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + context_attn_output * c_gate_msa.unsqueeze(1) + + norm_hidden_states = self.norm2(hidden_states) + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + # 4. Feed-forward + ff_output = self.ff(norm_hidden_states) + context_ff_output = self.ff_context(norm_encoder_hidden_states) + + hidden_states = hidden_states + gate_mlp.unsqueeze(1) * ff_output + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + + return hidden_states, encoder_hidden_states + + +class HunyuanVideo15Transformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in [HunyuanVideo1.5](https://huggingface.co/tencent/HunyuanVideo1.5). + + Args: + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + num_attention_heads (`int`, defaults to `24`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + num_layers (`int`, defaults to `20`): + The number of layers of dual-stream blocks to use. + num_refiner_layers (`int`, defaults to `2`): + The number of layers of refiner blocks to use. + mlp_ratio (`float`, defaults to `4.0`): + The ratio of the hidden layer size to the input size in the feedforward network. + patch_size (`int`, defaults to `2`): + The size of the spatial patches to use in the patch embedding layer. + patch_size_t (`int`, defaults to `1`): + The size of the tmeporal patches to use in the patch embedding layer. + qk_norm (`str`, defaults to `rms_norm`): + The normalization to use for the query and key projections in the attention layers. + guidance_embeds (`bool`, defaults to `True`): + Whether to use guidance embeddings in the model. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + pooled_projection_dim (`int`, defaults to `768`): + The dimension of the pooled projection of the text embeddings. + rope_theta (`float`, defaults to `256.0`): + The value of theta to use in the RoPE layer. + rope_axes_dim (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions of the axes to use in the RoPE layer. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["x_embedder", "context_embedder", "norm"] + _no_split_modules = [ + "HunyuanVideo15TransformerBlock", + "HunyuanVideo15PatchEmbed", + "HunyuanVideo15TokenRefiner", + ] + _repeated_blocks = [ + "HunyuanVideo15TransformerBlock", + "HunyuanVideo15PatchEmbed", + "HunyuanVideo15TokenRefiner", + ] + + @register_to_config + def __init__( + self, + in_channels: int = 65, + out_channels: int = 32, + num_attention_heads: int = 16, + attention_head_dim: int = 128, + num_layers: int = 54, + num_refiner_layers: int = 2, + mlp_ratio: float = 4.0, + patch_size: int = 1, + patch_size_t: int = 1, + qk_norm: str = "rms_norm", + text_embed_dim: int = 3584, + text_embed_2_dim: int = 1472, + image_embed_dim: int = 1152, + rope_theta: float = 256.0, + rope_axes_dim: tuple[int, ...] = (16, 56, 56), + # YiYi Notes: config based on target_size_config https://github.com/yiyixuxu/hy15/blob/main/hyvideo/pipelines/hunyuan_video_pipeline.py#L205 + target_size: int = 640, # did not name sample_size since it is in pixel spaces + task_type: str = "i2v", + use_meanflow: bool = False, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Latent and condition embedders + self.x_embedder = HunyuanVideo15PatchEmbed((patch_size_t, patch_size, patch_size), in_channels, inner_dim) + self.image_embedder = HunyuanVideo15ImageProjection(image_embed_dim, inner_dim) + + self.context_embedder = HunyuanVideo15TokenRefiner( + text_embed_dim, num_attention_heads, attention_head_dim, num_layers=num_refiner_layers + ) + self.context_embedder_2 = HunyuanVideo15ByT5TextProjection(text_embed_2_dim, 2048, inner_dim) + + self.time_embed = HunyuanVideo15TimeEmbedding(inner_dim, use_meanflow=use_meanflow) + + self.cond_type_embed = nn.Embedding(3, inner_dim) + + # 2. RoPE + self.rope = HunyuanVideo15RotaryPosEmbed(patch_size, patch_size_t, rope_axes_dim, rope_theta) + + # 3. Dual stream transformer blocks + + self.transformer_blocks = nn.ModuleList( + [ + HunyuanVideo15TransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_layers) + ] + ) + + # 5. Output projection + self.norm_out = AdaLayerNormContinuous(inner_dim, inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(inner_dim, patch_size_t * patch_size * patch_size * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_attention_mask: torch.Tensor, + timestep_r: torch.LongTensor | None = None, + encoder_hidden_states_2: torch.Tensor | None = None, + encoder_attention_mask_2: torch.Tensor | None = None, + image_embeds: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size_t, self.config.patch_size, self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + # 1. RoPE + image_rotary_emb = self.rope(hidden_states) + + # 2. Conditional embeddings + temb = self.time_embed(timestep, timestep_r=timestep_r) + + hidden_states = self.x_embedder(hidden_states) + + # qwen text embedding + encoder_hidden_states = self.context_embedder(encoder_hidden_states, timestep, encoder_attention_mask) + + encoder_hidden_states_cond_emb = self.cond_type_embed( + torch.zeros_like(encoder_hidden_states[:, :, 0], dtype=torch.long) + ) + encoder_hidden_states = encoder_hidden_states + encoder_hidden_states_cond_emb + + # byt5 text embedding + encoder_hidden_states_2 = self.context_embedder_2(encoder_hidden_states_2) + + encoder_hidden_states_2_cond_emb = self.cond_type_embed( + torch.ones_like(encoder_hidden_states_2[:, :, 0], dtype=torch.long) + ) + encoder_hidden_states_2 = encoder_hidden_states_2 + encoder_hidden_states_2_cond_emb + + # image embed + encoder_hidden_states_3 = self.image_embedder(image_embeds) + is_t2v = torch.all(image_embeds == 0) + if is_t2v: + encoder_hidden_states_3 = encoder_hidden_states_3 * 0.0 + encoder_attention_mask_3 = torch.zeros( + (batch_size, encoder_hidden_states_3.shape[1]), + dtype=encoder_attention_mask.dtype, + device=encoder_attention_mask.device, + ) + else: + encoder_attention_mask_3 = torch.ones( + (batch_size, encoder_hidden_states_3.shape[1]), + dtype=encoder_attention_mask.dtype, + device=encoder_attention_mask.device, + ) + encoder_hidden_states_3_cond_emb = self.cond_type_embed( + 2 + * torch.ones_like( + encoder_hidden_states_3[:, :, 0], + dtype=torch.long, + ) + ) + encoder_hidden_states_3 = encoder_hidden_states_3 + encoder_hidden_states_3_cond_emb + + # reorder and combine text tokens: combine valid tokens first, then padding + encoder_attention_mask = encoder_attention_mask.bool() + encoder_attention_mask_2 = encoder_attention_mask_2.bool() + encoder_attention_mask_3 = encoder_attention_mask_3.bool() + new_encoder_hidden_states = [] + new_encoder_attention_mask = [] + + for text, text_mask, text_2, text_mask_2, image, image_mask in zip( + encoder_hidden_states, + encoder_attention_mask, + encoder_hidden_states_2, + encoder_attention_mask_2, + encoder_hidden_states_3, + encoder_attention_mask_3, + ): + # Concatenate: [valid_image, valid_byt5, valid_mllm, invalid_image, invalid_byt5, invalid_mllm] + new_encoder_hidden_states.append( + torch.cat( + [ + image[image_mask], # valid image + text_2[text_mask_2], # valid byt5 + text[text_mask], # valid mllm + image[~image_mask], # invalid image + torch.zeros_like(text_2[~text_mask_2]), # invalid byt5 (zeroed) + torch.zeros_like(text[~text_mask]), # invalid mllm (zeroed) + ], + dim=0, + ) + ) + + # Apply same reordering to attention masks + new_encoder_attention_mask.append( + torch.cat( + [ + image_mask[image_mask], + text_mask_2[text_mask_2], + text_mask[text_mask], + image_mask[~image_mask], + text_mask_2[~text_mask_2], + text_mask[~text_mask], + ], + dim=0, + ) + ) + + encoder_hidden_states = torch.stack(new_encoder_hidden_states) + encoder_attention_mask = torch.stack(new_encoder_attention_mask) + + # 4. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + encoder_attention_mask, + image_rotary_emb, + ) + + else: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + encoder_attention_mask, + image_rotary_emb, + ) + + # 5. Output projection + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, -1, p_t, p_h, p_w + ) + hidden_states = hidden_states.permute(0, 4, 1, 5, 2, 6, 3, 7) + hidden_states = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (hidden_states,) + + return Transformer2DModelOutput(sample=hidden_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video_framepack.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video_framepack.py new file mode 100644 index 0000000000000000000000000000000000000000..f005c4d4cd519fb69b4f15b870e39642de83cee4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuan_video_framepack.py @@ -0,0 +1,398 @@ +# Copyright 2025 The Framepack Team, The Hunyuan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, get_logger +from ..cache_utils import CacheMixin +from ..embeddings import get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous +from .transformer_hunyuan_video import ( + HunyuanVideoConditionEmbedding, + HunyuanVideoPatchEmbed, + HunyuanVideoSingleTransformerBlock, + HunyuanVideoTokenRefiner, + HunyuanVideoTransformerBlock, +) + + +logger = get_logger(__name__) # pylint: disable=invalid-name + + +class HunyuanVideoFramepackRotaryPosEmbed(nn.Module): + def __init__(self, patch_size: int, patch_size_t: int, rope_dim: list[int], theta: float = 256.0) -> None: + super().__init__() + + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.rope_dim = rope_dim + self.theta = theta + + def forward(self, frame_indices: torch.Tensor, height: int, width: int, device: torch.device): + height = height // self.patch_size + width = width // self.patch_size + grid = torch.meshgrid( + frame_indices.to(device=device, dtype=torch.float32), + torch.arange(0, height, device=device, dtype=torch.float32), + torch.arange(0, width, device=device, dtype=torch.float32), + indexing="ij", + ) # 3 * [W, H, T] + grid = torch.stack(grid, dim=0) # [3, W, H, T] + + freqs = [] + for i in range(3): + freq = get_1d_rotary_pos_embed(self.rope_dim[i], grid[i].reshape(-1), self.theta, use_real=True) + freqs.append(freq) + + freqs_cos = torch.cat([f[0] for f in freqs], dim=1) # (W * H * T, D / 2) + freqs_sin = torch.cat([f[1] for f in freqs], dim=1) # (W * H * T, D / 2) + + return freqs_cos, freqs_sin + + +class FramepackClipVisionProjection(nn.Module): + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.up = nn.Linear(in_channels, out_channels * 3) + self.down = nn.Linear(out_channels * 3, out_channels) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.up(hidden_states) + hidden_states = F.silu(hidden_states) + hidden_states = self.down(hidden_states) + return hidden_states + + +class HunyuanVideoHistoryPatchEmbed(nn.Module): + def __init__(self, in_channels: int, inner_dim: int): + super().__init__() + self.proj = nn.Conv3d(in_channels, inner_dim, kernel_size=(1, 2, 2), stride=(1, 2, 2)) + self.proj_2x = nn.Conv3d(in_channels, inner_dim, kernel_size=(2, 4, 4), stride=(2, 4, 4)) + self.proj_4x = nn.Conv3d(in_channels, inner_dim, kernel_size=(4, 8, 8), stride=(4, 8, 8)) + + def forward( + self, + latents_clean: torch.Tensor | None = None, + latents_clean_2x: torch.Tensor | None = None, + latents_clean_4x: torch.Tensor | None = None, + ): + if latents_clean is not None: + latents_clean = self.proj(latents_clean) + latents_clean = latents_clean.flatten(2).transpose(1, 2) + if latents_clean_2x is not None: + latents_clean_2x = _pad_for_3d_conv(latents_clean_2x, (2, 4, 4)) + latents_clean_2x = self.proj_2x(latents_clean_2x) + latents_clean_2x = latents_clean_2x.flatten(2).transpose(1, 2) + if latents_clean_4x is not None: + latents_clean_4x = _pad_for_3d_conv(latents_clean_4x, (4, 8, 8)) + latents_clean_4x = self.proj_4x(latents_clean_4x) + latents_clean_4x = latents_clean_4x.flatten(2).transpose(1, 2) + return latents_clean, latents_clean_2x, latents_clean_4x + + +class HunyuanVideoFramepackTransformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin +): + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["x_embedder", "context_embedder", "norm"] + _no_split_modules = [ + "HunyuanVideoTransformerBlock", + "HunyuanVideoSingleTransformerBlock", + "HunyuanVideoHistoryPatchEmbed", + "HunyuanVideoTokenRefiner", + ] + + @register_to_config + def __init__( + self, + in_channels: int = 16, + out_channels: int = 16, + num_attention_heads: int = 24, + attention_head_dim: int = 128, + num_layers: int = 20, + num_single_layers: int = 40, + num_refiner_layers: int = 2, + mlp_ratio: float = 4.0, + patch_size: int = 2, + patch_size_t: int = 1, + qk_norm: str = "rms_norm", + guidance_embeds: bool = True, + text_embed_dim: int = 4096, + pooled_projection_dim: int = 768, + rope_theta: float = 256.0, + rope_axes_dim: tuple[int, ...] = (16, 56, 56), + image_condition_type: str | None = None, + has_image_proj: int = False, + image_proj_dim: int = 1152, + has_clean_x_embedder: int = False, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Latent and condition embedders + self.x_embedder = HunyuanVideoPatchEmbed((patch_size_t, patch_size, patch_size), in_channels, inner_dim) + + # Framepack history projection embedder + self.clean_x_embedder = None + if has_clean_x_embedder: + self.clean_x_embedder = HunyuanVideoHistoryPatchEmbed(in_channels, inner_dim) + + self.context_embedder = HunyuanVideoTokenRefiner( + text_embed_dim, num_attention_heads, attention_head_dim, num_layers=num_refiner_layers + ) + + # Framepack image-conditioning embedder + self.image_projection = FramepackClipVisionProjection(image_proj_dim, inner_dim) if has_image_proj else None + + self.time_text_embed = HunyuanVideoConditionEmbedding( + inner_dim, pooled_projection_dim, guidance_embeds, image_condition_type + ) + + # 2. RoPE + self.rope = HunyuanVideoFramepackRotaryPosEmbed(patch_size, patch_size_t, rope_axes_dim, rope_theta) + + # 3. Dual stream transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + HunyuanVideoTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_layers) + ] + ) + + # 4. Single stream transformer blocks + self.single_transformer_blocks = nn.ModuleList( + [ + HunyuanVideoSingleTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_single_layers) + ] + ) + + # 5. Output projection + self.norm_out = AdaLayerNormContinuous(inner_dim, inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(inner_dim, patch_size_t * patch_size * patch_size * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_attention_mask: torch.Tensor, + pooled_projections: torch.Tensor, + image_embeds: torch.Tensor, + indices_latents: torch.Tensor, + guidance: torch.Tensor | None = None, + latents_clean: torch.Tensor | None = None, + indices_latents_clean: torch.Tensor | None = None, + latents_history_2x: torch.Tensor | None = None, + indices_latents_history_2x: torch.Tensor | None = None, + latents_history_4x: torch.Tensor | None = None, + indices_latents_history_4x: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p, p_t = self.config.patch_size, self.config.patch_size_t + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p + post_patch_width = width // p + original_context_length = post_patch_num_frames * post_patch_height * post_patch_width + + if indices_latents is None: + indices_latents = torch.arange(0, num_frames).unsqueeze(0).expand(batch_size, -1) + + hidden_states = self.x_embedder(hidden_states) + image_rotary_emb = self.rope( + frame_indices=indices_latents, height=height, width=width, device=hidden_states.device + ) + + latents_clean, latents_history_2x, latents_history_4x = self.clean_x_embedder( + latents_clean, latents_history_2x, latents_history_4x + ) + + if latents_clean is not None and indices_latents_clean is not None: + image_rotary_emb_clean = self.rope( + frame_indices=indices_latents_clean, height=height, width=width, device=hidden_states.device + ) + if latents_history_2x is not None and indices_latents_history_2x is not None: + image_rotary_emb_history_2x = self.rope( + frame_indices=indices_latents_history_2x, height=height, width=width, device=hidden_states.device + ) + if latents_history_4x is not None and indices_latents_history_4x is not None: + image_rotary_emb_history_4x = self.rope( + frame_indices=indices_latents_history_4x, height=height, width=width, device=hidden_states.device + ) + + hidden_states, image_rotary_emb = self._pack_history_states( + hidden_states, + latents_clean, + latents_history_2x, + latents_history_4x, + image_rotary_emb, + image_rotary_emb_clean, + image_rotary_emb_history_2x, + image_rotary_emb_history_4x, + post_patch_height, + post_patch_width, + ) + + temb, _ = self.time_text_embed(timestep, pooled_projections, guidance) + encoder_hidden_states = self.context_embedder(encoder_hidden_states, timestep, encoder_attention_mask) + + encoder_hidden_states_image = self.image_projection(image_embeds) + attention_mask_image = encoder_attention_mask.new_ones((batch_size, encoder_hidden_states_image.shape[1])) + + # must cat before (not after) encoder_hidden_states, due to attn masking + encoder_hidden_states = torch.cat([encoder_hidden_states_image, encoder_hidden_states], dim=1) + encoder_attention_mask = torch.cat([attention_mask_image, encoder_attention_mask], dim=1) + + latent_sequence_length = hidden_states.shape[1] + condition_sequence_length = encoder_hidden_states.shape[1] + sequence_length = latent_sequence_length + condition_sequence_length + attention_mask = torch.zeros( + batch_size, sequence_length, device=hidden_states.device, dtype=torch.bool + ) # [B, N] + effective_condition_sequence_length = encoder_attention_mask.sum(dim=1, dtype=torch.int) # [B,] + effective_sequence_length = latent_sequence_length + effective_condition_sequence_length + + if batch_size == 1: + encoder_hidden_states = encoder_hidden_states[:, : effective_condition_sequence_length[0]] + attention_mask = None + else: + for i in range(batch_size): + attention_mask[i, : effective_sequence_length[i]] = True + # [B, 1, 1, N], for broadcasting across attention heads + attention_mask = attention_mask.unsqueeze(1).unsqueeze(1) + + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, temb, attention_mask, image_rotary_emb + ) + + for block in self.single_transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, temb, attention_mask, image_rotary_emb + ) + + else: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, encoder_hidden_states, temb, attention_mask, image_rotary_emb + ) + + for block in self.single_transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, encoder_hidden_states, temb, attention_mask, image_rotary_emb + ) + + hidden_states = hidden_states[:, -original_context_length:] + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, -1, p_t, p, p + ) + hidden_states = hidden_states.permute(0, 4, 1, 5, 2, 6, 3, 7) + hidden_states = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (hidden_states,) + return Transformer2DModelOutput(sample=hidden_states) + + def _pack_history_states( + self, + hidden_states: torch.Tensor, + latents_clean: torch.Tensor | None = None, + latents_history_2x: torch.Tensor | None = None, + latents_history_4x: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] = None, + image_rotary_emb_clean: tuple[torch.Tensor, torch.Tensor] | None = None, + image_rotary_emb_history_2x: tuple[torch.Tensor, torch.Tensor] | None = None, + image_rotary_emb_history_4x: tuple[torch.Tensor, torch.Tensor] | None = None, + height: int = None, + width: int = None, + ): + image_rotary_emb = list(image_rotary_emb) # convert tuple to list for in-place modification + + if latents_clean is not None and image_rotary_emb_clean is not None: + hidden_states = torch.cat([latents_clean, hidden_states], dim=1) + image_rotary_emb[0] = torch.cat([image_rotary_emb_clean[0], image_rotary_emb[0]], dim=0) + image_rotary_emb[1] = torch.cat([image_rotary_emb_clean[1], image_rotary_emb[1]], dim=0) + + if latents_history_2x is not None and image_rotary_emb_history_2x is not None: + hidden_states = torch.cat([latents_history_2x, hidden_states], dim=1) + image_rotary_emb_history_2x = self._pad_rotary_emb(image_rotary_emb_history_2x, height, width, (2, 2, 2)) + image_rotary_emb[0] = torch.cat([image_rotary_emb_history_2x[0], image_rotary_emb[0]], dim=0) + image_rotary_emb[1] = torch.cat([image_rotary_emb_history_2x[1], image_rotary_emb[1]], dim=0) + + if latents_history_4x is not None and image_rotary_emb_history_4x is not None: + hidden_states = torch.cat([latents_history_4x, hidden_states], dim=1) + image_rotary_emb_history_4x = self._pad_rotary_emb(image_rotary_emb_history_4x, height, width, (4, 4, 4)) + image_rotary_emb[0] = torch.cat([image_rotary_emb_history_4x[0], image_rotary_emb[0]], dim=0) + image_rotary_emb[1] = torch.cat([image_rotary_emb_history_4x[1], image_rotary_emb[1]], dim=0) + + return hidden_states, tuple(image_rotary_emb) + + def _pad_rotary_emb( + self, + image_rotary_emb: tuple[torch.Tensor], + height: int, + width: int, + kernel_size: tuple[int, int, int], + ): + # freqs_cos, freqs_sin have shape [W * H * T, D / 2], where D is attention head dim + freqs_cos, freqs_sin = image_rotary_emb + freqs_cos = freqs_cos.unsqueeze(0).permute(0, 2, 1).unflatten(2, (-1, height, width)) + freqs_sin = freqs_sin.unsqueeze(0).permute(0, 2, 1).unflatten(2, (-1, height, width)) + freqs_cos = _pad_for_3d_conv(freqs_cos, kernel_size) + freqs_sin = _pad_for_3d_conv(freqs_sin, kernel_size) + freqs_cos = _center_down_sample_3d(freqs_cos, kernel_size) + freqs_sin = _center_down_sample_3d(freqs_sin, kernel_size) + freqs_cos = freqs_cos.flatten(2).permute(0, 2, 1).squeeze(0) + freqs_sin = freqs_sin.flatten(2).permute(0, 2, 1).squeeze(0) + return freqs_cos, freqs_sin + + +def _pad_for_3d_conv(x, kernel_size): + if isinstance(x, (tuple, list)): + return tuple(_pad_for_3d_conv(i, kernel_size) for i in x) + b, c, t, h, w = x.shape + pt, ph, pw = kernel_size + pad_t = (pt - (t % pt)) % pt + pad_h = (ph - (h % ph)) % ph + pad_w = (pw - (w % pw)) % pw + return torch.nn.functional.pad(x, (0, pad_w, 0, pad_h, 0, pad_t), mode="replicate") + + +def _center_down_sample_3d(x, kernel_size): + if isinstance(x, (tuple, list)): + return tuple(_center_down_sample_3d(i, kernel_size) for i in x) + return torch.nn.functional.avg_pool3d(x, kernel_size, stride=kernel_size) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuanimage.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuanimage.py new file mode 100644 index 0000000000000000000000000000000000000000..a2d3d9229963900c620f0cad37b0b1d383f968af --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_hunyuanimage.py @@ -0,0 +1,890 @@ +# Copyright 2025 The Hunyuan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from diffusers.loaders import FromOriginalModelMixin + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..cache_utils import CacheMixin +from ..embeddings import ( + CombinedTimestepTextProjEmbeddings, + TimestepEmbedding, + Timesteps, + get_1d_rotary_pos_embed, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class HunyuanImageAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "HunyuanImageAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + if attn.add_q_proj is None and encoder_hidden_states is not None: + hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1) + + # 1. QKV projections + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(2, (attn.heads, -1)) # batch_size, seq_len, heads, head_dim + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + # 2. QK normalization + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # 3. Rotational positional embeddings applied to latent stream + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + if attn.add_q_proj is None and encoder_hidden_states is not None: + query = torch.cat( + [ + apply_rotary_emb( + query[:, : -encoder_hidden_states.shape[1]], image_rotary_emb, sequence_dim=1 + ), + query[:, -encoder_hidden_states.shape[1] :], + ], + dim=1, + ) + key = torch.cat( + [ + apply_rotary_emb(key[:, : -encoder_hidden_states.shape[1]], image_rotary_emb, sequence_dim=1), + key[:, -encoder_hidden_states.shape[1] :], + ], + dim=1, + ) + else: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + # 4. Encoder condition QKV projection and normalization + if attn.add_q_proj is not None and encoder_hidden_states is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + encoder_query = encoder_query.unflatten(2, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(2, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(2, (attn.heads, -1)) + + if attn.norm_added_q is not None: + encoder_query = attn.norm_added_q(encoder_query) + if attn.norm_added_k is not None: + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([query, encoder_query], dim=1) + key = torch.cat([key, encoder_key], dim=1) + value = torch.cat([value, encoder_value], dim=1) + + # 5. Attention + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + # 6. Output projection + if encoder_hidden_states is not None: + hidden_states, encoder_hidden_states = ( + hidden_states[:, : -encoder_hidden_states.shape[1]], + hidden_states[:, -encoder_hidden_states.shape[1] :], + ) + + if getattr(attn, "to_out", None) is not None: + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + if getattr(attn, "to_add_out", None) is not None: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + + +class HunyuanImagePatchEmbed(nn.Module): + def __init__( + self, + patch_size: tuple[int, int, tuple[int, int, int]] = (16, 16), + in_chans: int = 3, + embed_dim: int = 768, + ) -> None: + super().__init__() + + self.patch_size = patch_size + + if len(patch_size) == 2: + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + elif len(patch_size) == 3: + self.proj = nn.Conv3d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + else: + raise ValueError(f"patch_size must be a tuple of length 2 or 3, got {len(patch_size)}") + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.proj(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + return hidden_states + + +class HunyuanImageByT5TextProjection(nn.Module): + def __init__(self, in_features: int, hidden_size: int, out_features: int): + super().__init__() + self.norm = nn.LayerNorm(in_features) + self.linear_1 = nn.Linear(in_features, hidden_size) + self.linear_2 = nn.Linear(hidden_size, hidden_size) + self.linear_3 = nn.Linear(hidden_size, out_features) + self.act_fn = nn.GELU() + + def forward(self, encoder_hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.norm(encoder_hidden_states) + hidden_states = self.linear_1(hidden_states) + hidden_states = self.act_fn(hidden_states) + hidden_states = self.linear_2(hidden_states) + hidden_states = self.act_fn(hidden_states) + hidden_states = self.linear_3(hidden_states) + return hidden_states + + +class HunyuanImageAdaNorm(nn.Module): + def __init__(self, in_features: int, out_features: int | None = None) -> None: + super().__init__() + + out_features = out_features or 2 * in_features + self.linear = nn.Linear(in_features, out_features) + self.nonlinearity = nn.SiLU() + + def forward( + self, temb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + temb = self.linear(self.nonlinearity(temb)) + gate_msa, gate_mlp = temb.chunk(2, dim=1) + gate_msa, gate_mlp = gate_msa.unsqueeze(1), gate_mlp.unsqueeze(1) + return gate_msa, gate_mlp + + +class HunyuanImageCombinedTimeGuidanceEmbedding(nn.Module): + def __init__( + self, + embedding_dim: int, + guidance_embeds: bool = False, + use_meanflow: bool = False, + ): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.use_meanflow = use_meanflow + + self.time_proj_r = None + self.timestep_embedder_r = None + if use_meanflow: + self.time_proj_r = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder_r = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.guidance_embedder = None + if guidance_embeds: + self.guidance_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + def forward( + self, + timestep: torch.Tensor, + timestep_r: torch.Tensor | None = None, + guidance: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=timestep.dtype)) + + if timestep_r is not None: + timesteps_proj_r = self.time_proj_r(timestep_r) + timesteps_emb_r = self.timestep_embedder_r(timesteps_proj_r.to(dtype=timestep.dtype)) + timesteps_emb = (timesteps_emb + timesteps_emb_r) / 2 + + if self.guidance_embedder is not None: + guidance_proj = self.time_proj(guidance) + guidance_emb = self.guidance_embedder(guidance_proj.to(dtype=timestep.dtype)) + conditioning = timesteps_emb + guidance_emb + else: + conditioning = timesteps_emb + + return conditioning + + +# IndividualTokenRefinerBlock +@maybe_allow_in_graph +class HunyuanImageIndividualTokenRefinerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, # 28 + attention_head_dim: int, # 128 + mlp_width_ratio: str = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=True, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_width_ratio, activation_fn="linear-silu", dropout=mlp_drop_rate) + + self.norm_out = HunyuanImageAdaNorm(hidden_size, 2 * hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + norm_hidden_states = self.norm1(hidden_states) + + attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=None, + attention_mask=attention_mask, + ) + + gate_msa, gate_mlp = self.norm_out(temb) + hidden_states = hidden_states + attn_output * gate_msa + + ff_output = self.ff(self.norm2(hidden_states)) + hidden_states = hidden_states + ff_output * gate_mlp + + return hidden_states + + +class HunyuanImageIndividualTokenRefiner(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + num_layers: int, + mlp_width_ratio: float = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + self.refiner_blocks = nn.ModuleList( + [ + HunyuanImageIndividualTokenRefinerBlock( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + mlp_width_ratio=mlp_width_ratio, + mlp_drop_rate=mlp_drop_rate, + attention_bias=attention_bias, + ) + for _ in range(num_layers) + ] + ) + + def forward( + self, + hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + ) -> None: + self_attn_mask = None + if attention_mask is not None: + batch_size = attention_mask.shape[0] + seq_len = attention_mask.shape[1] + attention_mask = attention_mask.to(hidden_states.device) + self_attn_mask_1 = attention_mask.view(batch_size, 1, 1, seq_len).repeat(1, 1, seq_len, 1) + self_attn_mask_2 = self_attn_mask_1.transpose(2, 3) + self_attn_mask = (self_attn_mask_1 & self_attn_mask_2).bool() + self_attn_mask[:, :, :, 0] = True + + for block in self.refiner_blocks: + hidden_states = block(hidden_states, temb, self_attn_mask) + + return hidden_states + + +# txt_in +class HunyuanImageTokenRefiner(nn.Module): + def __init__( + self, + in_channels: int, + num_attention_heads: int, + attention_head_dim: int, + num_layers: int, + mlp_ratio: float = 4.0, + mlp_drop_rate: float = 0.0, + attention_bias: bool = True, + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.time_text_embed = CombinedTimestepTextProjEmbeddings( + embedding_dim=hidden_size, pooled_projection_dim=in_channels + ) + self.proj_in = nn.Linear(in_channels, hidden_size, bias=True) + self.token_refiner = HunyuanImageIndividualTokenRefiner( + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + num_layers=num_layers, + mlp_width_ratio=mlp_ratio, + mlp_drop_rate=mlp_drop_rate, + attention_bias=attention_bias, + ) + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + attention_mask: torch.LongTensor | None = None, + ) -> torch.Tensor: + if attention_mask is None: + pooled_hidden_states = hidden_states.mean(dim=1) + else: + original_dtype = hidden_states.dtype + mask_float = attention_mask.float().unsqueeze(-1) + pooled_hidden_states = (hidden_states * mask_float).sum(dim=1) / mask_float.sum(dim=1) + pooled_hidden_states = pooled_hidden_states.to(original_dtype) + + temb = self.time_text_embed(timestep, pooled_hidden_states) + hidden_states = self.proj_in(hidden_states) + hidden_states = self.token_refiner(hidden_states, temb, attention_mask) + + return hidden_states + + +class HunyuanImageRotaryPosEmbed(nn.Module): + def __init__(self, patch_size: tuple | list[int], rope_dim: tuple | list[int], theta: float = 256.0) -> None: + super().__init__() + + if not isinstance(patch_size, (tuple, list)) or len(patch_size) not in [2, 3]: + raise ValueError(f"patch_size must be a tuple or list of length 2 or 3, got {patch_size}") + + if not isinstance(rope_dim, (tuple, list)) or len(rope_dim) not in [2, 3]: + raise ValueError(f"rope_dim must be a tuple or list of length 2 or 3, got {rope_dim}") + + if not len(patch_size) == len(rope_dim): + raise ValueError(f"patch_size and rope_dim must have the same length, got {patch_size} and {rope_dim}") + + self.patch_size = patch_size + self.rope_dim = rope_dim + self.theta = theta + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if hidden_states.ndim == 5: + _, _, frame, height, width = hidden_states.shape + patch_size_frame, patch_size_height, patch_size_width = self.patch_size + rope_sizes = [frame // patch_size_frame, height // patch_size_height, width // patch_size_width] + elif hidden_states.ndim == 4: + _, _, height, width = hidden_states.shape + patch_size_height, patch_size_width = self.patch_size + rope_sizes = [height // patch_size_height, width // patch_size_width] + else: + raise ValueError(f"hidden_states must be a 4D or 5D tensor, got {hidden_states.shape}") + + axes_grids = [] + for i in range(len(rope_sizes)): + grid = torch.arange(0, rope_sizes[i], device=hidden_states.device, dtype=torch.float32) + axes_grids.append(grid) + grid = torch.meshgrid(*axes_grids, indexing="ij") # dim x [H, W] + grid = torch.stack(grid, dim=0) # [2, H, W] + + freqs = [] + for i in range(len(rope_sizes)): + freq = get_1d_rotary_pos_embed(self.rope_dim[i], grid[i].reshape(-1), self.theta, use_real=True) + freqs.append(freq) + + freqs_cos = torch.cat([f[0] for f in freqs], dim=1) # (W * H * T, D / 2) + freqs_sin = torch.cat([f[1] for f in freqs], dim=1) # (W * H * T, D / 2) + return freqs_cos, freqs_sin + + +@maybe_allow_in_graph +class HunyuanImageSingleTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float = 4.0, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + mlp_dim = int(hidden_size * mlp_ratio) + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + bias=True, + processor=HunyuanImageAttnProcessor(), + qk_norm=qk_norm, + eps=1e-6, + pre_only=True, + ) + + self.norm = AdaLayerNormZeroSingle(hidden_size, norm_type="layer_norm") + self.proj_mlp = nn.Linear(hidden_size, mlp_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(hidden_size + mlp_dim, hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + *args, + **kwargs, + ) -> torch.Tensor: + text_seq_length = encoder_hidden_states.shape[1] + hidden_states = torch.cat([hidden_states, encoder_hidden_states], dim=1) + + residual = hidden_states + + # 1. Input normalization + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + + norm_hidden_states, norm_encoder_hidden_states = ( + norm_hidden_states[:, :-text_seq_length, :], + norm_hidden_states[:, -text_seq_length:, :], + ) + + # 2. Attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + attn_output = torch.cat([attn_output, context_attn_output], dim=1) + + # 3. Modulation and residual connection + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + hidden_states = gate.unsqueeze(1) * self.proj_out(hidden_states) + hidden_states = hidden_states + residual + + hidden_states, encoder_hidden_states = ( + hidden_states[:, :-text_seq_length, :], + hidden_states[:, -text_seq_length:, :], + ) + return hidden_states, encoder_hidden_states + + +@maybe_allow_in_graph +class HunyuanImageTransformerBlock(nn.Module): + def __init__( + self, + num_attention_heads: int, + attention_head_dim: int, + mlp_ratio: float, + qk_norm: str = "rms_norm", + ) -> None: + super().__init__() + + hidden_size = num_attention_heads * attention_head_dim + + self.norm1 = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + self.norm1_context = AdaLayerNormZero(hidden_size, norm_type="layer_norm") + + self.attn = Attention( + query_dim=hidden_size, + cross_attention_dim=None, + added_kv_proj_dim=hidden_size, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=hidden_size, + context_pre_only=False, + bias=True, + processor=HunyuanImageAttnProcessor(), + qk_norm=qk_norm, + eps=1e-6, + ) + + self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(hidden_size, mult=mlp_ratio, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + *args, + **kwargs, + ) -> tuple[torch.Tensor, torch.Tensor]: + # 1. Input normalization + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + + # 2. Joint attention + attn_output, context_attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + # 3. Modulation and residual connection + hidden_states = hidden_states + attn_output * gate_msa.unsqueeze(1) + encoder_hidden_states = encoder_hidden_states + context_attn_output * c_gate_msa.unsqueeze(1) + + norm_hidden_states = self.norm2(hidden_states) + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + # 4. Feed-forward + ff_output = self.ff(norm_hidden_states) + context_ff_output = self.ff_context(norm_encoder_hidden_states) + + hidden_states = hidden_states + gate_mlp.unsqueeze(1) * ff_output + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + + return hidden_states, encoder_hidden_states + + +class HunyuanImageTransformer2DModel( + ModelMixin, ConfigMixin, AttentionMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin +): + r""" + The Transformer model used in [HunyuanImage-2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1). + + Args: + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + num_attention_heads (`int`, defaults to `24`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + num_layers (`int`, defaults to `20`): + The number of layers of dual-stream blocks to use. + num_single_layers (`int`, defaults to `40`): + The number of layers of single-stream blocks to use. + num_refiner_layers (`int`, defaults to `2`): + The number of layers of refiner blocks to use. + mlp_ratio (`float`, defaults to `4.0`): + The ratio of the hidden layer size to the input size in the feedforward network. + patch_size (`int`, defaults to `2`): + The size of the spatial patches to use in the patch embedding layer. + patch_size_t (`int`, defaults to `1`): + The size of the tmeporal patches to use in the patch embedding layer. + qk_norm (`str`, defaults to `rms_norm`): + The normalization to use for the query and key projections in the attention layers. + guidance_embeds (`bool`, defaults to `True`): + Whether to use guidance embeddings in the model. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + pooled_projection_dim (`int`, defaults to `768`): + The dimension of the pooled projection of the text embeddings. + rope_theta (`float`, defaults to `256.0`): + The value of theta to use in the RoPE layer. + rope_axes_dim (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions of the axes to use in the RoPE layer. + image_condition_type (`str`, *optional*, defaults to `None`): + The type of image conditioning to use. If `None`, no image conditioning is used. If `latent_concat`, the + image is concatenated to the latent stream. If `token_replace`, the image is used to replace first-frame + tokens in the latent stream and apply conditioning. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["x_embedder", "context_embedder", "norm"] + _no_split_modules = [ + "HunyuanImageTransformerBlock", + "HunyuanImageSingleTransformerBlock", + "HunyuanImagePatchEmbed", + "HunyuanImageTokenRefiner", + ] + _repeated_blocks = ["HunyuanImageTransformerBlock", "HunyuanImageSingleTransformerBlock"] + + @register_to_config + def __init__( + self, + in_channels: int = 64, + out_channels: int = 64, + num_attention_heads: int = 28, + attention_head_dim: int = 128, + num_layers: int = 20, + num_single_layers: int = 40, + num_refiner_layers: int = 2, + mlp_ratio: float = 4.0, + patch_size: tuple[int, int] = (1, 1), + qk_norm: str = "rms_norm", + guidance_embeds: bool = False, + text_embed_dim: int = 3584, + text_embed_2_dim: int | None = None, + rope_theta: float = 256.0, + rope_axes_dim: tuple[int, ...] = (64, 64), + use_meanflow: bool = False, + ) -> None: + super().__init__() + + if not (isinstance(patch_size, (tuple, list)) and len(patch_size) in [2, 3]): + raise ValueError(f"patch_size must be a tuple of length 2 or 3, got {patch_size}") + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Latent and condition embedders + self.x_embedder = HunyuanImagePatchEmbed(patch_size, in_channels, inner_dim) + self.context_embedder = HunyuanImageTokenRefiner( + text_embed_dim, num_attention_heads, attention_head_dim, num_layers=num_refiner_layers + ) + + if text_embed_2_dim is not None: + self.context_embedder_2 = HunyuanImageByT5TextProjection(text_embed_2_dim, 2048, inner_dim) + else: + self.context_embedder_2 = None + + self.time_guidance_embed = HunyuanImageCombinedTimeGuidanceEmbedding(inner_dim, guidance_embeds, use_meanflow) + + # 2. RoPE + self.rope = HunyuanImageRotaryPosEmbed(patch_size, rope_axes_dim, rope_theta) + + # 3. Dual stream transformer blocks + + self.transformer_blocks = nn.ModuleList( + [ + HunyuanImageTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_layers) + ] + ) + + # 4. Single stream transformer blocks + self.single_transformer_blocks = nn.ModuleList( + [ + HunyuanImageSingleTransformerBlock( + num_attention_heads, attention_head_dim, mlp_ratio=mlp_ratio, qk_norm=qk_norm + ) + for _ in range(num_single_layers) + ] + ) + + # 5. Output projection + self.norm_out = AdaLayerNormContinuous(inner_dim, inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(inner_dim, math.prod(patch_size) * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_attention_mask: torch.Tensor, + timestep_r: torch.LongTensor | None = None, + encoder_hidden_states_2: torch.Tensor | None = None, + encoder_attention_mask_2: torch.Tensor | None = None, + guidance: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor | dict[str, torch.Tensor]: + if hidden_states.ndim == 4: + batch_size, channels, height, width = hidden_states.shape + sizes = (height, width) + elif hidden_states.ndim == 5: + batch_size, channels, frame, height, width = hidden_states.shape + sizes = (frame, height, width) + else: + raise ValueError(f"hidden_states must be a 4D or 5D tensor, got {hidden_states.shape}") + + post_patch_sizes = tuple(d // p for d, p in zip(sizes, self.config.patch_size)) + + # 1. RoPE + image_rotary_emb = self.rope(hidden_states) + + # 2. Conditional embeddings + encoder_attention_mask = encoder_attention_mask.bool() + temb = self.time_guidance_embed(timestep, guidance=guidance, timestep_r=timestep_r) + hidden_states = self.x_embedder(hidden_states) + encoder_hidden_states = self.context_embedder(encoder_hidden_states, timestep, encoder_attention_mask) + + if self.context_embedder_2 is not None and encoder_hidden_states_2 is not None: + encoder_hidden_states_2 = self.context_embedder_2(encoder_hidden_states_2) + + encoder_attention_mask_2 = encoder_attention_mask_2.bool() + + # reorder and combine text tokens: combine valid tokens first, then padding + new_encoder_hidden_states = [] + new_encoder_attention_mask = [] + + for text, text_mask, text_2, text_mask_2 in zip( + encoder_hidden_states, encoder_attention_mask, encoder_hidden_states_2, encoder_attention_mask_2 + ): + # Concatenate: [valid_mllm, valid_byt5, invalid_mllm, invalid_byt5] + new_encoder_hidden_states.append( + torch.cat( + [ + text_2[text_mask_2], # valid byt5 + text[text_mask], # valid mllm + text_2[~text_mask_2], # invalid byt5 + text[~text_mask], # invalid mllm + ], + dim=0, + ) + ) + + # Apply same reordering to attention masks + new_encoder_attention_mask.append( + torch.cat( + [ + text_mask_2[text_mask_2], + text_mask[text_mask], + text_mask_2[~text_mask_2], + text_mask[~text_mask], + ], + dim=0, + ) + ) + + encoder_hidden_states = torch.stack(new_encoder_hidden_states) + encoder_attention_mask = torch.stack(new_encoder_attention_mask) + + attention_mask = torch.nn.functional.pad(encoder_attention_mask, (hidden_states.shape[1], 0), value=True) + attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + # 3. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + for block in self.single_transformer_blocks: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + else: + for block in self.transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + for block in self.single_transformer_blocks: + hidden_states, encoder_hidden_states = block( + hidden_states, + encoder_hidden_states, + temb, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + # 4. Output projection + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + # 5. unpatchify + # reshape: [batch_size, *post_patch_dims, channels, *patch_size] + out_channels = self.config.out_channels + reshape_dims = [batch_size] + list(post_patch_sizes) + [out_channels] + list(self.config.patch_size) + hidden_states = hidden_states.reshape(*reshape_dims) + + # create permutation pattern: batch, channels, then interleave post_patch and patch dims + # For 4D: [0, 3, 1, 4, 2, 5] -> batch, channels, post_patch_height, patch_size_height, post_patch_width, patch_size_width + # For 5D: [0, 4, 1, 5, 2, 6, 3, 7] -> batch, channels, post_patch_frame, patch_size_frame, post_patch_height, patch_size_height, post_patch_width, patch_size_width + ndim = len(post_patch_sizes) + permute_pattern = [0, ndim + 1] # batch, channels + for i in range(ndim): + permute_pattern.extend([i + 1, ndim + 2 + i]) # post_patch_sizes[i], patch_sizes[i] + hidden_states = hidden_states.permute(*permute_pattern) + + # flatten patch dimensions: flatten each (post_patch_size, patch_size) pair + # batch_size, channels, post_patch_sizes[0] * patch_sizes[0], post_patch_sizes[1] * patch_sizes[1], ... + final_dims = [batch_size, out_channels] + [ + post_patch * patch for post_patch, patch in zip(post_patch_sizes, self.config.patch_size) + ] + hidden_states = hidden_states.reshape(*final_dims) + + if not return_dict: + return (hidden_states,) + + return Transformer2DModelOutput(sample=hidden_states) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_kandinsky.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_kandinsky.py new file mode 100644 index 0000000000000000000000000000000000000000..88ef70d546c8dafc849ca71943248206b61757e8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_kandinsky.py @@ -0,0 +1,668 @@ +# Copyright 2025 The Kandinsky Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import ( + logging, +) +from ..attention import AttentionMixin, AttentionModuleMixin +from ..attention_dispatch import _CAN_USE_FLEX_ATTN, dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin + + +logger = logging.get_logger(__name__) + + +def get_freqs(dim, max_period=10000.0): + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=dim, dtype=torch.float32) / dim) + return freqs + + +def fractal_flatten(x, rope, shape, block_mask=False): + if block_mask: + pixel_size = 8 + x = local_patching(x, shape, (1, pixel_size, pixel_size), dim=1) + rope = local_patching(rope, shape, (1, pixel_size, pixel_size), dim=1) + x = x.flatten(1, 2) + rope = rope.flatten(1, 2) + else: + x = x.flatten(1, 3) + rope = rope.flatten(1, 3) + return x, rope + + +def fractal_unflatten(x, shape, block_mask=False): + if block_mask: + pixel_size = 8 + x = x.reshape(x.shape[0], -1, pixel_size**2, *x.shape[2:]) + x = local_merge(x, shape, (1, pixel_size, pixel_size), dim=1) + else: + x = x.reshape(*shape, *x.shape[2:]) + return x + + +def local_patching(x, shape, group_size, dim=0): + batch_size, duration, height, width = shape + g1, g2, g3 = group_size + x = x.reshape( + *x.shape[:dim], + duration // g1, + g1, + height // g2, + g2, + width // g3, + g3, + *x.shape[dim + 3 :], + ) + x = x.permute( + *range(len(x.shape[:dim])), + dim, + dim + 2, + dim + 4, + dim + 1, + dim + 3, + dim + 5, + *range(dim + 6, len(x.shape)), + ) + x = x.flatten(dim, dim + 2).flatten(dim + 1, dim + 3) + return x + + +def local_merge(x, shape, group_size, dim=0): + batch_size, duration, height, width = shape + g1, g2, g3 = group_size + x = x.reshape( + *x.shape[:dim], + duration // g1, + height // g2, + width // g3, + g1, + g2, + g3, + *x.shape[dim + 2 :], + ) + x = x.permute( + *range(len(x.shape[:dim])), + dim, + dim + 3, + dim + 1, + dim + 4, + dim + 2, + dim + 5, + *range(dim + 6, len(x.shape)), + ) + x = x.flatten(dim, dim + 1).flatten(dim + 1, dim + 2).flatten(dim + 2, dim + 3) + return x + + +def nablaT_v2( + q: Tensor, + k: Tensor, + sta: Tensor, + thr: float = 0.9, +): + if _CAN_USE_FLEX_ATTN: + from torch.nn.attention.flex_attention import BlockMask + else: + raise ValueError("Nabla attention is not supported with this version of PyTorch") + + q = q.transpose(1, 2).contiguous() + k = k.transpose(1, 2).contiguous() + + # Map estimation + B, h, S, D = q.shape + s1 = S // 64 + qa = q.reshape(B, h, s1, 64, D).mean(-2) + ka = k.reshape(B, h, s1, 64, D).mean(-2).transpose(-2, -1) + map = qa @ ka + + map = torch.softmax(map / math.sqrt(D), dim=-1) + # Map binarization + vals, inds = map.sort(-1) + cvals = vals.cumsum_(-1) + mask = (cvals >= 1 - thr).int() + mask = mask.gather(-1, inds.argsort(-1)) + + mask = torch.logical_or(mask, sta) + + # BlockMask creation + kv_nb = mask.sum(-1).to(torch.int32) + kv_inds = mask.argsort(dim=-1, descending=True).to(torch.int32) + return BlockMask.from_kv_blocks(torch.zeros_like(kv_nb), kv_inds, kv_nb, kv_inds, BLOCK_SIZE=64, mask_mod=None) + + +class Kandinsky5TimeEmbeddings(nn.Module): + def __init__(self, model_dim, time_dim, max_period=10000.0): + super().__init__() + assert model_dim % 2 == 0 + self.model_dim = model_dim + self.max_period = max_period + self.freqs = get_freqs(self.model_dim // 2, self.max_period) + self.in_layer = nn.Linear(model_dim, time_dim, bias=True) + self.activation = nn.SiLU() + self.out_layer = nn.Linear(time_dim, time_dim, bias=True) + + def forward(self, time): + args = torch.outer(time.to(torch.float32), self.freqs.to(device=time.device)) + time_embed = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + time_embed = self.out_layer(self.activation(self.in_layer(time_embed))) + return time_embed + + +class Kandinsky5TextEmbeddings(nn.Module): + def __init__(self, text_dim, model_dim): + super().__init__() + self.in_layer = nn.Linear(text_dim, model_dim, bias=True) + self.norm = nn.LayerNorm(model_dim, elementwise_affine=True) + + def forward(self, text_embed): + text_embed = self.in_layer(text_embed) + return self.norm(text_embed).type_as(text_embed) + + +class Kandinsky5VisualEmbeddings(nn.Module): + def __init__(self, visual_dim, model_dim, patch_size): + super().__init__() + self.patch_size = patch_size + self.in_layer = nn.Linear(math.prod(patch_size) * visual_dim, model_dim) + + def forward(self, x): + batch_size, duration, height, width, dim = x.shape + x = ( + x.view( + batch_size, + duration // self.patch_size[0], + self.patch_size[0], + height // self.patch_size[1], + self.patch_size[1], + width // self.patch_size[2], + self.patch_size[2], + dim, + ) + .permute(0, 1, 3, 5, 2, 4, 6, 7) + .flatten(4, 7) + ) + return self.in_layer(x) + + +class Kandinsky5RoPE1D(nn.Module): + def __init__(self, dim, max_pos=1024, max_period=10000.0): + super().__init__() + self.max_period = max_period + self.dim = dim + self.max_pos = max_pos + freq = get_freqs(dim // 2, max_period) + pos = torch.arange(max_pos, dtype=freq.dtype) + self.register_buffer("args", torch.outer(pos, freq), persistent=False) + + def forward(self, pos): + args = self.args[pos] + cosine = torch.cos(args) + sine = torch.sin(args) + rope = torch.stack([cosine, -sine, sine, cosine], dim=-1) + rope = rope.view(*rope.shape[:-1], 2, 2) + return rope.unsqueeze(-4) + + +class Kandinsky5RoPE3D(nn.Module): + def __init__(self, axes_dims, max_pos=(128, 128, 128), max_period=10000.0): + super().__init__() + self.axes_dims = axes_dims + self.max_pos = max_pos + self.max_period = max_period + + for i, (axes_dim, ax_max_pos) in enumerate(zip(axes_dims, max_pos)): + freq = get_freqs(axes_dim // 2, max_period) + pos = torch.arange(ax_max_pos, dtype=freq.dtype) + self.register_buffer(f"args_{i}", torch.outer(pos, freq), persistent=False) + + def forward(self, shape, pos, scale_factor=(1.0, 1.0, 1.0)): + batch_size, duration, height, width = shape + args_t = self.args_0[pos[0]] / scale_factor[0] + args_h = self.args_1[pos[1]] / scale_factor[1] + args_w = self.args_2[pos[2]] / scale_factor[2] + + args = torch.cat( + [ + args_t.view(1, duration, 1, 1, -1).repeat(batch_size, 1, height, width, 1), + args_h.view(1, 1, height, 1, -1).repeat(batch_size, duration, 1, width, 1), + args_w.view(1, 1, 1, width, -1).repeat(batch_size, duration, height, 1, 1), + ], + dim=-1, + ) + cosine = torch.cos(args) + sine = torch.sin(args) + rope = torch.stack([cosine, -sine, sine, cosine], dim=-1) + rope = rope.view(*rope.shape[:-1], 2, 2) + return rope.unsqueeze(-4) + + +class Kandinsky5Modulation(nn.Module): + def __init__(self, time_dim, model_dim, num_params): + super().__init__() + self.activation = nn.SiLU() + self.out_layer = nn.Linear(time_dim, num_params * model_dim) + self.out_layer.weight.data.zero_() + self.out_layer.bias.data.zero_() + + def forward(self, x): + return self.out_layer(self.activation(x)) + + +class Kandinsky5AttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__(self, attn, hidden_states, encoder_hidden_states=None, rotary_emb=None, sparse_params=None): + # query, key, value = self.get_qkv(x) + query = attn.to_query(hidden_states) + + if encoder_hidden_states is not None: + key = attn.to_key(encoder_hidden_states) + value = attn.to_value(encoder_hidden_states) + + shape, cond_shape = query.shape[:-1], key.shape[:-1] + query = query.reshape(*shape, attn.num_heads, -1) + key = key.reshape(*cond_shape, attn.num_heads, -1) + value = value.reshape(*cond_shape, attn.num_heads, -1) + + else: + key = attn.to_key(hidden_states) + value = attn.to_value(hidden_states) + + shape = query.shape[:-1] + query = query.reshape(*shape, attn.num_heads, -1) + key = key.reshape(*shape, attn.num_heads, -1) + value = value.reshape(*shape, attn.num_heads, -1) + + # query, key = self.norm_qk(query, key) + query = attn.query_norm(query.float()).type_as(query) + key = attn.key_norm(key.float()).type_as(key) + + def apply_rotary(x, rope): + x_ = x.reshape(*x.shape[:-1], -1, 1, 2).to(torch.float32) + x_out = (rope * x_).sum(dim=-1) + return x_out.reshape(*x.shape).to(torch.bfloat16) + + if rotary_emb is not None: + query = apply_rotary(query, rotary_emb).type_as(query) + key = apply_rotary(key, rotary_emb).type_as(key) + + if sparse_params is not None: + attn_mask = nablaT_v2( + query, + key, + sparse_params["sta_mask"], + thr=sparse_params["P"], + ) + + else: + attn_mask = None + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attn_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + hidden_states = hidden_states.flatten(-2, -1) + + attn_out = attn.out_layer(hidden_states) + return attn_out + + +class Kandinsky5Attention(nn.Module, AttentionModuleMixin): + _default_processor_cls = Kandinsky5AttnProcessor + _available_processors = [ + Kandinsky5AttnProcessor, + ] + + def __init__(self, num_channels, head_dim, processor=None): + super().__init__() + assert num_channels % head_dim == 0 + self.num_heads = num_channels // head_dim + + self.to_query = nn.Linear(num_channels, num_channels, bias=True) + self.to_key = nn.Linear(num_channels, num_channels, bias=True) + self.to_value = nn.Linear(num_channels, num_channels, bias=True) + self.query_norm = nn.RMSNorm(head_dim) + self.key_norm = nn.RMSNorm(head_dim) + + self.out_layer = nn.Linear(num_channels, num_channels, bias=True) + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + sparse_params: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {} + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"attention_processor_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + + return self.processor( + self, + hidden_states, + encoder_hidden_states=encoder_hidden_states, + sparse_params=sparse_params, + rotary_emb=rotary_emb, + **kwargs, + ) + + +class Kandinsky5FeedForward(nn.Module): + def __init__(self, dim, ff_dim): + super().__init__() + self.in_layer = nn.Linear(dim, ff_dim, bias=False) + self.activation = nn.GELU() + self.out_layer = nn.Linear(ff_dim, dim, bias=False) + + def forward(self, x): + return self.out_layer(self.activation(self.in_layer(x))) + + +class Kandinsky5OutLayer(nn.Module): + def __init__(self, model_dim, time_dim, visual_dim, patch_size): + super().__init__() + self.patch_size = patch_size + self.modulation = Kandinsky5Modulation(time_dim, model_dim, 2) + self.norm = nn.LayerNorm(model_dim, elementwise_affine=False) + self.out_layer = nn.Linear(model_dim, math.prod(patch_size) * visual_dim, bias=True) + + def forward(self, visual_embed, text_embed, time_embed): + shift, scale = torch.chunk(self.modulation(time_embed).unsqueeze(dim=1), 2, dim=-1) + + visual_embed = ( + self.norm(visual_embed.float()) * (scale.float()[:, None, None] + 1.0) + shift.float()[:, None, None] + ).type_as(visual_embed) + + x = self.out_layer(visual_embed) + + batch_size, duration, height, width, _ = x.shape + x = ( + x.view( + batch_size, + duration, + height, + width, + -1, + self.patch_size[0], + self.patch_size[1], + self.patch_size[2], + ) + .permute(0, 1, 5, 2, 6, 3, 7, 4) + .flatten(1, 2) + .flatten(2, 3) + .flatten(3, 4) + ) + return x + + +class Kandinsky5TransformerEncoderBlock(nn.Module): + def __init__(self, model_dim, time_dim, ff_dim, head_dim): + super().__init__() + self.text_modulation = Kandinsky5Modulation(time_dim, model_dim, 6) + + self.self_attention_norm = nn.LayerNorm(model_dim, elementwise_affine=False) + self.self_attention = Kandinsky5Attention(model_dim, head_dim, processor=Kandinsky5AttnProcessor()) + + self.feed_forward_norm = nn.LayerNorm(model_dim, elementwise_affine=False) + self.feed_forward = Kandinsky5FeedForward(model_dim, ff_dim) + + def forward(self, x, time_embed, rope): + self_attn_params, ff_params = torch.chunk(self.text_modulation(time_embed).unsqueeze(dim=1), 2, dim=-1) + shift, scale, gate = torch.chunk(self_attn_params, 3, dim=-1) + out = (self.self_attention_norm(x.float()) * (scale.float() + 1.0) + shift.float()).type_as(x) + out = self.self_attention(out, rotary_emb=rope) + x = (x.float() + gate.float() * out.float()).type_as(x) + + shift, scale, gate = torch.chunk(ff_params, 3, dim=-1) + out = (self.feed_forward_norm(x.float()) * (scale.float() + 1.0) + shift.float()).type_as(x) + out = self.feed_forward(out) + x = (x.float() + gate.float() * out.float()).type_as(x) + + return x + + +class Kandinsky5TransformerDecoderBlock(nn.Module): + def __init__(self, model_dim, time_dim, ff_dim, head_dim): + super().__init__() + self.visual_modulation = Kandinsky5Modulation(time_dim, model_dim, 9) + + self.self_attention_norm = nn.LayerNorm(model_dim, elementwise_affine=False) + self.self_attention = Kandinsky5Attention(model_dim, head_dim, processor=Kandinsky5AttnProcessor()) + + self.cross_attention_norm = nn.LayerNorm(model_dim, elementwise_affine=False) + self.cross_attention = Kandinsky5Attention(model_dim, head_dim, processor=Kandinsky5AttnProcessor()) + + self.feed_forward_norm = nn.LayerNorm(model_dim, elementwise_affine=False) + self.feed_forward = Kandinsky5FeedForward(model_dim, ff_dim) + + def forward(self, visual_embed, text_embed, time_embed, rope, sparse_params): + self_attn_params, cross_attn_params, ff_params = torch.chunk( + self.visual_modulation(time_embed).unsqueeze(dim=1), 3, dim=-1 + ) + + shift, scale, gate = torch.chunk(self_attn_params, 3, dim=-1) + visual_out = (self.self_attention_norm(visual_embed.float()) * (scale.float() + 1.0) + shift.float()).type_as( + visual_embed + ) + visual_out = self.self_attention(visual_out, rotary_emb=rope, sparse_params=sparse_params) + visual_embed = (visual_embed.float() + gate.float() * visual_out.float()).type_as(visual_embed) + + shift, scale, gate = torch.chunk(cross_attn_params, 3, dim=-1) + visual_out = (self.cross_attention_norm(visual_embed.float()) * (scale.float() + 1.0) + shift.float()).type_as( + visual_embed + ) + visual_out = self.cross_attention(visual_out, encoder_hidden_states=text_embed) + visual_embed = (visual_embed.float() + gate.float() * visual_out.float()).type_as(visual_embed) + + shift, scale, gate = torch.chunk(ff_params, 3, dim=-1) + visual_out = (self.feed_forward_norm(visual_embed.float()) * (scale.float() + 1.0) + shift.float()).type_as( + visual_embed + ) + visual_out = self.feed_forward(visual_out) + visual_embed = (visual_embed.float() + gate.float() * visual_out.float()).type_as(visual_embed) + + return visual_embed + + +class Kandinsky5Transformer3DModel( + ModelMixin, + ConfigMixin, + PeftAdapterMixin, + FromOriginalModelMixin, + CacheMixin, + AttentionMixin, +): + """ + A 3D Diffusion Transformer model for video-like data. + """ + + _repeated_blocks = [ + "Kandinsky5TransformerEncoderBlock", + "Kandinsky5TransformerDecoderBlock", + ] + _keep_in_fp32_modules = ["time_embeddings", "modulation", "visual_modulation", "text_modulation"] + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_visual_dim=4, + in_text_dim=3584, + in_text_dim2=768, + time_dim=512, + out_visual_dim=4, + patch_size=(1, 2, 2), + model_dim=2048, + ff_dim=5120, + num_text_blocks=2, + num_visual_blocks=32, + axes_dims=(16, 24, 24), + visual_cond=False, + attention_type: str = "regular", + attention_causal: bool = None, + attention_local: bool = None, + attention_glob: bool = None, + attention_window: int = None, + attention_P: float = None, + attention_wT: int = None, + attention_wW: int = None, + attention_wH: int = None, + attention_add_sta: bool = None, + attention_method: str = None, + ): + super().__init__() + + head_dim = sum(axes_dims) + self.in_visual_dim = in_visual_dim + self.model_dim = model_dim + self.patch_size = patch_size + self.visual_cond = visual_cond + self.attention_type = attention_type + + visual_embed_dim = 2 * in_visual_dim + 1 if visual_cond else in_visual_dim + + # Initialize embeddings + self.time_embeddings = Kandinsky5TimeEmbeddings(model_dim, time_dim) + self.text_embeddings = Kandinsky5TextEmbeddings(in_text_dim, model_dim) + self.pooled_text_embeddings = Kandinsky5TextEmbeddings(in_text_dim2, time_dim) + self.visual_embeddings = Kandinsky5VisualEmbeddings(visual_embed_dim, model_dim, patch_size) + + # Initialize positional embeddings + self.text_rope_embeddings = Kandinsky5RoPE1D(head_dim) + self.visual_rope_embeddings = Kandinsky5RoPE3D(axes_dims) + + # Initialize transformer blocks + self.text_transformer_blocks = nn.ModuleList( + [Kandinsky5TransformerEncoderBlock(model_dim, time_dim, ff_dim, head_dim) for _ in range(num_text_blocks)] + ) + + self.visual_transformer_blocks = nn.ModuleList( + [ + Kandinsky5TransformerDecoderBlock(model_dim, time_dim, ff_dim, head_dim) + for _ in range(num_visual_blocks) + ] + ) + + # Initialize output layer + self.out_layer = Kandinsky5OutLayer(model_dim, time_dim, out_visual_dim, patch_size) + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, # x + encoder_hidden_states: torch.Tensor, # text_embed + timestep: torch.Tensor, # time + pooled_projections: torch.Tensor, # pooled_text_embed + visual_rope_pos: tuple[int, int, int], + text_rope_pos: torch.LongTensor, + scale_factor: tuple[float, float, float] = (1.0, 1.0, 1.0), + sparse_params: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> Transformer2DModelOutput | torch.FloatTensor: + """ + Forward pass of the Kandinsky5 3D Transformer. + + Args: + hidden_states (`torch.FloatTensor`): Input visual states + encoder_hidden_states (`torch.FloatTensor`): Text embeddings + timestep (`torch.Tensor` or `float` or `int`): Current timestep + pooled_projections (`torch.FloatTensor`): Pooled text embeddings + visual_rope_pos (`tuple[int, int, int]`): Position for visual RoPE + text_rope_pos (`torch.LongTensor`): Position for text RoPE + scale_factor (`tuple[float, float, float]`, optional): Scale factor for RoPE + sparse_params (`dict[str, Any]`, optional): Parameters for sparse attention + return_dict (`bool`, optional): Whether to return a dictionary + + Returns: + [`~models.transformer_2d.Transformer2DModelOutput`] or `torch.FloatTensor`: The output of the transformer + """ + x = hidden_states + text_embed = encoder_hidden_states + time = timestep + pooled_text_embed = pooled_projections + + text_embed = self.text_embeddings(text_embed) + time_embed = self.time_embeddings(time) + time_embed = time_embed + self.pooled_text_embeddings(pooled_text_embed) + visual_embed = self.visual_embeddings(x) + text_rope = self.text_rope_embeddings(text_rope_pos) + text_rope = text_rope.unsqueeze(dim=0) + + for text_transformer_block in self.text_transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + text_embed = self._gradient_checkpointing_func( + text_transformer_block, text_embed, time_embed, text_rope + ) + else: + text_embed = text_transformer_block(text_embed, time_embed, text_rope) + + visual_shape = visual_embed.shape[:-1] + visual_rope = self.visual_rope_embeddings(visual_shape, visual_rope_pos, scale_factor) + to_fractal = sparse_params["to_fractal"] if sparse_params is not None else False + visual_embed, visual_rope = fractal_flatten(visual_embed, visual_rope, visual_shape, block_mask=to_fractal) + + for visual_transformer_block in self.visual_transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + visual_embed = self._gradient_checkpointing_func( + visual_transformer_block, + visual_embed, + text_embed, + time_embed, + visual_rope, + sparse_params, + ) + else: + visual_embed = visual_transformer_block( + visual_embed, text_embed, time_embed, visual_rope, sparse_params + ) + + visual_embed = fractal_unflatten(visual_embed, visual_shape, block_mask=to_fractal) + x = self.out_layer(visual_embed, text_embed, time_embed) + + if not return_dict: + return x + + return Transformer2DModelOutput(sample=x) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_longcat_image.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_longcat_image.py new file mode 100644 index 0000000000000000000000000000000000000000..7a000fa2b2ce29291d75305b9e13e5268ba4e782 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_longcat_image.py @@ -0,0 +1,546 @@ +# Copyright 2025 MeiTuan LongCat-Image Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import TimestepEmbedding, Timesteps, apply_rotary_emb, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_projections(attn: "LongCatImageAttention", hidden_states, encoder_hidden_states=None): + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + encoder_query = encoder_key = encoder_value = None + if encoder_hidden_states is not None and attn.added_kv_proj_dim is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_fused_projections(attn: "LongCatImageAttention", hidden_states, encoder_hidden_states=None): + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + + encoder_query = encoder_key = encoder_value = (None,) + if encoder_hidden_states is not None and hasattr(attn, "to_added_qkv"): + encoder_query, encoder_key, encoder_value = attn.to_added_qkv(encoder_hidden_states).chunk(3, dim=-1) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_qkv_projections(attn: "LongCatImageAttention", hidden_states, encoder_hidden_states=None): + if attn.fused_projections: + return _get_fused_projections(attn, hidden_states, encoder_hidden_states) + return _get_projections(attn, hidden_states, encoder_hidden_states) + + +class LongCatImageAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "LongCatImageAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if attn.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class LongCatImageAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = LongCatImageAttnProcessor + _available_processors = [ + LongCatImageAttnProcessor, + ] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + context_pre_only: bool | None = None, + pre_only: bool = False, + elementwise_affine: bool = True, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = out_dim if out_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.heads = out_dim // dim_head if out_dim is not None else heads + self.added_kv_proj_dim = added_kv_proj_dim + self.added_proj_bias = added_proj_bias + + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + + if not self.pre_only: + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if added_kv_proj_dim is not None: + self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps) + self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps) + self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {"ip_adapter_masks", "ip_hidden_states"} + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"joint_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +@maybe_allow_in_graph +class LongCatImageSingleTransformerBlock(nn.Module): + def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, mlp_ratio: float = 4.0): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + + self.norm = AdaLayerNormZeroSingle(dim) + self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim) + self.act_mlp = nn.GELU(approximate="tanh") + self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim) + + self.attn = LongCatImageAttention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=LongCatImageAttnProcessor(), + eps=1e-6, + pre_only=True, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_len = encoder_hidden_states.shape[1] + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + residual = hidden_states + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states)) + joint_attention_kwargs = joint_attention_kwargs or {} + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + gate = gate.unsqueeze(1) + hidden_states = gate * self.proj_out(hidden_states) + hidden_states = residual + hidden_states + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + encoder_hidden_states, hidden_states = hidden_states[:, :text_seq_len], hidden_states[:, text_seq_len:] + return encoder_hidden_states, hidden_states + + +@maybe_allow_in_graph +class LongCatImageTransformerBlock(nn.Module): + def __init__( + self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6 + ): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim) + self.norm1_context = AdaLayerNormZero(dim) + + self.attn = LongCatImageAttention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=LongCatImageAttnProcessor(), + eps=eps, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + joint_attention_kwargs = joint_attention_kwargs or {} + + # Attention. + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + if len(attention_outputs) == 2: + attn_output, context_attn_output = attention_outputs + elif len(attention_outputs) == 3: + attn_output, context_attn_output, ip_attn_output = attention_outputs + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + if len(attention_outputs) == 3: + hidden_states = hidden_states + ip_attn_output + + # Process attention outputs for the `encoder_hidden_states`. + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class LongCatImagePosEmbed(nn.Module): + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + is_npu = ids.device.type == "npu" + freqs_dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + for i in range(n_axes): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[:, i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +class LongCatImageTimestepEmbeddings(nn.Module): + def __init__(self, embedding_dim): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + def forward(self, timestep, hidden_dtype): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D) + + return timesteps_emb + + +class LongCatImageTransformer2DModel( + ModelMixin, + ConfigMixin, + PeftAdapterMixin, + FromOriginalModelMixin, + CacheMixin, + AttentionMixin, +): + """ + The Transformer model introduced in Longcat-Image. + """ + + _supports_gradient_checkpointing = True + _repeated_blocks = ["LongCatImageTransformerBlock", "LongCatImageSingleTransformerBlock"] + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + num_layers: int = 19, + num_single_layers: int = 38, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 3584, + pooled_projection_dim: int = 3584, + axes_dims_rope: list[int] = [16, 56, 56], + ): + super().__init__() + self.out_channels = in_channels + self.inner_dim = num_attention_heads * attention_head_dim + self.pooled_projection_dim = pooled_projection_dim + + self.pos_embed = LongCatImagePosEmbed(theta=10000, axes_dim=axes_dims_rope) + + self.time_embed = LongCatImageTimestepEmbeddings(embedding_dim=self.inner_dim) + + self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim) + self.x_embedder = torch.nn.Linear(in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + LongCatImageTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for i in range(num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + LongCatImageSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for i in range(num_single_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + self.use_checkpoint = [True] * num_layers + self.use_single_checkpoint = [True] * num_single_layers + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + guidance: torch.Tensor = None, + return_dict: bool = True, + ) -> torch.FloatTensor | Transformer2DModelOutput: + """ + The forward method. + + Args: + hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + encoder_hidden_states (`torch.FloatTensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states: (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + hidden_states = self.x_embedder(hidden_states) + + timestep = timestep.to(hidden_states.dtype) * 1000 + + temb = self.time_embed(timestep, hidden_states.dtype) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing and self.use_checkpoint[index_block]: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + ) + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing and self.use_single_checkpoint[index_block]: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + ) + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ltx.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ltx.py new file mode 100644 index 0000000000000000000000000000000000000000..0034d636761b8e175eec414e254695f8935316d8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ltx.py @@ -0,0 +1,567 @@ +# Copyright 2025 The Lightricks team and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import math +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, deprecate, is_torch_version, logging +from ...utils.torch_utils import maybe_allow_in_graph +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaTextProjection +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class LTXVideoAttentionProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = "`LTXVideoAttentionProcessor2_0` is deprecated and this will be removed in a future version. Please use `LTXVideoAttnProcessor`" + deprecate("LTXVideoAttentionProcessor2_0", "1.0.0", deprecation_message) + + return LTXVideoAttnProcessor(*args, **kwargs) + + +class LTXVideoAttnProcessor: + r""" + Processor for implementing attention (SDPA is used by default if you're using PyTorch 2.0). This is used in the LTX + model. It applies a normalization layer and rotary embedding on the query and key vector. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if is_torch_version("<", "2.0"): + raise ValueError( + "LTX attention processors require a minimum PyTorch version of 2.0. Please upgrade your PyTorch installation." + ) + + def __call__( + self, + attn: "LTXAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb) + key = apply_rotary_emb(key, image_rotary_emb) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class LTXAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = LTXVideoAttnProcessor + _available_processors = [LTXVideoAttnProcessor] + + def __init__( + self, + query_dim: int, + heads: int = 8, + kv_heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = True, + cross_attention_dim: int | None = None, + out_bias: bool = True, + qk_norm: str = "rms_norm_across_heads", + processor=None, + ): + super().__init__() + if qk_norm != "rms_norm_across_heads": + raise NotImplementedError("Only 'rms_norm_across_heads' is supported as a valid value for `qk_norm`.") + + self.head_dim = dim_head + self.inner_dim = dim_head * heads + self.inner_kv_dim = self.inner_dim if kv_heads is None else dim_head * kv_heads + self.query_dim = query_dim + self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = query_dim + self.heads = heads + + norm_eps = 1e-5 + norm_elementwise_affine = True + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head * kv_heads, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias) + self.to_v = torch.nn.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias) + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +class LTXVideoRotaryPosEmbed(nn.Module): + def __init__( + self, + dim: int, + base_num_frames: int = 20, + base_height: int = 2048, + base_width: int = 2048, + patch_size: int = 1, + patch_size_t: int = 1, + theta: float = 10000.0, + ) -> None: + super().__init__() + + self.dim = dim + self.base_num_frames = base_num_frames + self.base_height = base_height + self.base_width = base_width + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.theta = theta + + def _prepare_video_coords( + self, + batch_size: int, + num_frames: int, + height: int, + width: int, + rope_interpolation_scale: tuple[torch.Tensor, float, float], + device: torch.device, + ) -> torch.Tensor: + # Always compute rope in fp32 + grid_h = torch.arange(height, dtype=torch.float32, device=device) + grid_w = torch.arange(width, dtype=torch.float32, device=device) + grid_f = torch.arange(num_frames, dtype=torch.float32, device=device) + grid = torch.meshgrid(grid_f, grid_h, grid_w, indexing="ij") + grid = torch.stack(grid, dim=0) + grid = grid.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1) + + if rope_interpolation_scale is not None: + grid[:, 0:1] = grid[:, 0:1] * rope_interpolation_scale[0] * self.patch_size_t / self.base_num_frames + grid[:, 1:2] = grid[:, 1:2] * rope_interpolation_scale[1] * self.patch_size / self.base_height + grid[:, 2:3] = grid[:, 2:3] * rope_interpolation_scale[2] * self.patch_size / self.base_width + + grid = grid.flatten(2, 4).transpose(1, 2) + + return grid + + def forward( + self, + hidden_states: torch.Tensor, + num_frames: int | None = None, + height: int | None = None, + width: int | None = None, + rope_interpolation_scale: tuple[torch.Tensor, float, float] | None = None, + video_coords: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + batch_size = hidden_states.size(0) + + if video_coords is None: + grid = self._prepare_video_coords( + batch_size, + num_frames, + height, + width, + rope_interpolation_scale=rope_interpolation_scale, + device=hidden_states.device, + ) + else: + grid = torch.stack( + [ + video_coords[:, 0] / self.base_num_frames, + video_coords[:, 1] / self.base_height, + video_coords[:, 2] / self.base_width, + ], + dim=-1, + ) + + start = 1.0 + end = self.theta + freqs = self.theta ** torch.linspace( + math.log(start, self.theta), + math.log(end, self.theta), + self.dim // 6, + device=hidden_states.device, + dtype=torch.float32, + ) + freqs = freqs * math.pi / 2.0 + freqs = freqs * (grid.unsqueeze(-1) * 2 - 1) + freqs = freqs.transpose(-1, -2).flatten(2) + + cos_freqs = freqs.cos().repeat_interleave(2, dim=-1) + sin_freqs = freqs.sin().repeat_interleave(2, dim=-1) + + if self.dim % 6 != 0: + cos_padding = torch.ones_like(cos_freqs[:, :, : self.dim % 6]) + sin_padding = torch.zeros_like(cos_freqs[:, :, : self.dim % 6]) + cos_freqs = torch.cat([cos_padding, cos_freqs], dim=-1) + sin_freqs = torch.cat([sin_padding, sin_freqs], dim=-1) + + return cos_freqs, sin_freqs + + +@maybe_allow_in_graph +class LTXVideoTransformerBlock(nn.Module): + r""" + Transformer block used in [LTX](https://huggingface.co/Lightricks/LTX-Video). + + Args: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + qk_norm (`str`, defaults to `"rms_norm"`): + The normalization layer to use. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + cross_attention_dim: int, + qk_norm: str = "rms_norm_across_heads", + activation_fn: str = "gelu-approximate", + attention_bias: bool = True, + attention_out_bias: bool = True, + eps: float = 1e-6, + elementwise_affine: bool = False, + ): + super().__init__() + + self.norm1 = RMSNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.attn1 = LTXAttention( + query_dim=dim, + heads=num_attention_heads, + kv_heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + cross_attention_dim=None, + out_bias=attention_out_bias, + qk_norm=qk_norm, + ) + + self.norm2 = RMSNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.attn2 = LTXAttention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + kv_heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + out_bias=attention_out_bias, + qk_norm=qk_norm, + ) + + self.ff = FeedForward(dim, activation_fn=activation_fn) + + self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + encoder_attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size = hidden_states.size(0) + norm_hidden_states = self.norm1(hidden_states) + + num_ada_params = self.scale_shift_table.shape[0] + ada_values = self.scale_shift_table[None, None].to(temb.device) + temb.reshape( + batch_size, temb.size(1), num_ada_params, -1 + ) + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ada_values.unbind(dim=2) + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + + attn_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=None, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states + attn_hidden_states * gate_msa + + attn_hidden_states = self.attn2( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + image_rotary_emb=None, + attention_mask=encoder_attention_mask, + ) + hidden_states = hidden_states + attn_hidden_states + norm_hidden_states = self.norm2(hidden_states) * (1 + scale_mlp) + shift_mlp + + ff_output = self.ff(norm_hidden_states) + hidden_states = hidden_states + ff_output * gate_mlp + + return hidden_states + + +@maybe_allow_in_graph +class LTXVideoTransformer3DModel( + ModelMixin, ConfigMixin, AttentionMixin, FromOriginalModelMixin, PeftAdapterMixin, CacheMixin +): + r""" + A Transformer model for video-like data used in [LTX](https://huggingface.co/Lightricks/LTX-Video). + + Args: + in_channels (`int`, defaults to `128`): + The number of channels in the input. + out_channels (`int`, defaults to `128`): + The number of channels in the output. + patch_size (`int`, defaults to `1`): + The size of the spatial patches to use in the patch embedding layer. + patch_size_t (`int`, defaults to `1`): + The size of the tmeporal patches to use in the patch embedding layer. + num_attention_heads (`int`, defaults to `32`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + cross_attention_dim (`int`, defaults to `2048 `): + The number of channels for cross attention heads. + num_layers (`int`, defaults to `28`): + The number of layers of Transformer blocks to use. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + qk_norm (`str`, defaults to `"rms_norm_across_heads"`): + The normalization layer to use. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["norm"] + _repeated_blocks = ["LTXVideoTransformerBlock"] + _cp_plan = { + "": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_attention_mask": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False), + }, + "rope": { + 0: ContextParallelInput(split_dim=1, expected_dims=3, split_output=True), + 1: ContextParallelInput(split_dim=1, expected_dims=3, split_output=True), + }, + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + in_channels: int = 128, + out_channels: int = 128, + patch_size: int = 1, + patch_size_t: int = 1, + num_attention_heads: int = 32, + attention_head_dim: int = 64, + cross_attention_dim: int = 2048, + num_layers: int = 28, + activation_fn: str = "gelu-approximate", + qk_norm: str = "rms_norm_across_heads", + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + caption_channels: int = 4096, + attention_bias: bool = True, + attention_out_bias: bool = True, + ) -> None: + super().__init__() + + out_channels = out_channels or in_channels + inner_dim = num_attention_heads * attention_head_dim + + self.proj_in = nn.Linear(in_channels, inner_dim) + + self.scale_shift_table = nn.Parameter(torch.randn(2, inner_dim) / inner_dim**0.5) + self.time_embed = AdaLayerNormSingle(inner_dim, use_additional_conditions=False) + + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=inner_dim) + + self.rope = LTXVideoRotaryPosEmbed( + dim=inner_dim, + base_num_frames=20, + base_height=2048, + base_width=2048, + patch_size=patch_size, + patch_size_t=patch_size_t, + theta=10000.0, + ) + + self.transformer_blocks = nn.ModuleList( + [ + LTXVideoTransformerBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + cross_attention_dim=cross_attention_dim, + qk_norm=qk_norm, + activation_fn=activation_fn, + attention_bias=attention_bias, + attention_out_bias=attention_out_bias, + eps=norm_eps, + elementwise_affine=norm_elementwise_affine, + ) + for _ in range(num_layers) + ] + ) + + self.norm_out = nn.LayerNorm(inner_dim, eps=1e-6, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_attention_mask: torch.Tensor, + num_frames: int | None = None, + height: int | None = None, + width: int | None = None, + rope_interpolation_scale: tuple[float, float, float] | torch.Tensor | None = None, + video_coords: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor: + image_rotary_emb = self.rope(hidden_states, num_frames, height, width, rope_interpolation_scale, video_coords) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + batch_size = hidden_states.size(0) + hidden_states = self.proj_in(hidden_states) + + temb, embedded_timestep = self.time_embed( + timestep.flatten(), + batch_size=batch_size, + hidden_dtype=hidden_states.dtype, + ) + + temb = temb.view(batch_size, -1, temb.size(-1)) + embedded_timestep = embedded_timestep.view(batch_size, -1, embedded_timestep.size(-1)) + + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.size(-1)) + + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + encoder_attention_mask, + ) + else: + hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + encoder_attention_mask=encoder_attention_mask, + ) + + scale_shift_values = self.scale_shift_table[None, None] + embedded_timestep[:, :, None] + shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1] + + hidden_states = self.norm_out(hidden_states) + hidden_states = hidden_states * (1 + scale) + shift + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) + + +def apply_rotary_emb(x, freqs): + cos, sin = freqs + x_real, x_imag = x.unflatten(2, (-1, 2)).unbind(-1) # [B, S, C // 2] + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(2) + out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + return out diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ltx2.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ltx2.py new file mode 100644 index 0000000000000000000000000000000000000000..db949ca34a1fe7ee97754561c54cb473caa12c3f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ltx2.py @@ -0,0 +1,1323 @@ +# Copyright 2025 The Lightricks team and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from dataclasses import dataclass +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import BaseOutput, apply_lora_scale, is_torch_version, logging +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaCombinedTimestepSizeEmbeddings, PixArtAlphaTextProjection +from ..modeling_utils import ModelMixin +from ..normalization import RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def apply_interleaved_rotary_emb(x: torch.Tensor, freqs: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: + cos, sin = freqs + x_real, x_imag = x.unflatten(2, (-1, 2)).unbind(-1) # [B, S, C // 2] + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(2) + out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + return out + + +def apply_split_rotary_emb(x: torch.Tensor, freqs: tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: + cos, sin = freqs + + x_dtype = x.dtype + needs_reshape = False + if x.ndim != 4 and cos.ndim == 4: + # cos is (b, h, t, r) -> reshape x to (b, h, t, dim_per_head) + b, h, t, _ = cos.shape + x = x.reshape(b, t, h, -1).swapaxes(1, 2) + needs_reshape = True + + # Split last dim (2*r) into (d=2, r) + last = x.shape[-1] + if last % 2 != 0: + raise ValueError(f"Expected x.shape[-1] to be even for split rotary, got {last}.") + r = last // 2 + + # (..., 2, r) + split_x = x.reshape(*x.shape[:-1], 2, r).float() # Explicitly upcast to float + first_x = split_x[..., :1, :] # (..., 1, r) + second_x = split_x[..., 1:, :] # (..., 1, r) + + cos_u = cos.unsqueeze(-2) # broadcast to (..., 1, r) against (..., 2, r) + sin_u = sin.unsqueeze(-2) + + out = split_x * cos_u + first_out = out[..., :1, :] + second_out = out[..., 1:, :] + + first_out.addcmul_(-sin_u, second_x) + second_out.addcmul_(sin_u, first_x) + + out = out.reshape(*out.shape[:-2], last) + + if needs_reshape: + out = out.swapaxes(1, 2).reshape(b, t, -1) + + out = out.to(dtype=x_dtype) + return out + + +@dataclass +class AudioVisualModelOutput(BaseOutput): + r""" + Holds the output of an audiovisual model which produces both visual (e.g. video) and audio outputs. + + Args: + sample (`torch.Tensor` of shape `(batch_size, num_channels, num_frames, height, width)`): + The hidden states output conditioned on the `encoder_hidden_states` input, representing the visual output + of the model. This is typically a video (spatiotemporal) output. + audio_sample (`torch.Tensor` of shape `(batch_size, TODO)`): + The audio output of the audiovisual model. + """ + + sample: "torch.Tensor" # noqa: F821 + audio_sample: "torch.Tensor" # noqa: F821 + + +class LTX2AdaLayerNormSingle(nn.Module): + r""" + Norm layer adaptive layer norm single (adaLN-single). + + As proposed in PixArt-Alpha (see: https://huggingface.co/papers/2310.00426; Section 2.3) and adapted by the LTX-2.0 + model. In particular, the number of modulation parameters to be calculated is now configurable. + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + num_mod_params (`int`, *optional*, defaults to `6`): + The number of modulation parameters which will be calculated in the first return argument. The default of 6 + is standard, but sometimes we may want to have a different (usually smaller) number of modulation + parameters. + use_additional_conditions (`bool`, *optional*, defaults to `False`): + Whether to use additional conditions for normalization or not. + """ + + def __init__(self, embedding_dim: int, num_mod_params: int = 6, use_additional_conditions: bool = False): + super().__init__() + self.num_mod_params = num_mod_params + + self.emb = PixArtAlphaCombinedTimestepSizeEmbeddings( + embedding_dim, size_emb_dim=embedding_dim // 3, use_additional_conditions=use_additional_conditions + ) + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, self.num_mod_params * embedding_dim, bias=True) + + def forward( + self, + timestep: torch.Tensor, + added_cond_kwargs: dict[str, torch.Tensor] | None = None, + batch_size: int | None = None, + hidden_dtype: torch.dtype | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + # No modulation happening here. + added_cond_kwargs = added_cond_kwargs or {"resolution": None, "aspect_ratio": None} + embedded_timestep = self.emb(timestep, **added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_dtype) + return self.linear(self.silu(embedded_timestep)), embedded_timestep + + +class LTX2AudioVideoAttnProcessor: + r""" + Processor for implementing attention (SDPA is used by default if you're using PyTorch 2.0) for the LTX-2.0 model. + Compared to the LTX-1.0 model, we allow the RoPE embeddings for the queries and keys to be separate so that we can + support audio-to-video (a2v) and video-to-audio (v2a) cross attention. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if is_torch_version("<", "2.0"): + raise ValueError( + "LTX attention processors require a minimum PyTorch version of 2.0. Please upgrade your PyTorch installation." + ) + + def __call__( + self, + attn: "LTX2Attention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + query_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + key_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if query_rotary_emb is not None: + if attn.rope_type == "interleaved": + query = apply_interleaved_rotary_emb(query, query_rotary_emb) + key = apply_interleaved_rotary_emb( + key, key_rotary_emb if key_rotary_emb is not None else query_rotary_emb + ) + elif attn.rope_type == "split": + query = apply_split_rotary_emb(query, query_rotary_emb) + key = apply_split_rotary_emb(key, key_rotary_emb if key_rotary_emb is not None else query_rotary_emb) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class LTX2Attention(torch.nn.Module, AttentionModuleMixin): + r""" + Attention class for all LTX-2.0 attention layers. Compared to LTX-1.0, this supports specifying the query and key + RoPE embeddings separately for audio-to-video (a2v) and video-to-audio (v2a) cross-attention. + """ + + _default_processor_cls = LTX2AudioVideoAttnProcessor + _available_processors = [LTX2AudioVideoAttnProcessor] + + def __init__( + self, + query_dim: int, + heads: int = 8, + kv_heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = True, + cross_attention_dim: int | None = None, + out_bias: bool = True, + qk_norm: str = "rms_norm_across_heads", + norm_eps: float = 1e-6, + norm_elementwise_affine: bool = True, + rope_type: str = "interleaved", + processor=None, + ): + super().__init__() + if qk_norm != "rms_norm_across_heads": + raise NotImplementedError("Only 'rms_norm_across_heads' is supported as a valid value for `qk_norm`.") + + self.head_dim = dim_head + self.inner_dim = dim_head * heads + self.inner_kv_dim = self.inner_dim if kv_heads is None else dim_head * kv_heads + self.query_dim = query_dim + self.cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = query_dim + self.heads = heads + self.rope_type = rope_type + + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head * kv_heads, eps=norm_eps, elementwise_affine=norm_elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias) + self.to_v = torch.nn.Linear(self.cross_attention_dim, self.inner_kv_dim, bias=bias) + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + query_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + key_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + hidden_states = self.processor( + self, hidden_states, encoder_hidden_states, attention_mask, query_rotary_emb, key_rotary_emb, **kwargs + ) + return hidden_states + + +class LTX2VideoTransformerBlock(nn.Module): + r""" + Transformer block used in [LTX-2.0](https://huggingface.co/Lightricks/LTX-Video). + + Args: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + qk_norm (`str`, defaults to `"rms_norm"`): + The normalization layer to use. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + cross_attention_dim: int, + audio_dim: int, + audio_num_attention_heads: int, + audio_attention_head_dim, + audio_cross_attention_dim: int, + qk_norm: str = "rms_norm_across_heads", + activation_fn: str = "gelu-approximate", + attention_bias: bool = True, + attention_out_bias: bool = True, + eps: float = 1e-6, + elementwise_affine: bool = False, + rope_type: str = "interleaved", + ): + super().__init__() + + # 1. Self-Attention (video and audio) + self.norm1 = RMSNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.attn1 = LTX2Attention( + query_dim=dim, + heads=num_attention_heads, + kv_heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + cross_attention_dim=None, + out_bias=attention_out_bias, + qk_norm=qk_norm, + rope_type=rope_type, + ) + + self.audio_norm1 = RMSNorm(audio_dim, eps=eps, elementwise_affine=elementwise_affine) + self.audio_attn1 = LTX2Attention( + query_dim=audio_dim, + heads=audio_num_attention_heads, + kv_heads=audio_num_attention_heads, + dim_head=audio_attention_head_dim, + bias=attention_bias, + cross_attention_dim=None, + out_bias=attention_out_bias, + qk_norm=qk_norm, + rope_type=rope_type, + ) + + # 2. Prompt Cross-Attention + self.norm2 = RMSNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.attn2 = LTX2Attention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + kv_heads=num_attention_heads, + dim_head=attention_head_dim, + bias=attention_bias, + out_bias=attention_out_bias, + qk_norm=qk_norm, + rope_type=rope_type, + ) + + self.audio_norm2 = RMSNorm(audio_dim, eps=eps, elementwise_affine=elementwise_affine) + self.audio_attn2 = LTX2Attention( + query_dim=audio_dim, + cross_attention_dim=audio_cross_attention_dim, + heads=audio_num_attention_heads, + kv_heads=audio_num_attention_heads, + dim_head=audio_attention_head_dim, + bias=attention_bias, + out_bias=attention_out_bias, + qk_norm=qk_norm, + rope_type=rope_type, + ) + + # 3. Audio-to-Video (a2v) and Video-to-Audio (v2a) Cross-Attention + # Audio-to-Video (a2v) Attention --> Q: Video; K,V: Audio + self.audio_to_video_norm = RMSNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.audio_to_video_attn = LTX2Attention( + query_dim=dim, + cross_attention_dim=audio_dim, + heads=audio_num_attention_heads, + kv_heads=audio_num_attention_heads, + dim_head=audio_attention_head_dim, + bias=attention_bias, + out_bias=attention_out_bias, + qk_norm=qk_norm, + rope_type=rope_type, + ) + + # Video-to-Audio (v2a) Attention --> Q: Audio; K,V: Video + self.video_to_audio_norm = RMSNorm(audio_dim, eps=eps, elementwise_affine=elementwise_affine) + self.video_to_audio_attn = LTX2Attention( + query_dim=audio_dim, + cross_attention_dim=dim, + heads=audio_num_attention_heads, + kv_heads=audio_num_attention_heads, + dim_head=audio_attention_head_dim, + bias=attention_bias, + out_bias=attention_out_bias, + qk_norm=qk_norm, + rope_type=rope_type, + ) + + # 4. Feedforward layers + self.norm3 = RMSNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.ff = FeedForward(dim, activation_fn=activation_fn) + + self.audio_norm3 = RMSNorm(audio_dim, eps=eps, elementwise_affine=elementwise_affine) + self.audio_ff = FeedForward(audio_dim, activation_fn=activation_fn) + + # 5. Per-Layer Modulation Parameters + # Self-Attention / Feedforward AdaLayerNorm-Zero mod params + self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5) + self.audio_scale_shift_table = nn.Parameter(torch.randn(6, audio_dim) / audio_dim**0.5) + + # Per-layer a2v, v2a Cross-Attention mod params + self.video_a2v_cross_attn_scale_shift_table = nn.Parameter(torch.randn(5, dim)) + self.audio_a2v_cross_attn_scale_shift_table = nn.Parameter(torch.randn(5, audio_dim)) + + def forward( + self, + hidden_states: torch.Tensor, + audio_hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + audio_encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + temb_audio: torch.Tensor, + temb_ca_scale_shift: torch.Tensor, + temb_ca_audio_scale_shift: torch.Tensor, + temb_ca_gate: torch.Tensor, + temb_ca_audio_gate: torch.Tensor, + video_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + audio_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ca_video_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ca_audio_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + encoder_attention_mask: torch.Tensor | None = None, + audio_encoder_attention_mask: torch.Tensor | None = None, + a2v_cross_attention_mask: torch.Tensor | None = None, + v2a_cross_attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size = hidden_states.size(0) + + # 1. Video and Audio Self-Attention + norm_hidden_states = self.norm1(hidden_states) + + num_ada_params = self.scale_shift_table.shape[0] + ada_values = self.scale_shift_table[None, None].to(temb.device) + temb.reshape( + batch_size, temb.size(1), num_ada_params, -1 + ) + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ada_values.unbind(dim=2) + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + + attn_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=None, + query_rotary_emb=video_rotary_emb, + ) + hidden_states = hidden_states + attn_hidden_states * gate_msa + + norm_audio_hidden_states = self.audio_norm1(audio_hidden_states) + + num_audio_ada_params = self.audio_scale_shift_table.shape[0] + audio_ada_values = self.audio_scale_shift_table[None, None].to(temb_audio.device) + temb_audio.reshape( + batch_size, temb_audio.size(1), num_audio_ada_params, -1 + ) + audio_shift_msa, audio_scale_msa, audio_gate_msa, audio_shift_mlp, audio_scale_mlp, audio_gate_mlp = ( + audio_ada_values.unbind(dim=2) + ) + norm_audio_hidden_states = norm_audio_hidden_states * (1 + audio_scale_msa) + audio_shift_msa + + attn_audio_hidden_states = self.audio_attn1( + hidden_states=norm_audio_hidden_states, + encoder_hidden_states=None, + query_rotary_emb=audio_rotary_emb, + ) + audio_hidden_states = audio_hidden_states + attn_audio_hidden_states * audio_gate_msa + + # 2. Video and Audio Cross-Attention with the text embeddings + norm_hidden_states = self.norm2(hidden_states) + attn_hidden_states = self.attn2( + norm_hidden_states, + encoder_hidden_states=encoder_hidden_states, + query_rotary_emb=None, + attention_mask=encoder_attention_mask, + ) + hidden_states = hidden_states + attn_hidden_states + + norm_audio_hidden_states = self.audio_norm2(audio_hidden_states) + attn_audio_hidden_states = self.audio_attn2( + norm_audio_hidden_states, + encoder_hidden_states=audio_encoder_hidden_states, + query_rotary_emb=None, + attention_mask=audio_encoder_attention_mask, + ) + audio_hidden_states = audio_hidden_states + attn_audio_hidden_states + + # 3. Audio-to-Video (a2v) and Video-to-Audio (v2a) Cross-Attention + norm_hidden_states = self.audio_to_video_norm(hidden_states) + norm_audio_hidden_states = self.video_to_audio_norm(audio_hidden_states) + + # Combine global and per-layer cross attention modulation parameters + # Video + video_per_layer_ca_scale_shift = self.video_a2v_cross_attn_scale_shift_table[:4, :] + video_per_layer_ca_gate = self.video_a2v_cross_attn_scale_shift_table[4:, :] + + video_ca_scale_shift_table = ( + video_per_layer_ca_scale_shift[:, :, ...].to(temb_ca_scale_shift.dtype) + + temb_ca_scale_shift.reshape(batch_size, temb_ca_scale_shift.shape[1], 4, -1) + ).unbind(dim=2) + video_ca_gate = ( + video_per_layer_ca_gate[:, :, ...].to(temb_ca_gate.dtype) + + temb_ca_gate.reshape(batch_size, temb_ca_gate.shape[1], 1, -1) + ).unbind(dim=2) + + video_a2v_ca_scale, video_a2v_ca_shift, video_v2a_ca_scale, video_v2a_ca_shift = video_ca_scale_shift_table + a2v_gate = video_ca_gate[0].squeeze(2) + + # Audio + audio_per_layer_ca_scale_shift = self.audio_a2v_cross_attn_scale_shift_table[:4, :] + audio_per_layer_ca_gate = self.audio_a2v_cross_attn_scale_shift_table[4:, :] + + audio_ca_scale_shift_table = ( + audio_per_layer_ca_scale_shift[:, :, ...].to(temb_ca_audio_scale_shift.dtype) + + temb_ca_audio_scale_shift.reshape(batch_size, temb_ca_audio_scale_shift.shape[1], 4, -1) + ).unbind(dim=2) + audio_ca_gate = ( + audio_per_layer_ca_gate[:, :, ...].to(temb_ca_audio_gate.dtype) + + temb_ca_audio_gate.reshape(batch_size, temb_ca_audio_gate.shape[1], 1, -1) + ).unbind(dim=2) + + audio_a2v_ca_scale, audio_a2v_ca_shift, audio_v2a_ca_scale, audio_v2a_ca_shift = audio_ca_scale_shift_table + v2a_gate = audio_ca_gate[0].squeeze(2) + + # Audio-to-Video Cross Attention: Q: Video; K,V: Audio + mod_norm_hidden_states = norm_hidden_states * (1 + video_a2v_ca_scale.squeeze(2)) + video_a2v_ca_shift.squeeze( + 2 + ) + mod_norm_audio_hidden_states = norm_audio_hidden_states * ( + 1 + audio_a2v_ca_scale.squeeze(2) + ) + audio_a2v_ca_shift.squeeze(2) + + a2v_attn_hidden_states = self.audio_to_video_attn( + mod_norm_hidden_states, + encoder_hidden_states=mod_norm_audio_hidden_states, + query_rotary_emb=ca_video_rotary_emb, + key_rotary_emb=ca_audio_rotary_emb, + attention_mask=a2v_cross_attention_mask, + ) + + hidden_states = hidden_states + a2v_gate * a2v_attn_hidden_states + + # Video-to-Audio Cross Attention: Q: Audio; K,V: Video + mod_norm_hidden_states = norm_hidden_states * (1 + video_v2a_ca_scale.squeeze(2)) + video_v2a_ca_shift.squeeze( + 2 + ) + mod_norm_audio_hidden_states = norm_audio_hidden_states * ( + 1 + audio_v2a_ca_scale.squeeze(2) + ) + audio_v2a_ca_shift.squeeze(2) + + v2a_attn_hidden_states = self.video_to_audio_attn( + mod_norm_audio_hidden_states, + encoder_hidden_states=mod_norm_hidden_states, + query_rotary_emb=ca_audio_rotary_emb, + key_rotary_emb=ca_video_rotary_emb, + attention_mask=v2a_cross_attention_mask, + ) + + audio_hidden_states = audio_hidden_states + v2a_gate * v2a_attn_hidden_states + + # 4. Feedforward + norm_hidden_states = self.norm3(hidden_states) * (1 + scale_mlp) + shift_mlp + ff_output = self.ff(norm_hidden_states) + hidden_states = hidden_states + ff_output * gate_mlp + + norm_audio_hidden_states = self.audio_norm3(audio_hidden_states) * (1 + audio_scale_mlp) + audio_shift_mlp + audio_ff_output = self.audio_ff(norm_audio_hidden_states) + audio_hidden_states = audio_hidden_states + audio_ff_output * audio_gate_mlp + + return hidden_states, audio_hidden_states + + +class LTX2AudioVideoRotaryPosEmbed(nn.Module): + """ + Video and audio rotary positional embeddings (RoPE) for the LTX-2.0 model. + + Args: + causal_offset (`int`, *optional*, defaults to `1`): + Offset in the temporal axis for causal VAE modeling. This is typically 1 (for causal modeling where the VAE + treats the very first frame differently), but could also be 0 (for non-causal modeling). + """ + + def __init__( + self, + dim: int, + patch_size: int = 1, + patch_size_t: int = 1, + base_num_frames: int = 20, + base_height: int = 2048, + base_width: int = 2048, + sampling_rate: int = 16000, + hop_length: int = 160, + scale_factors: tuple[int, ...] = (8, 32, 32), + theta: float = 10000.0, + causal_offset: int = 1, + modality: str = "video", + double_precision: bool = True, + rope_type: str = "interleaved", + num_attention_heads: int = 32, + ) -> None: + super().__init__() + + self.dim = dim + self.patch_size = patch_size + self.patch_size_t = patch_size_t + + if rope_type not in ["interleaved", "split"]: + raise ValueError(f"{rope_type=} not supported. Choose between 'interleaved' and 'split'.") + self.rope_type = rope_type + + self.base_num_frames = base_num_frames + self.num_attention_heads = num_attention_heads + + # Video-specific + self.base_height = base_height + self.base_width = base_width + + # Audio-specific + self.sampling_rate = sampling_rate + self.hop_length = hop_length + self.audio_latents_per_second = float(sampling_rate) / float(hop_length) / float(scale_factors[0]) + + self.scale_factors = scale_factors + self.theta = theta + self.causal_offset = causal_offset + + self.modality = modality + if self.modality not in ["video", "audio"]: + raise ValueError(f"Modality {modality} is not supported. Supported modalities are `video` and `audio`.") + self.double_precision = double_precision + + def prepare_video_coords( + self, + batch_size: int, + num_frames: int, + height: int, + width: int, + device: torch.device, + fps: float = 24.0, + ) -> torch.Tensor: + """ + Create per-dimension bounds [inclusive start, exclusive end) for each patch with respect to the original pixel + space video grid (num_frames, height, width). This will ultimately have shape (batch_size, 3, num_patches, 2) + where + - axis 1 (size 3) enumerates (frame, height, width) dimensions (e.g. idx 0 corresponds to frames) + - axis 3 (size 2) stores `[start, end)` indices within each dimension + + Args: + batch_size (`int`): + Batch size of the video latents. + num_frames (`int`): + Number of latent frames in the video latents. + height (`int`): + Latent height of the video latents. + width (`int`): + Latent width of the video latents. + device (`torch.device`): + Device on which to create the video grid. + + Returns: + `torch.Tensor`: + Per-dimension patch boundaries tensor of shape [batch_size, 3, num_patches, 2]. + """ + + # 1. Generate grid coordinates for each spatiotemporal dimension (frames, height, width) + # Always compute rope in fp32 + grid_f = torch.arange(start=0, end=num_frames, step=self.patch_size_t, dtype=torch.float32, device=device) + grid_h = torch.arange(start=0, end=height, step=self.patch_size, dtype=torch.float32, device=device) + grid_w = torch.arange(start=0, end=width, step=self.patch_size, dtype=torch.float32, device=device) + # indexing='ij' ensures that the dimensions are kept in order as (frames, height, width) + grid = torch.meshgrid(grid_f, grid_h, grid_w, indexing="ij") + grid = torch.stack(grid, dim=0) # [3, N_F, N_H, N_W], where e.g. N_F is the number of temporal patches + + # 2. Get the patch boundaries with respect to the latent video grid + patch_size = (self.patch_size_t, self.patch_size, self.patch_size) + patch_size_delta = torch.tensor(patch_size, dtype=grid.dtype, device=grid.device) + patch_ends = grid + patch_size_delta.view(3, 1, 1, 1) + + # Combine the start (grid) and end (patch_ends) coordinates along new trailing dimension + latent_coords = torch.stack([grid, patch_ends], dim=-1) # [3, N_F, N_H, N_W, 2] + # Reshape to (batch_size, 3, num_patches, 2) + latent_coords = latent_coords.flatten(1, 3) + latent_coords = latent_coords.unsqueeze(0).repeat(batch_size, 1, 1, 1) + + # 3. Calculate the pixel space patch boundaries from the latent boundaries. + scale_tensor = torch.tensor(self.scale_factors, device=latent_coords.device) + # Broadcast the VAE scale factors such that they are compatible with latent_coords's shape + broadcast_shape = [1] * latent_coords.ndim + broadcast_shape[1] = -1 # This is the (frame, height, width) dim + # Apply per-axis scaling to convert latent coordinates to pixel space coordinates + pixel_coords = latent_coords * scale_tensor.view(*broadcast_shape) + + # As the VAE temporal stride for the first frame is 1 instead of self.vae_scale_factors[0], we need to shift + # and clamp to keep the first-frame timestamps causal and non-negative. + pixel_coords[:, 0, ...] = (pixel_coords[:, 0, ...] + self.causal_offset - self.scale_factors[0]).clamp(min=0) + + # Scale the temporal coordinates by the video FPS + pixel_coords[:, 0, ...] = pixel_coords[:, 0, ...] / fps + + return pixel_coords + + def prepare_audio_coords( + self, + batch_size: int, + num_frames: int, + device: torch.device, + shift: int = 0, + ) -> torch.Tensor: + """ + Create per-dimension bounds [inclusive start, exclusive end) of start and end timestamps for each latent frame. + This will ultimately have shape (batch_size, 3, num_patches, 2) where + - axis 1 (size 1) represents the temporal dimension + - axis 3 (size 2) stores `[start, end)` indices within each dimension + + Args: + batch_size (`int`): + Batch size of the audio latents. + num_frames (`int`): + Number of latent frames in the audio latents. + device (`torch.device`): + Device on which to create the audio grid. + shift (`int`, *optional*, defaults to `0`): + Offset on the latent indices. Different shift values correspond to different overlapping windows with + respect to the same underlying latent grid. + + Returns: + `torch.Tensor`: + Per-dimension patch boundaries tensor of shape [batch_size, 1, num_patches, 2]. + """ + + # 1. Generate coordinates in the frame (time) dimension. + # Always compute rope in fp32 + grid_f = torch.arange( + start=shift, end=num_frames + shift, step=self.patch_size_t, dtype=torch.float32, device=device + ) + + # 2. Calculate start timstamps in seconds with respect to the original spectrogram grid + audio_scale_factor = self.scale_factors[0] + # Scale back to mel spectrogram space + grid_start_mel = grid_f * audio_scale_factor + # Handle first frame causal offset, ensuring non-negative timestamps + grid_start_mel = (grid_start_mel + self.causal_offset - audio_scale_factor).clip(min=0) + # Convert mel bins back into seconds + grid_start_s = grid_start_mel * self.hop_length / self.sampling_rate + + # 3. Calculate start timstamps in seconds with respect to the original spectrogram grid + grid_end_mel = (grid_f + self.patch_size_t) * audio_scale_factor + grid_end_mel = (grid_end_mel + self.causal_offset - audio_scale_factor).clip(min=0) + grid_end_s = grid_end_mel * self.hop_length / self.sampling_rate + + audio_coords = torch.stack([grid_start_s, grid_end_s], dim=-1) # [num_patches, 2] + audio_coords = audio_coords.unsqueeze(0).expand(batch_size, -1, -1) # [batch_size, num_patches, 2] + audio_coords = audio_coords.unsqueeze(1) # [batch_size, 1, num_patches, 2] + return audio_coords + + def prepare_coords(self, *args, **kwargs): + if self.modality == "video": + return self.prepare_video_coords(*args, **kwargs) + elif self.modality == "audio": + return self.prepare_audio_coords(*args, **kwargs) + + def forward( + self, coords: torch.Tensor, device: str | torch.device | None = None + ) -> tuple[torch.Tensor, torch.Tensor]: + device = device or coords.device + + # Number of spatiotemporal dimensions (3 for video, 1 (temporal) for audio and cross attn) + num_pos_dims = coords.shape[1] + + # 1. If the coords are patch boundaries [start, end), use the midpoint of these boundaries as the patch + # position index + if coords.ndim == 4: + coords_start, coords_end = coords.chunk(2, dim=-1) + coords = (coords_start + coords_end) / 2.0 + coords = coords.squeeze(-1) # [B, num_pos_dims, num_patches] + + # 2. Get coordinates as a fraction of the base data shape + if self.modality == "video": + max_positions = (self.base_num_frames, self.base_height, self.base_width) + elif self.modality == "audio": + max_positions = (self.base_num_frames,) + # [B, num_pos_dims, num_patches] --> [B, num_patches, num_pos_dims] + grid = torch.stack([coords[:, i] / max_positions[i] for i in range(num_pos_dims)], dim=-1).to(device) + # Number of spatiotemporal dimensions (3 for video, 1 for audio and cross attn) times 2 for cos, sin + num_rope_elems = num_pos_dims * 2 + + # 3. Create a 1D grid of frequencies for RoPE + freqs_dtype = torch.float64 if self.double_precision else torch.float32 + pow_indices = torch.pow( + self.theta, + torch.linspace(start=0.0, end=1.0, steps=self.dim // num_rope_elems, dtype=freqs_dtype, device=device), + ) + freqs = (pow_indices * torch.pi / 2.0).to(dtype=torch.float32) + + # 4. Tensor-vector outer product between pos ids tensor of shape (B, 3, num_patches) and freqs vector of shape + # (self.dim // num_elems,) + freqs = (grid.unsqueeze(-1) * 2 - 1) * freqs # [B, num_patches, num_pos_dims, self.dim // num_elems] + freqs = freqs.transpose(-1, -2).flatten(2) # [B, num_patches, self.dim // 2] + + # 5. Get real, interleaved (cos, sin) frequencies, padded to self.dim + # TODO: consider implementing this as a utility and reuse in `connectors.py`. + # src/diffusers/pipelines/ltx2/connectors.py + if self.rope_type == "interleaved": + cos_freqs = freqs.cos().repeat_interleave(2, dim=-1) + sin_freqs = freqs.sin().repeat_interleave(2, dim=-1) + + if self.dim % num_rope_elems != 0: + cos_padding = torch.ones_like(cos_freqs[:, :, : self.dim % num_rope_elems]) + sin_padding = torch.zeros_like(cos_freqs[:, :, : self.dim % num_rope_elems]) + cos_freqs = torch.cat([cos_padding, cos_freqs], dim=-1) + sin_freqs = torch.cat([sin_padding, sin_freqs], dim=-1) + + elif self.rope_type == "split": + expected_freqs = self.dim // 2 + current_freqs = freqs.shape[-1] + pad_size = expected_freqs - current_freqs + cos_freq = freqs.cos() + sin_freq = freqs.sin() + + if pad_size != 0: + cos_padding = torch.ones_like(cos_freq[:, :, :pad_size]) + sin_padding = torch.zeros_like(sin_freq[:, :, :pad_size]) + + cos_freq = torch.concatenate([cos_padding, cos_freq], axis=-1) + sin_freq = torch.concatenate([sin_padding, sin_freq], axis=-1) + + # Reshape freqs to be compatible with multi-head attention + b = cos_freq.shape[0] + t = cos_freq.shape[1] + + cos_freq = cos_freq.reshape(b, t, self.num_attention_heads, -1) + sin_freq = sin_freq.reshape(b, t, self.num_attention_heads, -1) + + cos_freqs = torch.swapaxes(cos_freq, 1, 2) # (B,H,T,D//2) + sin_freqs = torch.swapaxes(sin_freq, 1, 2) # (B,H,T,D//2) + + return cos_freqs, sin_freqs + + +class LTX2VideoTransformer3DModel( + ModelMixin, ConfigMixin, AttentionMixin, FromOriginalModelMixin, PeftAdapterMixin, CacheMixin +): + r""" + A Transformer model for video-like data used in [LTX](https://huggingface.co/Lightricks/LTX-Video). + + Args: + in_channels (`int`, defaults to `128`): + The number of channels in the input. + out_channels (`int`, defaults to `128`): + The number of channels in the output. + patch_size (`int`, defaults to `1`): + The size of the spatial patches to use in the patch embedding layer. + patch_size_t (`int`, defaults to `1`): + The size of the tmeporal patches to use in the patch embedding layer. + num_attention_heads (`int`, defaults to `32`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + cross_attention_dim (`int`, defaults to `2048 `): + The number of channels for cross attention heads. + num_layers (`int`, defaults to `28`): + The number of layers of Transformer blocks to use. + activation_fn (`str`, defaults to `"gelu-approximate"`): + Activation function to use in feed-forward. + qk_norm (`str`, defaults to `"rms_norm_across_heads"`): + The normalization layer to use. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["norm"] + _repeated_blocks = ["LTX2VideoTransformerBlock"] + _cp_plan = { + "": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_attention_mask": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False), + }, + "rope": { + 0: ContextParallelInput(split_dim=1, expected_dims=3, split_output=True), + 1: ContextParallelInput(split_dim=1, expected_dims=3, split_output=True), + }, + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + in_channels: int = 128, # Video Arguments + out_channels: int | None = 128, + patch_size: int = 1, + patch_size_t: int = 1, + num_attention_heads: int = 32, + attention_head_dim: int = 128, + cross_attention_dim: int = 4096, + vae_scale_factors: tuple[int, int, int] = (8, 32, 32), + pos_embed_max_pos: int = 20, + base_height: int = 2048, + base_width: int = 2048, + audio_in_channels: int = 128, # Audio Arguments + audio_out_channels: int | None = 128, + audio_patch_size: int = 1, + audio_patch_size_t: int = 1, + audio_num_attention_heads: int = 32, + audio_attention_head_dim: int = 64, + audio_cross_attention_dim: int = 2048, + audio_scale_factor: int = 4, + audio_pos_embed_max_pos: int = 20, + audio_sampling_rate: int = 16000, + audio_hop_length: int = 160, + num_layers: int = 48, # Shared arguments + activation_fn: str = "gelu-approximate", + qk_norm: str = "rms_norm_across_heads", + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + caption_channels: int = 3840, + attention_bias: bool = True, + attention_out_bias: bool = True, + rope_theta: float = 10000.0, + rope_double_precision: bool = True, + causal_offset: int = 1, + timestep_scale_multiplier: int = 1000, + cross_attn_timestep_scale_multiplier: int = 1000, + rope_type: str = "interleaved", + ) -> None: + super().__init__() + + out_channels = out_channels or in_channels + audio_out_channels = audio_out_channels or audio_in_channels + inner_dim = num_attention_heads * attention_head_dim + audio_inner_dim = audio_num_attention_heads * audio_attention_head_dim + + # 1. Patchification input projections + self.proj_in = nn.Linear(in_channels, inner_dim) + self.audio_proj_in = nn.Linear(audio_in_channels, audio_inner_dim) + + # 2. Prompt embeddings + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=inner_dim) + self.audio_caption_projection = PixArtAlphaTextProjection( + in_features=caption_channels, hidden_size=audio_inner_dim + ) + + # 3. Timestep Modulation Params and Embedding + # 3.1. Global Timestep Modulation Parameters (except for cross-attention) and timestep + size embedding + # time_embed and audio_time_embed calculate both the timestep embedding and (global) modulation parameters + self.time_embed = LTX2AdaLayerNormSingle(inner_dim, num_mod_params=6, use_additional_conditions=False) + self.audio_time_embed = LTX2AdaLayerNormSingle( + audio_inner_dim, num_mod_params=6, use_additional_conditions=False + ) + + # 3.2. Global Cross Attention Modulation Parameters + # Used in the audio-to-video and video-to-audio cross attention layers as a global set of modulation params, + # which are then further modified by per-block modulaton params in each transformer block. + # There are 2 sets of scale/shift parameters for each modality, 1 each for audio-to-video (a2v) and + # video-to-audio (v2a) cross attention + self.av_cross_attn_video_scale_shift = LTX2AdaLayerNormSingle( + inner_dim, num_mod_params=4, use_additional_conditions=False + ) + self.av_cross_attn_audio_scale_shift = LTX2AdaLayerNormSingle( + audio_inner_dim, num_mod_params=4, use_additional_conditions=False + ) + # Gate param for audio-to-video (a2v) cross attn (where the video is the queries (Q) and the audio is the keys + # and values (KV)) + self.av_cross_attn_video_a2v_gate = LTX2AdaLayerNormSingle( + inner_dim, num_mod_params=1, use_additional_conditions=False + ) + # Gate param for video-to-audio (v2a) cross attn (where the audio is the queries (Q) and the video is the keys + # and values (KV)) + self.av_cross_attn_audio_v2a_gate = LTX2AdaLayerNormSingle( + audio_inner_dim, num_mod_params=1, use_additional_conditions=False + ) + + # 3.3. Output Layer Scale/Shift Modulation parameters + self.scale_shift_table = nn.Parameter(torch.randn(2, inner_dim) / inner_dim**0.5) + self.audio_scale_shift_table = nn.Parameter(torch.randn(2, audio_inner_dim) / audio_inner_dim**0.5) + + # 4. Rotary Positional Embeddings (RoPE) + # Self-Attention + self.rope = LTX2AudioVideoRotaryPosEmbed( + dim=inner_dim, + patch_size=patch_size, + patch_size_t=patch_size_t, + base_num_frames=pos_embed_max_pos, + base_height=base_height, + base_width=base_width, + scale_factors=vae_scale_factors, + theta=rope_theta, + causal_offset=causal_offset, + modality="video", + double_precision=rope_double_precision, + rope_type=rope_type, + num_attention_heads=num_attention_heads, + ) + self.audio_rope = LTX2AudioVideoRotaryPosEmbed( + dim=audio_inner_dim, + patch_size=audio_patch_size, + patch_size_t=audio_patch_size_t, + base_num_frames=audio_pos_embed_max_pos, + sampling_rate=audio_sampling_rate, + hop_length=audio_hop_length, + scale_factors=[audio_scale_factor], + theta=rope_theta, + causal_offset=causal_offset, + modality="audio", + double_precision=rope_double_precision, + rope_type=rope_type, + num_attention_heads=audio_num_attention_heads, + ) + + # Audio-to-Video, Video-to-Audio Cross-Attention + cross_attn_pos_embed_max_pos = max(pos_embed_max_pos, audio_pos_embed_max_pos) + self.cross_attn_rope = LTX2AudioVideoRotaryPosEmbed( + dim=audio_cross_attention_dim, + patch_size=patch_size, + patch_size_t=patch_size_t, + base_num_frames=cross_attn_pos_embed_max_pos, + base_height=base_height, + base_width=base_width, + theta=rope_theta, + causal_offset=causal_offset, + modality="video", + double_precision=rope_double_precision, + rope_type=rope_type, + num_attention_heads=num_attention_heads, + ) + self.cross_attn_audio_rope = LTX2AudioVideoRotaryPosEmbed( + dim=audio_cross_attention_dim, + patch_size=audio_patch_size, + patch_size_t=audio_patch_size_t, + base_num_frames=cross_attn_pos_embed_max_pos, + sampling_rate=audio_sampling_rate, + hop_length=audio_hop_length, + theta=rope_theta, + causal_offset=causal_offset, + modality="audio", + double_precision=rope_double_precision, + rope_type=rope_type, + num_attention_heads=audio_num_attention_heads, + ) + + # 5. Transformer Blocks + self.transformer_blocks = nn.ModuleList( + [ + LTX2VideoTransformerBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + cross_attention_dim=cross_attention_dim, + audio_dim=audio_inner_dim, + audio_num_attention_heads=audio_num_attention_heads, + audio_attention_head_dim=audio_attention_head_dim, + audio_cross_attention_dim=audio_cross_attention_dim, + qk_norm=qk_norm, + activation_fn=activation_fn, + attention_bias=attention_bias, + attention_out_bias=attention_out_bias, + eps=norm_eps, + elementwise_affine=norm_elementwise_affine, + rope_type=rope_type, + ) + for _ in range(num_layers) + ] + ) + + # 6. Output layers + self.norm_out = nn.LayerNorm(inner_dim, eps=1e-6, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels) + + self.audio_norm_out = nn.LayerNorm(audio_inner_dim, eps=1e-6, elementwise_affine=False) + self.audio_proj_out = nn.Linear(audio_inner_dim, audio_out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + audio_hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + audio_encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + audio_timestep: torch.LongTensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + audio_encoder_attention_mask: torch.Tensor | None = None, + num_frames: int | None = None, + height: int | None = None, + width: int | None = None, + fps: float = 24.0, + audio_num_frames: int | None = None, + video_coords: torch.Tensor | None = None, + audio_coords: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor: + """ + Forward pass for LTX-2.0 audiovisual video transformer. + + Args: + hidden_states (`torch.Tensor`): + Input patchified video latents of shape `(batch_size, num_video_tokens, in_channels)`. + audio_hidden_states (`torch.Tensor`): + Input patchified audio latents of shape `(batch_size, num_audio_tokens, audio_in_channels)`. + encoder_hidden_states (`torch.Tensor`): + Input video text embeddings of shape `(batch_size, text_seq_len, self.config.caption_channels)`. + audio_encoder_hidden_states (`torch.Tensor`): + Input audio text embeddings of shape `(batch_size, text_seq_len, self.config.caption_channels)`. + timestep (`torch.Tensor`): + Input timestep of shape `(batch_size, num_video_tokens)`. These should already be scaled by + `self.config.timestep_scale_multiplier`. + audio_timestep (`torch.Tensor`, *optional*): + Input timestep of shape `(batch_size,)` or `(batch_size, num_audio_tokens)` for audio modulation + params. This is only used by certain pipelines such as the I2V pipeline. + encoder_attention_mask (`torch.Tensor`, *optional*): + Optional multiplicative text attention mask of shape `(batch_size, text_seq_len)`. + audio_encoder_attention_mask (`torch.Tensor`, *optional*): + Optional multiplicative text attention mask of shape `(batch_size, text_seq_len)` for audio modeling. + num_frames (`int`, *optional*): + The number of latent video frames. Used if calculating the video coordinates for RoPE. + height (`int`, *optional*): + The latent video height. Used if calculating the video coordinates for RoPE. + width (`int`, *optional*): + The latent video width. Used if calculating the video coordinates for RoPE. + fps: (`float`, *optional*, defaults to `24.0`): + The desired frames per second of the generated video. Used if calculating the video coordinates for + RoPE. + audio_num_frames: (`int`, *optional*): + The number of latent audio frames. Used if calculating the audio coordinates for RoPE. + video_coords (`torch.Tensor`, *optional*): + The video coordinates to be used when calculating the rotary positional embeddings (RoPE) of shape + `(batch_size, 3, num_video_tokens, 2)`. If not supplied, this will be calculated inside `forward`. + audio_coords (`torch.Tensor`, *optional*): + The audio coordinates to be used when calculating the rotary positional embeddings (RoPE) of shape + `(batch_size, 1, num_audio_tokens, 2)`. If not supplied, this will be calculated inside `forward`. + attention_kwargs (`dict[str, Any]`, *optional*): + Optional dict of keyword args to be passed to the attention processor. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a dict-like structured output of type `AudioVisualModelOutput` or a tuple. + + Returns: + `AudioVisualModelOutput` or `tuple`: + If `return_dict` is `True`, returns a structured output of type `AudioVisualModelOutput`, otherwise a + `tuple` is returned where the first element is the denoised video latent patch sequence and the second + element is the denoised audio latent patch sequence. + """ + # Determine timestep for audio. + audio_timestep = audio_timestep if audio_timestep is not None else timestep + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + if audio_encoder_attention_mask is not None and audio_encoder_attention_mask.ndim == 2: + audio_encoder_attention_mask = (1 - audio_encoder_attention_mask.to(audio_hidden_states.dtype)) * -10000.0 + audio_encoder_attention_mask = audio_encoder_attention_mask.unsqueeze(1) + + batch_size = hidden_states.size(0) + + # 1. Prepare RoPE positional embeddings + if video_coords is None: + video_coords = self.rope.prepare_video_coords( + batch_size, num_frames, height, width, hidden_states.device, fps=fps + ) + if audio_coords is None: + audio_coords = self.audio_rope.prepare_audio_coords( + batch_size, audio_num_frames, audio_hidden_states.device + ) + + video_rotary_emb = self.rope(video_coords, device=hidden_states.device) + audio_rotary_emb = self.audio_rope(audio_coords, device=audio_hidden_states.device) + + video_cross_attn_rotary_emb = self.cross_attn_rope(video_coords[:, 0:1, :], device=hidden_states.device) + audio_cross_attn_rotary_emb = self.cross_attn_audio_rope( + audio_coords[:, 0:1, :], device=audio_hidden_states.device + ) + + # 2. Patchify input projections + hidden_states = self.proj_in(hidden_states) + audio_hidden_states = self.audio_proj_in(audio_hidden_states) + + # 3. Prepare timestep embeddings and modulation parameters + timestep_cross_attn_gate_scale_factor = ( + self.config.cross_attn_timestep_scale_multiplier / self.config.timestep_scale_multiplier + ) + + # 3.1. Prepare global modality (video and audio) timestep embedding and modulation parameters + # temb is used in the transformer blocks (as expected), while embedded_timestep is used for the output layer + # modulation with scale_shift_table (and similarly for audio) + temb, embedded_timestep = self.time_embed( + timestep.flatten(), + batch_size=batch_size, + hidden_dtype=hidden_states.dtype, + ) + temb = temb.view(batch_size, -1, temb.size(-1)) + embedded_timestep = embedded_timestep.view(batch_size, -1, embedded_timestep.size(-1)) + + temb_audio, audio_embedded_timestep = self.audio_time_embed( + audio_timestep.flatten(), + batch_size=batch_size, + hidden_dtype=audio_hidden_states.dtype, + ) + temb_audio = temb_audio.view(batch_size, -1, temb_audio.size(-1)) + audio_embedded_timestep = audio_embedded_timestep.view(batch_size, -1, audio_embedded_timestep.size(-1)) + + # 3.2. Prepare global modality cross attention modulation parameters + video_cross_attn_scale_shift, _ = self.av_cross_attn_video_scale_shift( + timestep.flatten(), + batch_size=batch_size, + hidden_dtype=hidden_states.dtype, + ) + video_cross_attn_a2v_gate, _ = self.av_cross_attn_video_a2v_gate( + timestep.flatten() * timestep_cross_attn_gate_scale_factor, + batch_size=batch_size, + hidden_dtype=hidden_states.dtype, + ) + video_cross_attn_scale_shift = video_cross_attn_scale_shift.view( + batch_size, -1, video_cross_attn_scale_shift.shape[-1] + ) + video_cross_attn_a2v_gate = video_cross_attn_a2v_gate.view(batch_size, -1, video_cross_attn_a2v_gate.shape[-1]) + + audio_cross_attn_scale_shift, _ = self.av_cross_attn_audio_scale_shift( + audio_timestep.flatten(), + batch_size=batch_size, + hidden_dtype=audio_hidden_states.dtype, + ) + audio_cross_attn_v2a_gate, _ = self.av_cross_attn_audio_v2a_gate( + audio_timestep.flatten() * timestep_cross_attn_gate_scale_factor, + batch_size=batch_size, + hidden_dtype=audio_hidden_states.dtype, + ) + audio_cross_attn_scale_shift = audio_cross_attn_scale_shift.view( + batch_size, -1, audio_cross_attn_scale_shift.shape[-1] + ) + audio_cross_attn_v2a_gate = audio_cross_attn_v2a_gate.view(batch_size, -1, audio_cross_attn_v2a_gate.shape[-1]) + + # 4. Prepare prompt embeddings + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.size(-1)) + + audio_encoder_hidden_states = self.audio_caption_projection(audio_encoder_hidden_states) + audio_encoder_hidden_states = audio_encoder_hidden_states.view(batch_size, -1, audio_hidden_states.size(-1)) + + # 5. Run transformer blocks + for block in self.transformer_blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, audio_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + audio_hidden_states, + encoder_hidden_states, + audio_encoder_hidden_states, + temb, + temb_audio, + video_cross_attn_scale_shift, + audio_cross_attn_scale_shift, + video_cross_attn_a2v_gate, + audio_cross_attn_v2a_gate, + video_rotary_emb, + audio_rotary_emb, + video_cross_attn_rotary_emb, + audio_cross_attn_rotary_emb, + encoder_attention_mask, + audio_encoder_attention_mask, + ) + else: + hidden_states, audio_hidden_states = block( + hidden_states=hidden_states, + audio_hidden_states=audio_hidden_states, + encoder_hidden_states=encoder_hidden_states, + audio_encoder_hidden_states=audio_encoder_hidden_states, + temb=temb, + temb_audio=temb_audio, + temb_ca_scale_shift=video_cross_attn_scale_shift, + temb_ca_audio_scale_shift=audio_cross_attn_scale_shift, + temb_ca_gate=video_cross_attn_a2v_gate, + temb_ca_audio_gate=audio_cross_attn_v2a_gate, + video_rotary_emb=video_rotary_emb, + audio_rotary_emb=audio_rotary_emb, + ca_video_rotary_emb=video_cross_attn_rotary_emb, + ca_audio_rotary_emb=audio_cross_attn_rotary_emb, + encoder_attention_mask=encoder_attention_mask, + audio_encoder_attention_mask=audio_encoder_attention_mask, + ) + + # 6. Output layers (including unpatchification) + scale_shift_values = self.scale_shift_table[None, None] + embedded_timestep[:, :, None] + shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1] + + hidden_states = self.norm_out(hidden_states) + hidden_states = hidden_states * (1 + scale) + shift + output = self.proj_out(hidden_states) + + audio_scale_shift_values = self.audio_scale_shift_table[None, None] + audio_embedded_timestep[:, :, None] + audio_shift, audio_scale = audio_scale_shift_values[:, :, 0], audio_scale_shift_values[:, :, 1] + + audio_hidden_states = self.audio_norm_out(audio_hidden_states) + audio_hidden_states = audio_hidden_states * (1 + audio_scale) + audio_shift + audio_output = self.audio_proj_out(audio_hidden_states) + + if not return_dict: + return (output, audio_output) + return AudioVisualModelOutput(sample=output, audio_sample=audio_output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_lumina2.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_lumina2.py new file mode 100644 index 0000000000000000000000000000000000000000..03e2841f8bcba89c813988f2ca9104f8ee24d565 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_lumina2.py @@ -0,0 +1,530 @@ +# Copyright 2025 Alpha-VLLM Authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...loaders.single_file_model import FromOriginalModelMixin +from ...utils import apply_lora_scale, logging +from ..attention import LuminaFeedForward +from ..attention_processor import Attention +from ..embeddings import TimestepEmbedding, Timesteps, apply_rotary_emb, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import LuminaLayerNormContinuous, LuminaRMSNormZero, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class Lumina2CombinedTimestepCaptionEmbedding(nn.Module): + def __init__( + self, + hidden_size: int = 4096, + cap_feat_dim: int = 2048, + frequency_embedding_size: int = 256, + norm_eps: float = 1e-5, + ) -> None: + super().__init__() + + self.time_proj = Timesteps( + num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0.0 + ) + + self.timestep_embedder = TimestepEmbedding( + in_channels=frequency_embedding_size, time_embed_dim=min(hidden_size, 1024) + ) + + self.caption_embedder = nn.Sequential( + RMSNorm(cap_feat_dim, eps=norm_eps), nn.Linear(cap_feat_dim, hidden_size, bias=True) + ) + + def forward( + self, hidden_states: torch.Tensor, timestep: torch.Tensor, encoder_hidden_states: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor]: + timestep_proj = self.time_proj(timestep).type_as(hidden_states) + time_embed = self.timestep_embedder(timestep_proj) + caption_embed = self.caption_embedder(encoder_hidden_states) + return time_embed, caption_embed + + +class Lumina2AttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the Lumina2Transformer2DModel model. It applies normalization and RoPE on query and key vectors. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + base_sequence_length: int | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = hidden_states.shape + + # Get Query-Key-Value Pair + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + query_dim = query.shape[-1] + inner_dim = key.shape[-1] + head_dim = query_dim // attn.heads + dtype = query.dtype + + # Get key-value heads + kv_heads = inner_dim // head_dim + + query = query.view(batch_size, -1, attn.heads, head_dim) + key = key.view(batch_size, -1, kv_heads, head_dim) + value = value.view(batch_size, -1, kv_heads, head_dim) + + # Apply Query-Key Norm if needed + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, use_real=False) + key = apply_rotary_emb(key, image_rotary_emb, use_real=False) + + query, key = query.to(dtype), key.to(dtype) + + # Apply proportional attention if true + if base_sequence_length is not None: + softmax_scale = math.sqrt(math.log(sequence_length, base_sequence_length)) * attn.scale + else: + softmax_scale = attn.scale + + # perform Grouped-qurey Attention (GQA) + n_rep = attn.heads // kv_heads + if n_rep >= 1: + key = key.unsqueeze(3).repeat(1, 1, 1, n_rep, 1).flatten(2, 3) + value = value.unsqueeze(3).repeat(1, 1, 1, n_rep, 1).flatten(2, 3) + + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + if attention_mask is not None: + attention_mask = attention_mask.bool().view(batch_size, 1, 1, -1) + + query = query.transpose(1, 2) + key = key.transpose(1, 2) + value = value.transpose(1, 2) + + hidden_states = F.scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, scale=softmax_scale + ) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.type_as(query) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class Lumina2TransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + num_kv_heads: int, + multiple_of: int, + ffn_dim_multiplier: float, + norm_eps: float, + modulation: bool = True, + ) -> None: + super().__init__() + self.head_dim = dim // num_attention_heads + self.modulation = modulation + + self.attn = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=dim // num_attention_heads, + qk_norm="rms_norm", + heads=num_attention_heads, + kv_heads=num_kv_heads, + eps=1e-5, + bias=False, + out_bias=False, + processor=Lumina2AttnProcessor2_0(), + ) + + self.feed_forward = LuminaFeedForward( + dim=dim, + inner_dim=4 * dim, + multiple_of=multiple_of, + ffn_dim_multiplier=ffn_dim_multiplier, + ) + + if modulation: + self.norm1 = LuminaRMSNormZero( + embedding_dim=dim, + norm_eps=norm_eps, + norm_elementwise_affine=True, + ) + else: + self.norm1 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm1 = RMSNorm(dim, eps=norm_eps) + + self.norm2 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm2 = RMSNorm(dim, eps=norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + image_rotary_emb: torch.Tensor, + temb: torch.Tensor | None = None, + ) -> torch.Tensor: + if self.modulation: + norm_hidden_states, gate_msa, scale_mlp, gate_mlp = self.norm1(hidden_states, temb) + attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states + gate_msa.unsqueeze(1).tanh() * self.norm2(attn_output) + mlp_output = self.feed_forward(self.ffn_norm1(hidden_states) * (1 + scale_mlp.unsqueeze(1))) + hidden_states = hidden_states + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(mlp_output) + else: + norm_hidden_states = self.norm1(hidden_states) + attn_output = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states + self.norm2(attn_output) + mlp_output = self.feed_forward(self.ffn_norm1(hidden_states)) + hidden_states = hidden_states + self.ffn_norm2(mlp_output) + + return hidden_states + + +class Lumina2RotaryPosEmbed(nn.Module): + def __init__(self, theta: int, axes_dim: list[int], axes_lens: list[int] = (300, 512, 512), patch_size: int = 2): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + self.axes_lens = axes_lens + self.patch_size = patch_size + + self.freqs_cis = self._precompute_freqs_cis(axes_dim, axes_lens, theta) + + def _precompute_freqs_cis(self, axes_dim: list[int], axes_lens: list[int], theta: int) -> list[torch.Tensor]: + freqs_cis = [] + freqs_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + for i, (d, e) in enumerate(zip(axes_dim, axes_lens)): + emb = get_1d_rotary_pos_embed(d, e, theta=self.theta, freqs_dtype=freqs_dtype) + freqs_cis.append(emb) + return freqs_cis + + def _get_freqs_cis(self, ids: torch.Tensor) -> torch.Tensor: + device = ids.device + if ids.device.type == "mps": + ids = ids.to("cpu") + + result = [] + for i in range(len(self.axes_dim)): + freqs = self.freqs_cis[i].to(ids.device) + index = ids[:, :, i : i + 1].repeat(1, 1, freqs.shape[-1]).to(torch.int64) + result.append(torch.gather(freqs.unsqueeze(0).repeat(index.shape[0], 1, 1), dim=1, index=index)) + return torch.cat(result, dim=-1).to(device) + + def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor): + batch_size, channels, height, width = hidden_states.shape + p = self.patch_size + post_patch_height, post_patch_width = height // p, width // p + image_seq_len = post_patch_height * post_patch_width + device = hidden_states.device + + encoder_seq_len = attention_mask.shape[1] + l_effective_cap_len = attention_mask.sum(dim=1).tolist() + seq_lengths = [cap_seq_len + image_seq_len for cap_seq_len in l_effective_cap_len] + max_seq_len = max(seq_lengths) + + # Create position IDs + position_ids = torch.zeros(batch_size, max_seq_len, 3, dtype=torch.int32, device=device) + + for i, (cap_seq_len, seq_len) in enumerate(zip(l_effective_cap_len, seq_lengths)): + # add caption position ids + position_ids[i, :cap_seq_len, 0] = torch.arange(cap_seq_len, dtype=torch.int32, device=device) + position_ids[i, cap_seq_len:seq_len, 0] = cap_seq_len + + # add image position ids + row_ids = ( + torch.arange(post_patch_height, dtype=torch.int32, device=device) + .view(-1, 1) + .repeat(1, post_patch_width) + .flatten() + ) + col_ids = ( + torch.arange(post_patch_width, dtype=torch.int32, device=device) + .view(1, -1) + .repeat(post_patch_height, 1) + .flatten() + ) + position_ids[i, cap_seq_len:seq_len, 1] = row_ids + position_ids[i, cap_seq_len:seq_len, 2] = col_ids + + # Get combined rotary embeddings + freqs_cis = self._get_freqs_cis(position_ids) + + # create separate rotary embeddings for captions and images + cap_freqs_cis = torch.zeros( + batch_size, encoder_seq_len, freqs_cis.shape[-1], device=device, dtype=freqs_cis.dtype + ) + img_freqs_cis = torch.zeros( + batch_size, image_seq_len, freqs_cis.shape[-1], device=device, dtype=freqs_cis.dtype + ) + + for i, (cap_seq_len, seq_len) in enumerate(zip(l_effective_cap_len, seq_lengths)): + cap_freqs_cis[i, :cap_seq_len] = freqs_cis[i, :cap_seq_len] + img_freqs_cis[i, :image_seq_len] = freqs_cis[i, cap_seq_len:seq_len] + + # image patch embeddings + hidden_states = ( + hidden_states.view(batch_size, channels, post_patch_height, p, post_patch_width, p) + .permute(0, 2, 4, 3, 5, 1) + .flatten(3) + .flatten(1, 2) + ) + + return hidden_states, cap_freqs_cis, img_freqs_cis, freqs_cis, l_effective_cap_len, seq_lengths + + +class Lumina2Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + r""" + Lumina2NextDiT: Diffusion model with a Transformer backbone. + + Parameters: + sample_size (`int`): The width of the latent images. This is fixed during training since + it is used to learn a number of position embeddings. + patch_size (`int`, *optional*, (`int`, *optional*, defaults to 2): + The size of each patch in the image. This parameter defines the resolution of patches fed into the model. + in_channels (`int`, *optional*, defaults to 4): + The number of input channels for the model. Typically, this matches the number of channels in the input + images. + hidden_size (`int`, *optional*, defaults to 4096): + The dimensionality of the hidden layers in the model. This parameter determines the width of the model's + hidden representations. + num_layers (`int`, *optional*, default to 32): + The number of layers in the model. This defines the depth of the neural network. + num_attention_heads (`int`, *optional*, defaults to 32): + The number of attention heads in each attention layer. This parameter specifies how many separate attention + mechanisms are used. + num_kv_heads (`int`, *optional*, defaults to 8): + The number of key-value heads in the attention mechanism, if different from the number of attention heads. + If None, it defaults to num_attention_heads. + multiple_of (`int`, *optional*, defaults to 256): + A factor that the hidden size should be a multiple of. This can help optimize certain hardware + configurations. + ffn_dim_multiplier (`float`, *optional*): + A multiplier for the dimensionality of the feed-forward network. If None, it uses a default value based on + the model configuration. + norm_eps (`float`, *optional*, defaults to 1e-5): + A small value added to the denominator for numerical stability in normalization layers. + scaling_factor (`float`, *optional*, defaults to 1.0): + A scaling factor applied to certain parameters or layers in the model. This can be used for adjusting the + overall scale of the model's operations. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["Lumina2TransformerBlock"] + _skip_layerwise_casting_patterns = ["x_embedder", "norm"] + + @register_to_config + def __init__( + self, + sample_size: int = 128, + patch_size: int = 2, + in_channels: int = 16, + out_channels: int | None = None, + hidden_size: int = 2304, + num_layers: int = 26, + num_refiner_layers: int = 2, + num_attention_heads: int = 24, + num_kv_heads: int = 8, + multiple_of: int = 256, + ffn_dim_multiplier: float | None = None, + norm_eps: float = 1e-5, + scaling_factor: float = 1.0, + axes_dim_rope: tuple[int, int, int] = (32, 32, 32), + axes_lens: tuple[int, int, int] = (300, 512, 512), + cap_feat_dim: int = 1024, + ) -> None: + super().__init__() + self.out_channels = out_channels or in_channels + + # 1. Positional, patch & conditional embeddings + self.rope_embedder = Lumina2RotaryPosEmbed( + theta=10000, axes_dim=axes_dim_rope, axes_lens=axes_lens, patch_size=patch_size + ) + + self.x_embedder = nn.Linear(in_features=patch_size * patch_size * in_channels, out_features=hidden_size) + + self.time_caption_embed = Lumina2CombinedTimestepCaptionEmbedding( + hidden_size=hidden_size, cap_feat_dim=cap_feat_dim, norm_eps=norm_eps + ) + + # 2. Noise and context refinement blocks + self.noise_refiner = nn.ModuleList( + [ + Lumina2TransformerBlock( + hidden_size, + num_attention_heads, + num_kv_heads, + multiple_of, + ffn_dim_multiplier, + norm_eps, + modulation=True, + ) + for _ in range(num_refiner_layers) + ] + ) + + self.context_refiner = nn.ModuleList( + [ + Lumina2TransformerBlock( + hidden_size, + num_attention_heads, + num_kv_heads, + multiple_of, + ffn_dim_multiplier, + norm_eps, + modulation=False, + ) + for _ in range(num_refiner_layers) + ] + ) + + # 3. Transformer blocks + self.layers = nn.ModuleList( + [ + Lumina2TransformerBlock( + hidden_size, + num_attention_heads, + num_kv_heads, + multiple_of, + ffn_dim_multiplier, + norm_eps, + modulation=True, + ) + for _ in range(num_layers) + ] + ) + + # 4. Output norm & projection + self.norm_out = LuminaLayerNormContinuous( + embedding_dim=hidden_size, + conditioning_embedding_dim=min(hidden_size, 1024), + elementwise_affine=False, + eps=1e-6, + bias=True, + out_dim=patch_size * patch_size * self.out_channels, + ) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_attention_mask: torch.Tensor, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor | Transformer2DModelOutput: + # 1. Condition, positional & patch embedding + batch_size, _, height, width = hidden_states.shape + + temb, encoder_hidden_states = self.time_caption_embed(hidden_states, timestep, encoder_hidden_states) + + ( + hidden_states, + context_rotary_emb, + noise_rotary_emb, + rotary_emb, + encoder_seq_lengths, + seq_lengths, + ) = self.rope_embedder(hidden_states, encoder_attention_mask) + + hidden_states = self.x_embedder(hidden_states) + + # 2. Context & noise refinement + for layer in self.context_refiner: + encoder_hidden_states = layer(encoder_hidden_states, encoder_attention_mask, context_rotary_emb) + + for layer in self.noise_refiner: + hidden_states = layer(hidden_states, None, noise_rotary_emb, temb) + + # 3. Joint Transformer blocks + max_seq_len = max(seq_lengths) + use_mask = len(set(seq_lengths)) > 1 + + attention_mask = hidden_states.new_zeros(batch_size, max_seq_len, dtype=torch.bool) + joint_hidden_states = hidden_states.new_zeros(batch_size, max_seq_len, self.config.hidden_size) + for i, (encoder_seq_len, seq_len) in enumerate(zip(encoder_seq_lengths, seq_lengths)): + attention_mask[i, :seq_len] = True + joint_hidden_states[i, :encoder_seq_len] = encoder_hidden_states[i, :encoder_seq_len] + joint_hidden_states[i, encoder_seq_len:seq_len] = hidden_states[i] + + hidden_states = joint_hidden_states + + for layer in self.layers: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + layer, hidden_states, attention_mask if use_mask else None, rotary_emb, temb + ) + else: + hidden_states = layer(hidden_states, attention_mask if use_mask else None, rotary_emb, temb) + + # 4. Output norm & projection + hidden_states = self.norm_out(hidden_states, temb) + + # 5. Unpatchify + p = self.config.patch_size + output = [] + for i, (encoder_seq_len, seq_len) in enumerate(zip(encoder_seq_lengths, seq_lengths)): + output.append( + hidden_states[i][encoder_seq_len:seq_len] + .view(height // p, width // p, p, p, self.out_channels) + .permute(4, 0, 2, 1, 3) + .flatten(3, 4) + .flatten(1, 2) + ) + output = torch.stack(output, dim=0) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_mochi.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_mochi.py new file mode 100644 index 0000000000000000000000000000000000000000..31106e3a04761fb185f475e141fe9da080110f00 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_mochi.py @@ -0,0 +1,470 @@ +# Copyright 2025 The Genmo team and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import PeftAdapterMixin +from ...loaders.single_file_model import FromOriginalModelMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import FeedForward +from ..attention_processor import MochiAttention, MochiAttnProcessor2_0 +from ..cache_utils import CacheMixin +from ..embeddings import MochiCombinedTimestepCaptionEmbedding, PatchEmbed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class MochiModulatedRMSNorm(nn.Module): + def __init__(self, eps: float): + super().__init__() + + self.eps = eps + self.norm = RMSNorm(0, eps, False) + + def forward(self, hidden_states, scale=None): + hidden_states_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + + hidden_states = self.norm(hidden_states) + + if scale is not None: + hidden_states = hidden_states * scale + + hidden_states = hidden_states.to(hidden_states_dtype) + + return hidden_states + + +class MochiLayerNormContinuous(nn.Module): + def __init__( + self, + embedding_dim: int, + conditioning_embedding_dim: int, + eps=1e-5, + bias=True, + ): + super().__init__() + + # AdaLN + self.silu = nn.SiLU() + self.linear_1 = nn.Linear(conditioning_embedding_dim, embedding_dim, bias=bias) + self.norm = MochiModulatedRMSNorm(eps=eps) + + def forward( + self, + x: torch.Tensor, + conditioning_embedding: torch.Tensor, + ) -> torch.Tensor: + input_dtype = x.dtype + + # convert back to the original dtype in case `conditioning_embedding`` is upcasted to float32 (needed for hunyuanDiT) + scale = self.linear_1(self.silu(conditioning_embedding).to(x.dtype)) + x = self.norm(x, (1 + scale.unsqueeze(1).to(torch.float32))) + + return x.to(input_dtype) + + +class MochiRMSNormZero(nn.Module): + r""" + Adaptive RMS Norm used in Mochi. + + Parameters: + embedding_dim (`int`): The size of each embedding vector. + """ + + def __init__( + self, embedding_dim: int, hidden_dim: int, eps: float = 1e-5, elementwise_affine: bool = False + ) -> None: + super().__init__() + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, hidden_dim) + self.norm = RMSNorm(0, eps, False) + + def forward( + self, hidden_states: torch.Tensor, emb: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + hidden_states_dtype = hidden_states.dtype + + emb = self.linear(self.silu(emb)) + scale_msa, gate_msa, scale_mlp, gate_mlp = emb.chunk(4, dim=1) + hidden_states = self.norm(hidden_states.to(torch.float32)) * (1 + scale_msa[:, None].to(torch.float32)) + hidden_states = hidden_states.to(hidden_states_dtype) + + return hidden_states, gate_msa, scale_mlp, gate_mlp + + +@maybe_allow_in_graph +class MochiTransformerBlock(nn.Module): + r""" + Transformer block used in [Mochi](https://huggingface.co/genmo/mochi-1-preview). + + Args: + dim (`int`): + The number of channels in the input and output. + num_attention_heads (`int`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`): + The number of channels in each head. + qk_norm (`str`, defaults to `"rms_norm"`): + The normalization layer to use. + activation_fn (`str`, defaults to `"swiglu"`): + Activation function to use in feed-forward. + context_pre_only (`bool`, defaults to `False`): + Whether or not to process context-related conditions with additional layers. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + pooled_projection_dim: int, + qk_norm: str = "rms_norm", + activation_fn: str = "swiglu", + context_pre_only: bool = False, + eps: float = 1e-6, + ) -> None: + super().__init__() + + self.context_pre_only = context_pre_only + self.ff_inner_dim = (4 * dim * 2) // 3 + self.ff_context_inner_dim = (4 * pooled_projection_dim * 2) // 3 + + self.norm1 = MochiRMSNormZero(dim, 4 * dim, eps=eps, elementwise_affine=False) + + if not context_pre_only: + self.norm1_context = MochiRMSNormZero(dim, 4 * pooled_projection_dim, eps=eps, elementwise_affine=False) + else: + self.norm1_context = MochiLayerNormContinuous( + embedding_dim=pooled_projection_dim, + conditioning_embedding_dim=dim, + eps=eps, + ) + + self.attn1 = MochiAttention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + bias=False, + added_kv_proj_dim=pooled_projection_dim, + added_proj_bias=False, + out_dim=dim, + out_context_dim=pooled_projection_dim, + context_pre_only=context_pre_only, + processor=MochiAttnProcessor2_0(), + eps=1e-5, + ) + + # TODO(aryan): norm_context layers are not needed when `context_pre_only` is True + self.norm2 = MochiModulatedRMSNorm(eps=eps) + self.norm2_context = MochiModulatedRMSNorm(eps=eps) if not self.context_pre_only else None + + self.norm3 = MochiModulatedRMSNorm(eps) + self.norm3_context = MochiModulatedRMSNorm(eps=eps) if not self.context_pre_only else None + + self.ff = FeedForward(dim, inner_dim=self.ff_inner_dim, activation_fn=activation_fn, bias=False) + self.ff_context = None + if not context_pre_only: + self.ff_context = FeedForward( + pooled_projection_dim, + inner_dim=self.ff_context_inner_dim, + activation_fn=activation_fn, + bias=False, + ) + + self.norm4 = MochiModulatedRMSNorm(eps=eps) + self.norm4_context = MochiModulatedRMSNorm(eps=eps) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + encoder_attention_mask: torch.Tensor, + image_rotary_emb: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + norm_hidden_states, gate_msa, scale_mlp, gate_mlp = self.norm1(hidden_states, temb) + + if not self.context_pre_only: + norm_encoder_hidden_states, enc_gate_msa, enc_scale_mlp, enc_gate_mlp = self.norm1_context( + encoder_hidden_states, temb + ) + else: + norm_encoder_hidden_states = self.norm1_context(encoder_hidden_states, temb) + + attn_hidden_states, context_attn_hidden_states = self.attn1( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + attention_mask=encoder_attention_mask, + ) + + hidden_states = hidden_states + self.norm2(attn_hidden_states, torch.tanh(gate_msa).unsqueeze(1)) + norm_hidden_states = self.norm3(hidden_states, (1 + scale_mlp.unsqueeze(1).to(torch.float32))) + ff_output = self.ff(norm_hidden_states) + hidden_states = hidden_states + self.norm4(ff_output, torch.tanh(gate_mlp).unsqueeze(1)) + + if not self.context_pre_only: + encoder_hidden_states = encoder_hidden_states + self.norm2_context( + context_attn_hidden_states, torch.tanh(enc_gate_msa).unsqueeze(1) + ) + norm_encoder_hidden_states = self.norm3_context( + encoder_hidden_states, (1 + enc_scale_mlp.unsqueeze(1).to(torch.float32)) + ) + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + self.norm4_context( + context_ff_output, torch.tanh(enc_gate_mlp).unsqueeze(1) + ) + + return hidden_states, encoder_hidden_states + + +class MochiRoPE(nn.Module): + r""" + RoPE implementation used in [Mochi](https://huggingface.co/genmo/mochi-1-preview). + + Args: + base_height (`int`, defaults to `192`): + Base height used to compute interpolation scale for rotary positional embeddings. + base_width (`int`, defaults to `192`): + Base width used to compute interpolation scale for rotary positional embeddings. + """ + + def __init__(self, base_height: int = 192, base_width: int = 192) -> None: + super().__init__() + + self.target_area = base_height * base_width + + def _centers(self, start, stop, num, device, dtype) -> torch.Tensor: + edges = torch.linspace(start, stop, num + 1, device=device, dtype=dtype) + return (edges[:-1] + edges[1:]) / 2 + + def _get_positions( + self, + num_frames: int, + height: int, + width: int, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + scale = (self.target_area / (height * width)) ** 0.5 + + t = torch.arange(num_frames, device=device, dtype=dtype) + h = self._centers(-height * scale / 2, height * scale / 2, height, device, dtype) + w = self._centers(-width * scale / 2, width * scale / 2, width, device, dtype) + + grid_t, grid_h, grid_w = torch.meshgrid(t, h, w, indexing="ij") + + positions = torch.stack([grid_t, grid_h, grid_w], dim=-1).view(-1, 3) + return positions + + def _create_rope(self, freqs: torch.Tensor, pos: torch.Tensor) -> torch.Tensor: + with torch.autocast(freqs.device.type, torch.float32): + # Always run ROPE freqs computation in FP32 + freqs = torch.einsum("nd,dhf->nhf", pos.to(torch.float32), freqs.to(torch.float32)) + + freqs_cos = torch.cos(freqs) + freqs_sin = torch.sin(freqs) + return freqs_cos, freqs_sin + + def forward( + self, + pos_frequencies: torch.Tensor, + num_frames: int, + height: int, + width: int, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + pos = self._get_positions(num_frames, height, width, device, dtype) + rope_cos, rope_sin = self._create_rope(pos_frequencies, pos) + return rope_cos, rope_sin + + +@maybe_allow_in_graph +class MochiTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin): + r""" + A Transformer model for video-like data introduced in [Mochi](https://huggingface.co/genmo/mochi-1-preview). + + Args: + patch_size (`int`, defaults to `2`): + The size of the patches to use in the patch embedding layer. + num_attention_heads (`int`, defaults to `24`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + num_layers (`int`, defaults to `48`): + The number of layers of Transformer blocks to use. + in_channels (`int`, defaults to `12`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `None`): + The number of channels in the output. + qk_norm (`str`, defaults to `"rms_norm"`): + The normalization layer to use. + text_embed_dim (`int`, defaults to `4096`): + Input dimension of text embeddings from the text encoder. + time_embed_dim (`int`, defaults to `256`): + Output dimension of timestep embeddings. + activation_fn (`str`, defaults to `"swiglu"`): + Activation function to use in feed-forward. + max_sequence_length (`int`, defaults to `256`): + The maximum sequence length of text embeddings supported. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["MochiTransformerBlock"] + _skip_layerwise_casting_patterns = ["patch_embed", "norm"] + + @register_to_config + def __init__( + self, + patch_size: int = 2, + num_attention_heads: int = 24, + attention_head_dim: int = 128, + num_layers: int = 48, + pooled_projection_dim: int = 1536, + in_channels: int = 12, + out_channels: int | None = None, + qk_norm: str = "rms_norm", + text_embed_dim: int = 4096, + time_embed_dim: int = 256, + activation_fn: str = "swiglu", + max_sequence_length: int = 256, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + self.patch_embed = PatchEmbed( + patch_size=patch_size, + in_channels=in_channels, + embed_dim=inner_dim, + pos_embed_type=None, + ) + + self.time_embed = MochiCombinedTimestepCaptionEmbedding( + embedding_dim=inner_dim, + pooled_projection_dim=pooled_projection_dim, + text_embed_dim=text_embed_dim, + time_embed_dim=time_embed_dim, + num_attention_heads=8, + ) + + self.pos_frequencies = nn.Parameter(torch.full((3, num_attention_heads, attention_head_dim // 2), 0.0)) + self.rope = MochiRoPE() + + self.transformer_blocks = nn.ModuleList( + [ + MochiTransformerBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + pooled_projection_dim=pooled_projection_dim, + qk_norm=qk_norm, + activation_fn=activation_fn, + context_pre_only=i == num_layers - 1, + ) + for i in range(num_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous( + inner_dim, + inner_dim, + elementwise_affine=False, + eps=1e-6, + norm_type="layer_norm", + ) + self.proj_out = nn.Linear(inner_dim, patch_size * patch_size * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_attention_mask: torch.Tensor, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p = self.config.patch_size + + post_patch_height = height // p + post_patch_width = width // p + + temb, encoder_hidden_states = self.time_embed( + timestep, + encoder_hidden_states, + encoder_attention_mask, + hidden_dtype=hidden_states.dtype, + ) + + hidden_states = hidden_states.permute(0, 2, 1, 3, 4).flatten(0, 1) + hidden_states = self.patch_embed(hidden_states) + hidden_states = hidden_states.unflatten(0, (batch_size, -1)).flatten(1, 2) + + image_rotary_emb = self.rope( + self.pos_frequencies, + num_frames, + post_patch_height, + post_patch_width, + device=hidden_states.device, + dtype=torch.float32, + ) + + for i, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states, encoder_hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + encoder_attention_mask, + image_rotary_emb, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + encoder_attention_mask=encoder_attention_mask, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape(batch_size, num_frames, post_patch_height, post_patch_width, p, p, -1) + hidden_states = hidden_states.permute(0, 6, 1, 2, 4, 3, 5) + output = hidden_states.reshape(batch_size, -1, num_frames, height, width) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_omnigen.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_omnigen.py new file mode 100644 index 0000000000000000000000000000000000000000..bd8bb107e25c22385465f028b25a184a6e42264e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_omnigen.py @@ -0,0 +1,468 @@ +# Copyright 2025 OmniGen team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ..attention_processor import Attention +from ..embeddings import TimestepEmbedding, Timesteps, get_2d_sincos_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNorm, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class OmniGenFeedForward(nn.Module): + def __init__(self, hidden_size: int, intermediate_size: int): + super().__init__() + + self.gate_up_proj = nn.Linear(hidden_size, 2 * intermediate_size, bias=False) + self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False) + self.activation_fn = nn.SiLU() + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + up_states = self.gate_up_proj(hidden_states) + gate, up_states = up_states.chunk(2, dim=-1) + up_states = up_states * self.activation_fn(gate) + return self.down_proj(up_states) + + +class OmniGenPatchEmbed(nn.Module): + def __init__( + self, + patch_size: int = 2, + in_channels: int = 4, + embed_dim: int = 768, + bias: bool = True, + interpolation_scale: float = 1, + pos_embed_max_size: int = 192, + base_size: int = 64, + ): + super().__init__() + + self.output_image_proj = nn.Conv2d( + in_channels, embed_dim, kernel_size=(patch_size, patch_size), stride=patch_size, bias=bias + ) + self.input_image_proj = nn.Conv2d( + in_channels, embed_dim, kernel_size=(patch_size, patch_size), stride=patch_size, bias=bias + ) + + self.patch_size = patch_size + self.interpolation_scale = interpolation_scale + self.pos_embed_max_size = pos_embed_max_size + + pos_embed = get_2d_sincos_pos_embed( + embed_dim, + self.pos_embed_max_size, + base_size=base_size, + interpolation_scale=self.interpolation_scale, + output_type="pt", + ) + self.register_buffer("pos_embed", pos_embed.float().unsqueeze(0), persistent=True) + + def _cropped_pos_embed(self, height, width): + """Crops positional embeddings for SD3 compatibility.""" + if self.pos_embed_max_size is None: + raise ValueError("`pos_embed_max_size` must be set for cropping.") + + height = height // self.patch_size + width = width // self.patch_size + if height > self.pos_embed_max_size: + raise ValueError( + f"Height ({height}) cannot be greater than `pos_embed_max_size`: {self.pos_embed_max_size}." + ) + if width > self.pos_embed_max_size: + raise ValueError( + f"Width ({width}) cannot be greater than `pos_embed_max_size`: {self.pos_embed_max_size}." + ) + + top = (self.pos_embed_max_size - height) // 2 + left = (self.pos_embed_max_size - width) // 2 + spatial_pos_embed = self.pos_embed.reshape(1, self.pos_embed_max_size, self.pos_embed_max_size, -1) + spatial_pos_embed = spatial_pos_embed[:, top : top + height, left : left + width, :] + spatial_pos_embed = spatial_pos_embed.reshape(1, -1, spatial_pos_embed.shape[-1]) + return spatial_pos_embed + + def _patch_embeddings(self, hidden_states: torch.Tensor, is_input_image: bool) -> torch.Tensor: + if is_input_image: + hidden_states = self.input_image_proj(hidden_states) + else: + hidden_states = self.output_image_proj(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + return hidden_states + + def forward( + self, hidden_states: torch.Tensor, is_input_image: bool, padding_latent: torch.Tensor = None + ) -> torch.Tensor: + if isinstance(hidden_states, list): + if padding_latent is None: + padding_latent = [None] * len(hidden_states) + patched_latents = [] + for sub_latent, padding in zip(hidden_states, padding_latent): + height, width = sub_latent.shape[-2:] + sub_latent = self._patch_embeddings(sub_latent, is_input_image) + pos_embed = self._cropped_pos_embed(height, width) + sub_latent = sub_latent + pos_embed + if padding is not None: + sub_latent = torch.cat([sub_latent, padding.to(sub_latent.device)], dim=-2) + patched_latents.append(sub_latent) + else: + height, width = hidden_states.shape[-2:] + pos_embed = self._cropped_pos_embed(height, width) + hidden_states = self._patch_embeddings(hidden_states, is_input_image) + patched_latents = hidden_states + pos_embed + + return patched_latents + + +class OmniGenSuScaledRotaryEmbedding(nn.Module): + def __init__( + self, dim, max_position_embeddings=131072, original_max_position_embeddings=4096, base=10000, rope_scaling=None + ): + super().__init__() + + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + + inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float() / self.dim)) + self.register_buffer("inv_freq", tensor=inv_freq, persistent=False) + + self.short_factor = rope_scaling["short_factor"] + self.long_factor = rope_scaling["long_factor"] + self.original_max_position_embeddings = original_max_position_embeddings + + def forward(self, hidden_states, position_ids): + seq_len = torch.max(position_ids) + 1 + if seq_len > self.original_max_position_embeddings: + ext_factors = torch.tensor(self.long_factor, dtype=torch.float32, device=hidden_states.device) + else: + ext_factors = torch.tensor(self.short_factor, dtype=torch.float32, device=hidden_states.device) + + inv_freq_shape = ( + torch.arange(0, self.dim, 2, dtype=torch.int64, device=hidden_states.device).float() / self.dim + ) + self.inv_freq = 1.0 / (ext_factors * self.base**inv_freq_shape) + + inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) + position_ids_expanded = position_ids[:, None, :].float() + + # Force float32 since bfloat16 loses precision on long contexts + # See https://github.com/huggingface/transformers/pull/29285 + device_type = hidden_states.device.type + device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" + with torch.autocast(device_type=device_type, enabled=False): + freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1)[0] + + scale = self.max_position_embeddings / self.original_max_position_embeddings + if scale <= 1.0: + scaling_factor = 1.0 + else: + scaling_factor = math.sqrt(1 + math.log(scale) / math.log(self.original_max_position_embeddings)) + + cos = emb.cos() * scaling_factor + sin = emb.sin() * scaling_factor + return cos, sin + + +class OmniGenAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is + used in the OmniGen model. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = hidden_states.shape + + # Get Query-Key-Value Pair + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + bsz, q_len, query_dim = query.size() + inner_dim = key.shape[-1] + head_dim = query_dim // attn.heads + + # Get key-value heads + kv_heads = inner_dim // head_dim + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, kv_heads, head_dim).transpose(1, 2) + + # Apply RoPE if needed + if image_rotary_emb is not None: + from ..embeddings import apply_rotary_emb + + query = apply_rotary_emb(query, image_rotary_emb, use_real_unbind_dim=-2) + key = apply_rotary_emb(key, image_rotary_emb, use_real_unbind_dim=-2) + + hidden_states = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask) + hidden_states = hidden_states.transpose(1, 2).type_as(query) + hidden_states = hidden_states.reshape(bsz, q_len, attn.out_dim) + hidden_states = attn.to_out[0](hidden_states) + return hidden_states + + +class OmniGenBlock(nn.Module): + def __init__( + self, + hidden_size: int, + num_attention_heads: int, + num_key_value_heads: int, + intermediate_size: int, + rms_norm_eps: float, + ) -> None: + super().__init__() + + self.input_layernorm = RMSNorm(hidden_size, eps=rms_norm_eps) + self.self_attn = Attention( + query_dim=hidden_size, + cross_attention_dim=hidden_size, + dim_head=hidden_size // num_attention_heads, + heads=num_attention_heads, + kv_heads=num_key_value_heads, + bias=False, + out_dim=hidden_size, + out_bias=False, + processor=OmniGenAttnProcessor2_0(), + ) + self.post_attention_layernorm = RMSNorm(hidden_size, eps=rms_norm_eps) + self.mlp = OmniGenFeedForward(hidden_size, intermediate_size) + + def forward( + self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, image_rotary_emb: torch.Tensor + ) -> torch.Tensor: + # 1. Attention + norm_hidden_states = self.input_layernorm(hidden_states) + attn_output = self.self_attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + hidden_states = hidden_states + attn_output + + # 2. Feed Forward + norm_hidden_states = self.post_attention_layernorm(hidden_states) + ff_output = self.mlp(norm_hidden_states) + hidden_states = hidden_states + ff_output + return hidden_states + + +class OmniGenTransformer2DModel(ModelMixin, ConfigMixin): + """ + The Transformer model introduced in OmniGen (https://huggingface.co/papers/2409.11340). + + Parameters: + in_channels (`int`, defaults to `4`): + The number of channels in the input. + patch_size (`int`, defaults to `2`): + The size of the spatial patches to use in the patch embedding layer. + hidden_size (`int`, defaults to `3072`): + The dimensionality of the hidden layers in the model. + rms_norm_eps (`float`, defaults to `1e-5`): + Eps for RMSNorm layer. + num_attention_heads (`int`, defaults to `32`): + The number of heads to use for multi-head attention. + num_key_value_heads (`int`, defaults to `32`): + The number of heads to use for keys and values in multi-head attention. + intermediate_size (`int`, defaults to `8192`): + Dimension of the hidden layer in FeedForward layers. + num_layers (`int`, default to `32`): + The number of layers of transformer blocks to use. + pad_token_id (`int`, default to `32000`): + The id of the padding token. + vocab_size (`int`, default to `32064`): + The size of the vocabulary of the embedding vocabulary. + rope_base (`int`, default to `10000`): + The default theta value to use when creating RoPE. + rope_scaling (`dict`, optional): + The scaling factors for the RoPE. Must contain `short_factor` and `long_factor`. + pos_embed_max_size (`int`, default to `192`): + The maximum size of the positional embeddings. + time_step_dim (`int`, default to `256`): + Output dimension of timestep embeddings. + flip_sin_to_cos (`bool`, default to `True`): + Whether to flip the sin and cos in the positional embeddings when preparing timestep embeddings. + downscale_freq_shift (`int`, default to `0`): + The frequency shift to use when downscaling the timestep embeddings. + timestep_activation_fn (`str`, default to `silu`): + The activation function to use for the timestep embeddings. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["OmniGenBlock"] + _skip_layerwise_casting_patterns = ["patch_embedding", "embed_tokens", "norm"] + + @register_to_config + def __init__( + self, + in_channels: int = 4, + patch_size: int = 2, + hidden_size: int = 3072, + rms_norm_eps: float = 1e-5, + num_attention_heads: int = 32, + num_key_value_heads: int = 32, + intermediate_size: int = 8192, + num_layers: int = 32, + pad_token_id: int = 32000, + vocab_size: int = 32064, + max_position_embeddings: int = 131072, + original_max_position_embeddings: int = 4096, + rope_base: int = 10000, + rope_scaling: dict = None, + pos_embed_max_size: int = 192, + time_step_dim: int = 256, + flip_sin_to_cos: bool = True, + downscale_freq_shift: int = 0, + timestep_activation_fn: str = "silu", + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = in_channels + + self.patch_embedding = OmniGenPatchEmbed( + patch_size=patch_size, + in_channels=in_channels, + embed_dim=hidden_size, + pos_embed_max_size=pos_embed_max_size, + ) + + self.time_proj = Timesteps(time_step_dim, flip_sin_to_cos, downscale_freq_shift) + self.time_token = TimestepEmbedding(time_step_dim, hidden_size, timestep_activation_fn) + self.t_embedder = TimestepEmbedding(time_step_dim, hidden_size, timestep_activation_fn) + + self.embed_tokens = nn.Embedding(vocab_size, hidden_size, pad_token_id) + self.rope = OmniGenSuScaledRotaryEmbedding( + hidden_size // num_attention_heads, + max_position_embeddings=max_position_embeddings, + original_max_position_embeddings=original_max_position_embeddings, + base=rope_base, + rope_scaling=rope_scaling, + ) + + self.layers = nn.ModuleList( + [ + OmniGenBlock(hidden_size, num_attention_heads, num_key_value_heads, intermediate_size, rms_norm_eps) + for _ in range(num_layers) + ] + ) + + self.norm = RMSNorm(hidden_size, eps=rms_norm_eps) + self.norm_out = AdaLayerNorm(hidden_size, norm_elementwise_affine=False, norm_eps=1e-6, chunk_dim=1) + self.proj_out = nn.Linear(hidden_size, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + def _get_multimodal_embeddings( + self, input_ids: torch.Tensor, input_img_latents: list[torch.Tensor], input_image_sizes: dict + ) -> torch.Tensor | None: + if input_ids is None: + return None + + input_img_latents = [x.to(self.dtype) for x in input_img_latents] + condition_tokens = self.embed_tokens(input_ids) + input_img_inx = 0 + input_image_tokens = self.patch_embedding(input_img_latents, is_input_image=True) + for b_inx in input_image_sizes.keys(): + for start_inx, end_inx in input_image_sizes[b_inx]: + # replace the placeholder in text tokens with the image embedding. + condition_tokens[b_inx, start_inx:end_inx] = input_image_tokens[input_img_inx].to( + condition_tokens.dtype + ) + input_img_inx += 1 + return condition_tokens + + def forward( + self, + hidden_states: torch.Tensor, + timestep: int | float | torch.FloatTensor, + input_ids: torch.Tensor, + input_img_latents: list[torch.Tensor], + input_image_sizes: dict[int, list[int]], + attention_mask: torch.Tensor, + position_ids: torch.Tensor, + return_dict: bool = True, + ) -> Transformer2DModelOutput | tuple[torch.Tensor]: + batch_size, num_channels, height, width = hidden_states.shape + p = self.config.patch_size + post_patch_height, post_patch_width = height // p, width // p + + # 1. Patch & Timestep & Conditional Embedding + hidden_states = self.patch_embedding(hidden_states, is_input_image=False) + num_tokens_for_output_image = hidden_states.size(1) + + timestep_proj = self.time_proj(timestep).type_as(hidden_states) + time_token = self.time_token(timestep_proj).unsqueeze(1) + temb = self.t_embedder(timestep_proj) + + condition_tokens = self._get_multimodal_embeddings(input_ids, input_img_latents, input_image_sizes) + if condition_tokens is not None: + hidden_states = torch.cat([condition_tokens, time_token, hidden_states], dim=1) + else: + hidden_states = torch.cat([time_token, hidden_states], dim=1) + + seq_length = hidden_states.size(1) + position_ids = position_ids.view(-1, seq_length).long() + + # 2. Attention mask preprocessing + if attention_mask is not None and attention_mask.dim() == 3: + dtype = hidden_states.dtype + min_dtype = torch.finfo(dtype).min + attention_mask = (1 - attention_mask) * min_dtype + attention_mask = attention_mask.unsqueeze(1).type_as(hidden_states) + + # 3. Rotary position embedding + image_rotary_emb = self.rope(hidden_states, position_ids) + + # 4. Transformer blocks + for block in self.layers: + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, hidden_states, attention_mask, image_rotary_emb + ) + else: + hidden_states = block(hidden_states, attention_mask=attention_mask, image_rotary_emb=image_rotary_emb) + + # 5. Output norm & projection + hidden_states = self.norm(hidden_states) + hidden_states = hidden_states[:, -num_tokens_for_output_image:] + hidden_states = self.norm_out(hidden_states, temb=temb) + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.reshape(batch_size, post_patch_height, post_patch_width, p, p, -1) + output = hidden_states.permute(0, 5, 1, 3, 2, 4).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ovis_image.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ovis_image.py new file mode 100644 index 0000000000000000000000000000000000000000..8280a4871f104587e874b653316a623a21047897 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_ovis_image.py @@ -0,0 +1,577 @@ +# Copyright 2025 Alibaba Ovis-Image Team and The HuggingFace. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import TimestepEmbedding, Timesteps, apply_rotary_emb, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_projections(attn: "OvisImageAttention", hidden_states, encoder_hidden_states=None): + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + encoder_query = encoder_key = encoder_value = None + if encoder_hidden_states is not None and attn.added_kv_proj_dim is not None: + encoder_query = attn.add_q_proj(encoder_hidden_states) + encoder_key = attn.add_k_proj(encoder_hidden_states) + encoder_value = attn.add_v_proj(encoder_hidden_states) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_fused_projections(attn: "OvisImageAttention", hidden_states, encoder_hidden_states=None): + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + + encoder_query = encoder_key = encoder_value = (None,) + if encoder_hidden_states is not None and hasattr(attn, "to_added_qkv"): + encoder_query, encoder_key, encoder_value = attn.to_added_qkv(encoder_hidden_states).chunk(3, dim=-1) + + return query, key, value, encoder_query, encoder_key, encoder_value + + +def _get_qkv_projections(attn: "OvisImageAttention", hidden_states, encoder_hidden_states=None): + if attn.fused_projections: + return _get_fused_projections(attn, hidden_states, encoder_hidden_states) + return _get_projections(attn, hidden_states, encoder_hidden_states) + + +class OvisImageAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError(f"{self.__class__.__name__} requires PyTorch 2.0. Please upgrade your pytorch version.") + + def __call__( + self, + attn: "OvisImageAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + query, key, value, encoder_query, encoder_key, encoder_value = _get_qkv_projections( + attn, hidden_states, encoder_hidden_states + ) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + if attn.added_kv_proj_dim is not None: + encoder_query = encoder_query.unflatten(-1, (attn.heads, -1)) + encoder_key = encoder_key.unflatten(-1, (attn.heads, -1)) + encoder_value = encoder_value.unflatten(-1, (attn.heads, -1)) + + encoder_query = attn.norm_added_q(encoder_query) + encoder_key = attn.norm_added_k(encoder_key) + + query = torch.cat([encoder_query, query], dim=1) + key = torch.cat([encoder_key, key], dim=1) + value = torch.cat([encoder_value, value], dim=1) + + if image_rotary_emb is not None: + query = apply_rotary_emb(query, image_rotary_emb, sequence_dim=1) + key = apply_rotary_emb(key, image_rotary_emb, sequence_dim=1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + encoder_hidden_states, hidden_states = hidden_states.split_with_sizes( + [encoder_hidden_states.shape[1], hidden_states.shape[1] - encoder_hidden_states.shape[1]], dim=1 + ) + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + return hidden_states, encoder_hidden_states + else: + return hidden_states + + +class OvisImageAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = OvisImageAttnProcessor + _available_processors = [ + OvisImageAttnProcessor, + ] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias: bool = False, + added_kv_proj_dim: int | None = None, + added_proj_bias: bool | None = True, + out_bias: bool = True, + eps: float = 1e-5, + out_dim: int = None, + context_pre_only: bool | None = None, + pre_only: bool = False, + elementwise_affine: bool = True, + processor=None, + ): + super().__init__() + + self.head_dim = dim_head + self.inner_dim = out_dim if out_dim is not None else dim_head * heads + self.query_dim = query_dim + self.use_bias = bias + self.dropout = dropout + self.out_dim = out_dim if out_dim is not None else query_dim + self.context_pre_only = context_pre_only + self.pre_only = pre_only + self.heads = out_dim // dim_head if out_dim is not None else heads + self.added_kv_proj_dim = added_kv_proj_dim + self.added_proj_bias = added_proj_bias + + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=elementwise_affine) + self.to_q = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + self.to_v = torch.nn.Linear(query_dim, self.inner_dim, bias=bias) + + if not self.pre_only: + self.to_out = torch.nn.ModuleList([]) + self.to_out.append(torch.nn.Linear(self.inner_dim, self.out_dim, bias=out_bias)) + self.to_out.append(torch.nn.Dropout(dropout)) + + if added_kv_proj_dim is not None: + self.norm_added_q = torch.nn.RMSNorm(dim_head, eps=eps) + self.norm_added_k = torch.nn.RMSNorm(dim_head, eps=eps) + self.add_q_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=added_proj_bias) + self.to_add_out = torch.nn.Linear(self.inner_dim, query_dim, bias=out_bias) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + attn_parameters = set(inspect.signature(self.processor.__call__).parameters.keys()) + quiet_attn_parameters = {"ip_adapter_masks", "ip_hidden_states"} + unused_kwargs = [k for k, _ in kwargs.items() if k not in attn_parameters and k not in quiet_attn_parameters] + if len(unused_kwargs) > 0: + logger.warning( + f"joint_attention_kwargs {unused_kwargs} are not expected by {self.processor.__class__.__name__} and will be ignored." + ) + kwargs = {k: w for k, w in kwargs.items() if k in attn_parameters} + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, image_rotary_emb, **kwargs) + + +@maybe_allow_in_graph +class OvisImageSingleTransformerBlock(nn.Module): + def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, mlp_ratio: float = 4.0): + super().__init__() + self.mlp_hidden_dim = int(dim * mlp_ratio) + + self.norm = AdaLayerNormZeroSingle(dim) + self.proj_mlp = nn.Linear(dim, self.mlp_hidden_dim * 2) + self.act_mlp = nn.SiLU() + self.proj_out = nn.Linear(dim + self.mlp_hidden_dim, dim) + + self.attn = OvisImageAttention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=OvisImageAttnProcessor(), + eps=1e-6, + pre_only=True, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + text_seq_len = encoder_hidden_states.shape[1] + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + residual = hidden_states + norm_hidden_states, gate = self.norm(hidden_states, emb=temb) + mlp_hidden_states, mlp_hidden_gate = torch.split( + self.proj_mlp(norm_hidden_states), [self.mlp_hidden_dim, self.mlp_hidden_dim], dim=-1 + ) + mlp_hidden_states = self.act_mlp(mlp_hidden_gate) * mlp_hidden_states + joint_attention_kwargs = joint_attention_kwargs or {} + attn_output = self.attn( + hidden_states=norm_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2) + gate = gate.unsqueeze(1) + hidden_states = gate * self.proj_out(hidden_states) + hidden_states = residual + hidden_states + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + encoder_hidden_states, hidden_states = hidden_states[:, :text_seq_len], hidden_states[:, text_seq_len:] + return encoder_hidden_states, hidden_states + + +@maybe_allow_in_graph +class OvisImageTransformerBlock(nn.Module): + def __init__( + self, dim: int, num_attention_heads: int, attention_head_dim: int, qk_norm: str = "rms_norm", eps: float = 1e-6 + ): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim) + self.norm1_context = AdaLayerNormZero(dim) + + self.attn = OvisImageAttention( + query_dim=dim, + added_kv_proj_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=OvisImageAttnProcessor(), + eps=eps, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="swiglu") + + self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="swiglu") + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + + norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context( + encoder_hidden_states, emb=temb + ) + joint_attention_kwargs = joint_attention_kwargs or {} + + # Attention. + attention_outputs = self.attn( + hidden_states=norm_hidden_states, + encoder_hidden_states=norm_encoder_hidden_states, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + if len(attention_outputs) == 2: + attn_output, context_attn_output = attention_outputs + elif len(attention_outputs) == 3: + attn_output, context_attn_output, ip_attn_output = attention_outputs + + # Process attention outputs for the `hidden_states`. + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None] + + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + + hidden_states = hidden_states + ff_output + if len(attention_outputs) == 3: + hidden_states = hidden_states + ip_attn_output + + # Process attention outputs for the `encoder_hidden_states`. + context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output + encoder_hidden_states = encoder_hidden_states + context_attn_output + + norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states) + norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None] + + context_ff_output = self.ff_context(norm_encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class OvisImagePosEmbed(nn.Module): + def __init__(self, theta: int, axes_dim: list[int]): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + cos_out = [] + sin_out = [] + pos = ids.float() + is_mps = ids.device.type == "mps" + is_npu = ids.device.type == "npu" + freqs_dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + for i in range(n_axes): + cos, sin = get_1d_rotary_pos_embed( + self.axes_dim[i], + pos[:, i], + theta=self.theta, + repeat_interleave_real=True, + use_real=True, + freqs_dtype=freqs_dtype, + ) + cos_out.append(cos) + sin_out.append(sin) + freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device) + freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device) + return freqs_cos, freqs_sin + + +class OvisImageTransformer2DModel( + ModelMixin, + ConfigMixin, + PeftAdapterMixin, + FromOriginalModelMixin, + CacheMixin, +): + """ + The Transformer model introduced in Ovis-Image. + + Reference: https://github.com/AIDC-AI/Ovis-Image + + Args: + patch_size (`int`, defaults to `1`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `64`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `None`): + The number of channels in the output. If not specified, it defaults to `in_channels`. + num_layers (`int`, defaults to `6`): + The number of layers of dual stream DiT blocks to use. + num_single_layers (`int`, defaults to `27`): + The number of layers of single stream DiT blocks to use. + attention_head_dim (`int`, defaults to `128`): + The number of dimensions to use for each attention head. + num_attention_heads (`int`, defaults to `24`): + The number of attention heads to use. + joint_attention_dim (`int`, defaults to `2048`): + The number of dimensions to use for the joint attention (embedding/channel dimension of + `encoder_hidden_states`). + axes_dims_rope (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions to use for the rotary positional embeddings. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["OvisImageTransformerBlock", "OvisImageSingleTransformerBlock"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + _repeated_blocks = ["OvisImageTransformerBlock", "OvisImageSingleTransformerBlock"] + + @register_to_config + def __init__( + self, + patch_size: int = 1, + in_channels: int = 64, + out_channels: int | None = 64, + num_layers: int = 6, + num_single_layers: int = 27, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 2048, + axes_dims_rope: tuple[int, int, int] = (16, 56, 56), + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.pos_embed = OvisImagePosEmbed(theta=10000, axes_dim=axes_dims_rope) + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=self.inner_dim) + + self.context_embedder_norm = nn.RMSNorm(joint_attention_dim, eps=1e-6) + self.context_embedder = nn.Linear(joint_attention_dim, self.inner_dim) + self.x_embedder = nn.Linear(in_channels, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + OvisImageTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_layers) + ] + ) + + self.single_transformer_blocks = nn.ModuleList( + [ + OvisImageSingleTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + ) + for _ in range(num_single_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_ids: torch.Tensor = None, + txt_ids: torch.Tensor = None, + return_dict: bool = True, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`OvisImageTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`): + Input `hidden_states`. + encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + timestep (`torch.LongTensor`): + Used to indicate denoising step. + img_ids: (`torch.Tensor`): + The position ids for image tokens. + txt_ids (`torch.Tensor`): + The position ids for text tokens. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + hidden_states = self.x_embedder(hidden_states) + + timestep = timestep.to(hidden_states.dtype) * 1000 + + timesteps_proj = self.time_proj(timestep) + temb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_states.dtype)) + + encoder_hidden_states = self.context_embedder_norm(encoder_hidden_states) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if txt_ids.ndim == 3: + logger.warning( + "Passing `txt_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + txt_ids = txt_ids[0] + if img_ids.ndim == 3: + logger.warning( + "Passing `img_ids` 3d torch.Tensor is deprecated." + "Please remove the batch dimension and pass it as a 2d torch Tensor" + ) + img_ids = img_ids[0] + + ids = torch.cat((txt_ids, img_ids), dim=0) + image_rotary_emb = self.pos_embed(ids) + + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + for index_block, block in enumerate(self.single_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + image_rotary_emb, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + image_rotary_emb=image_rotary_emb, + ) + + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_prx.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_prx.py new file mode 100644 index 0000000000000000000000000000000000000000..3c8e8ae4e2c974f7aa68c5425e28130064c01a51 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_prx.py @@ -0,0 +1,801 @@ +# Copyright 2025 The Photoroom and The HuggingFace Teams. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import logging +from ..attention import AttentionMixin, AttentionModuleMixin +from ..attention_dispatch import dispatch_attention_fn +from ..embeddings import get_timestep_embedding +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import RMSNorm + + +logger = logging.get_logger(__name__) + + +def get_image_ids(batch_size: int, height: int, width: int, patch_size: int, device: torch.device) -> torch.Tensor: + r""" + Generates 2D patch coordinate indices for a batch of images. + + Args: + batch_size (`int`): + Number of images in the batch. + height (`int`): + Height of the input images (in pixels). + width (`int`): + Width of the input images (in pixels). + patch_size (`int`): + Size of the square patches that the image is divided into. + device (`torch.device`): + The device on which to create the tensor. + + Returns: + `torch.Tensor`: + Tensor of shape `(batch_size, num_patches, 2)` containing the (row, col) coordinates of each patch in the + image grid. + """ + + img_ids = torch.zeros(height // patch_size, width // patch_size, 2, device=device) + img_ids[..., 0] = torch.arange(height // patch_size, device=device)[:, None] + img_ids[..., 1] = torch.arange(width // patch_size, device=device)[None, :] + return img_ids.reshape((height // patch_size) * (width // patch_size), 2).unsqueeze(0).repeat(batch_size, 1, 1) + + +def apply_rope(xq: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor: + r""" + Applies rotary positional embeddings (RoPE) to a query tensor. + + Args: + xq (`torch.Tensor`): + Input tensor of shape `(..., dim)` representing the queries. + freqs_cis (`torch.Tensor`): + Precomputed rotary frequency components of shape `(..., dim/2, 2)` containing cosine and sine pairs. + + Returns: + `torch.Tensor`: + Tensor of the same shape as `xq` with rotary embeddings applied. + """ + xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) + # Ensure freqs_cis is on the same device as queries to avoid device mismatches with offloading + freqs_cis = freqs_cis.to(device=xq.device, dtype=xq_.dtype) + xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] + return xq_out.reshape(*xq.shape).type_as(xq) + + +class PRXAttnProcessor2_0: + r""" + Processor for implementing PRX-style attention with multi-source tokens and RoPE. Supports multiple attention + backends (Flash Attention, Sage Attention, etc.) via dispatch_attention_fn. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(torch.nn.functional, "scaled_dot_product_attention"): + raise ImportError("PRXAttnProcessor2_0 requires PyTorch 2.0, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: "PRXAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + Apply PRX attention using PRXAttention module. + + Args: + attn: PRXAttention module containing projection layers + hidden_states: Image tokens [B, L_img, D] + encoder_hidden_states: Text tokens [B, L_txt, D] + attention_mask: Boolean mask for text tokens [B, L_txt] + image_rotary_emb: Rotary positional embeddings [B, 1, L_img, head_dim//2, 2, 2] + """ + + if encoder_hidden_states is None: + raise ValueError("PRXAttnProcessor2_0 requires 'encoder_hidden_states' containing text tokens.") + + # Project image tokens to Q, K, V + img_qkv = attn.img_qkv_proj(hidden_states) + B, L_img, _ = img_qkv.shape + img_qkv = img_qkv.reshape(B, L_img, 3, attn.heads, attn.head_dim) + img_qkv = img_qkv.permute(2, 0, 3, 1, 4) # [3, B, H, L_img, D] + img_q, img_k, img_v = img_qkv[0], img_qkv[1], img_qkv[2] + + # Apply QK normalization to image tokens + img_q = attn.norm_q(img_q) + img_k = attn.norm_k(img_k) + + # Project text tokens to K, V + txt_kv = attn.txt_kv_proj(encoder_hidden_states) + B, L_txt, _ = txt_kv.shape + txt_kv = txt_kv.reshape(B, L_txt, 2, attn.heads, attn.head_dim) + txt_kv = txt_kv.permute(2, 0, 3, 1, 4) # [2, B, H, L_txt, D] + txt_k, txt_v = txt_kv[0], txt_kv[1] + + # Apply K normalization to text tokens + txt_k = attn.norm_added_k(txt_k) + + # Apply RoPE to image queries and keys + if image_rotary_emb is not None: + img_q = apply_rope(img_q, image_rotary_emb) + img_k = apply_rope(img_k, image_rotary_emb) + + # Concatenate text and image keys/values + k = torch.cat((txt_k, img_k), dim=2) # [B, H, L_txt + L_img, D] + v = torch.cat((txt_v, img_v), dim=2) # [B, H, L_txt + L_img, D] + + # Build attention mask if provided + attn_mask_tensor = None + if attention_mask is not None: + bs, _, l_img, _ = img_q.shape + l_txt = txt_k.shape[2] + + if attention_mask.dim() != 2: + raise ValueError(f"Unsupported attention_mask shape: {attention_mask.shape}") + if attention_mask.shape[-1] != l_txt: + raise ValueError(f"attention_mask last dim {attention_mask.shape[-1]} must equal text length {l_txt}") + + device = img_q.device + ones_img = torch.ones((bs, l_img), dtype=torch.bool, device=device) + attention_mask = attention_mask.to(device=device, dtype=torch.bool) + joint_mask = torch.cat([attention_mask, ones_img], dim=-1) + attn_mask_tensor = joint_mask[:, None, None, :].expand(-1, attn.heads, l_img, -1) + + # Apply attention using dispatch_attention_fn for backend support + # Reshape to match dispatch_attention_fn expectations: [B, L, H, D] + query = img_q.transpose(1, 2) # [B, L_img, H, D] + key = k.transpose(1, 2) # [B, L_txt + L_img, H, D] + value = v.transpose(1, 2) # [B, L_txt + L_img, H, D] + + attn_output = dispatch_attention_fn( + query, + key, + value, + attn_mask=attn_mask_tensor, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + # Reshape from [B, L_img, H, D] to [B, L_img, H*D] + batch_size, seq_len, num_heads, head_dim = attn_output.shape + attn_output = attn_output.reshape(batch_size, seq_len, num_heads * head_dim) + + # Apply output projection + attn_output = attn.to_out[0](attn_output) + if len(attn.to_out) > 1: + attn_output = attn.to_out[1](attn_output) # dropout if present + + return attn_output + + +class PRXAttention(nn.Module, AttentionModuleMixin): + r""" + PRX-style attention module that handles multi-source tokens and RoPE. Similar to FluxAttention but adapted for + PRX's architecture. + """ + + _default_processor_cls = PRXAttnProcessor2_0 + _available_processors = [PRXAttnProcessor2_0] + + def __init__( + self, + query_dim: int, + heads: int = 8, + dim_head: int = 64, + bias: bool = False, + out_bias: bool = False, + eps: float = 1e-6, + processor=None, + ): + super().__init__() + + self.heads = heads + self.head_dim = dim_head + self.inner_dim = dim_head * heads + self.query_dim = query_dim + + self.img_qkv_proj = nn.Linear(query_dim, query_dim * 3, bias=bias) + + self.norm_q = RMSNorm(self.head_dim, eps=eps, elementwise_affine=True) + self.norm_k = RMSNorm(self.head_dim, eps=eps, elementwise_affine=True) + + self.txt_kv_proj = nn.Linear(query_dim, query_dim * 2, bias=bias) + self.norm_added_k = RMSNorm(self.head_dim, eps=eps, elementwise_affine=True) + + self.to_out = nn.ModuleList([]) + self.to_out.append(nn.Linear(self.inner_dim, query_dim, bias=out_bias)) + self.to_out.append(nn.Dropout(0.0)) + + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + return self.processor( + self, + hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + **kwargs, + ) + + +# inspired from https://github.com/black-forest-labs/flux/blob/main/src/flux/modules/layers.py +class PRXEmbedND(nn.Module): + r""" + N-dimensional rotary positional embedding. + + This module creates rotary embeddings (RoPE) across multiple axes, where each axis can have its own embedding + dimension. The embeddings are combined and returned as a single tensor + + Args: + dim (int): + Base embedding dimension (must be even). + theta (int): + Scaling factor that controls the frequency spectrum of the rotary embeddings. + axes_dim (list[int]): + list of embedding dimensions for each axis (each must be even). + """ + + def __init__(self, dim: int, theta: int, axes_dim: list[int]): + super().__init__() + self.dim = dim + self.theta = theta + self.axes_dim = axes_dim + + def rope(self, pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: + assert dim % 2 == 0 + + is_mps = pos.device.type == "mps" + is_npu = pos.device.type == "npu" + dtype = torch.float32 if (is_mps or is_npu) else torch.float64 + + scale = torch.arange(0, dim, 2, dtype=dtype, device=pos.device) / dim + omega = 1.0 / (theta**scale) + out = pos.unsqueeze(-1) * omega.unsqueeze(0) + out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1) + # Native PyTorch equivalent of: Rearrange("b n d (i j) -> b n d i j", i=2, j=2) + # out shape: (b, n, d, 4) -> reshape to (b, n, d, 2, 2) + out = out.reshape(*out.shape[:-1], 2, 2) + return out.float() + + def forward(self, ids: torch.Tensor) -> torch.Tensor: + n_axes = ids.shape[-1] + emb = torch.cat( + [self.rope(ids[:, :, i], self.axes_dim[i], self.theta) for i in range(n_axes)], + dim=-3, + ) + return emb.unsqueeze(1) + + +class MLPEmbedder(nn.Module): + r""" + A simple 2-layer MLP used for embedding inputs. + + Args: + in_dim (`int`): + Dimensionality of the input features. + hidden_dim (`int`): + Dimensionality of the hidden and output embedding space. + + Returns: + `torch.Tensor`: + Tensor of shape `(..., hidden_dim)` containing the embedded representations. + """ + + def __init__(self, in_dim: int, hidden_dim: int): + super().__init__() + self.in_layer = nn.Linear(in_dim, hidden_dim, bias=True) + self.silu = nn.SiLU() + self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.out_layer(self.silu(self.in_layer(x))) + + +class Modulation(nn.Module): + r""" + Modulation network that generates scale, shift, and gating parameters. + + Given an input vector, the module projects it through a linear layer to produce six chunks, which are grouped into + two tuples `(shift, scale, gate)`. + + Args: + dim (`int`): + Dimensionality of the input vector. The output will have `6 * dim` features internally. + + Returns: + ((`torch.Tensor`, `torch.Tensor`, `torch.Tensor`), (`torch.Tensor`, `torch.Tensor`, `torch.Tensor`)): + Two tuples `(shift, scale, gate)`. + """ + + def __init__(self, dim: int): + super().__init__() + self.lin = nn.Linear(dim, 6 * dim, bias=True) + nn.init.constant_(self.lin.weight, 0) + nn.init.constant_(self.lin.bias, 0) + + def forward( + self, vec: torch.Tensor + ) -> tuple[tuple[torch.Tensor, torch.Tensor, torch.Tensor], tuple[torch.Tensor, torch.Tensor, torch.Tensor]]: + out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(6, dim=-1) + return tuple(out[:3]), tuple(out[3:]) + + +class PRXBlock(nn.Module): + r""" + Multimodal transformer block with text–image cross-attention, modulation, and MLP. + + Args: + hidden_size (`int`): + Dimension of the hidden representations. + num_heads (`int`): + Number of attention heads. + mlp_ratio (`float`, *optional*, defaults to 4.0): + Expansion ratio for the hidden dimension inside the MLP. + qk_scale (`float`, *optional*): + Scale factor for queries and keys. If not provided, defaults to ``head_dim**-0.5``. + + Attributes: + img_pre_norm (`nn.LayerNorm`): + Pre-normalization applied to image tokens before attention. + attention (`PRXAttention`): + Multi-head attention module with built-in QKV projections and normalizations for cross-attention between + image and text tokens. + post_attention_layernorm (`nn.LayerNorm`): + Normalization applied after attention. + gate_proj / up_proj / down_proj (`nn.Linear`): + Feedforward layers forming the gated MLP. + mlp_act (`nn.GELU`): + Nonlinear activation used in the MLP. + modulation (`Modulation`): + Produces scale/shift/gating parameters for modulated layers. + + Methods: + The forward method performs cross-attention and the MLP with modulation. + """ + + def __init__( + self, + hidden_size: int, + num_heads: int, + mlp_ratio: float = 4.0, + qk_scale: float | None = None, + ): + super().__init__() + + self.hidden_dim = hidden_size + self.num_heads = num_heads + self.head_dim = hidden_size // num_heads + self.scale = qk_scale or self.head_dim**-0.5 + + self.mlp_hidden_dim = int(hidden_size * mlp_ratio) + self.hidden_size = hidden_size + + # Pre-attention normalization for image tokens + self.img_pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + + # PRXAttention module with built-in projections and norms + self.attention = PRXAttention( + query_dim=hidden_size, + heads=num_heads, + dim_head=self.head_dim, + bias=False, + out_bias=False, + eps=1e-6, + processor=PRXAttnProcessor2_0(), + ) + + # mlp + self.post_attention_layernorm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.gate_proj = nn.Linear(hidden_size, self.mlp_hidden_dim, bias=False) + self.up_proj = nn.Linear(hidden_size, self.mlp_hidden_dim, bias=False) + self.down_proj = nn.Linear(self.mlp_hidden_dim, hidden_size, bias=False) + self.mlp_act = nn.GELU(approximate="tanh") + + self.modulation = Modulation(hidden_size) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: torch.Tensor, + attention_mask: torch.Tensor | None = None, + **kwargs: dict[str, Any], + ) -> torch.Tensor: + r""" + Runs modulation-gated cross-attention and MLP, with residual connections. + + Args: + hidden_states (`torch.Tensor`): + Image tokens of shape `(B, L_img, hidden_size)`. + encoder_hidden_states (`torch.Tensor`): + Text tokens of shape `(B, L_txt, hidden_size)`. + temb (`torch.Tensor`): + Conditioning vector used by `Modulation` to produce scale/shift/gates, shape `(B, hidden_size)` (or + broadcastable). + image_rotary_emb (`torch.Tensor`): + Rotary positional embeddings applied inside attention. + attention_mask (`torch.Tensor`, *optional*): + Boolean mask for text tokens of shape `(B, L_txt)`, where `0` marks padding. + **kwargs: + Additional keyword arguments for API compatibility. + + Returns: + `torch.Tensor`: + Updated image tokens of shape `(B, L_img, hidden_size)`. + """ + + mod_attn, mod_mlp = self.modulation(temb) + attn_shift, attn_scale, attn_gate = mod_attn + mlp_shift, mlp_scale, mlp_gate = mod_mlp + + hidden_states_mod = (1 + attn_scale) * self.img_pre_norm(hidden_states) + attn_shift + + attn_out = self.attention( + hidden_states=hidden_states_mod, + encoder_hidden_states=encoder_hidden_states, + attention_mask=attention_mask, + image_rotary_emb=image_rotary_emb, + ) + + hidden_states = hidden_states + attn_gate * attn_out + + x = (1 + mlp_scale) * self.post_attention_layernorm(hidden_states) + mlp_shift + hidden_states = hidden_states + mlp_gate * (self.down_proj(self.mlp_act(self.gate_proj(x)) * self.up_proj(x))) + return hidden_states + + +class FinalLayer(nn.Module): + r""" + Final projection layer with adaptive LayerNorm modulation. + + This layer applies a normalized and modulated transformation to input tokens and projects them into patch-level + outputs. + + Args: + hidden_size (`int`): + Dimensionality of the input tokens. + patch_size (`int`): + Size of the square image patches. + out_channels (`int`): + Number of output channels per pixel (e.g. RGB = 3). + + Forward Inputs: + x (`torch.Tensor`): + Input tokens of shape `(B, L, hidden_size)`, where `L` is the number of patches. + vec (`torch.Tensor`): + Conditioning vector of shape `(B, hidden_size)` used to generate shift and scale parameters for adaptive + LayerNorm. + + Returns: + `torch.Tensor`: + Projected patch outputs of shape `(B, L, patch_size * patch_size * out_channels)`. + """ + + def __init__(self, hidden_size: int, patch_size: int, out_channels: int): + super().__init__() + self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True) + self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True)) + + def forward(self, x: torch.Tensor, vec: torch.Tensor) -> torch.Tensor: + shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1) + x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] + x = self.linear(x) + return x + + +def img2seq(img: torch.Tensor, patch_size: int) -> torch.Tensor: + r""" + Flattens an image tensor into a sequence of non-overlapping patches. + + Args: + img (`torch.Tensor`): + Input image tensor of shape `(B, C, H, W)`. + patch_size (`int`): + Size of each square patch. Must evenly divide both `H` and `W`. + + Returns: + `torch.Tensor`: + Flattened patch sequence of shape `(B, L, C * patch_size * patch_size)`, where `L = (H // patch_size) * (W + // patch_size)` is the number of patches. + """ + b, c, h, w = img.shape + p = patch_size + + # Reshape to (B, C, H//p, p, W//p, p) separating grid and patch dimensions + img = img.reshape(b, c, h // p, p, w // p, p) + + # Permute to (B, H//p, W//p, C, p, p) using einsum + # n=batch, c=channels, h=grid_height, p=patch_height, w=grid_width, q=patch_width + img = torch.einsum("nchpwq->nhwcpq", img) + + # Flatten to (B, L, C * p * p) + img = img.reshape(b, -1, c * p * p) + return img + + +def seq2img(seq: torch.Tensor, patch_size: int, shape: torch.Tensor) -> torch.Tensor: + r""" + Reconstructs an image tensor from a sequence of patches (inverse of `img2seq`). + + Args: + seq (`torch.Tensor`): + Patch sequence of shape `(B, L, C * patch_size * patch_size)`, where `L = (H // patch_size) * (W // + patch_size)`. + patch_size (`int`): + Size of each square patch. + shape (`tuple` or `torch.Tensor`): + The original image spatial shape `(H, W)`. If a tensor is provided, the first two values are interpreted as + height and width. + + Returns: + `torch.Tensor`: + Reconstructed image tensor of shape `(B, C, H, W)`. + """ + if isinstance(shape, tuple): + h, w = shape[-2:] + elif isinstance(shape, torch.Tensor): + h, w = (int(shape[0]), int(shape[1])) + else: + raise NotImplementedError(f"shape type {type(shape)} not supported") + + b, l, d = seq.shape + p = patch_size + c = d // (p * p) + + # Reshape back to grid structure: (B, H//p, W//p, C, p, p) + seq = seq.reshape(b, h // p, w // p, c, p, p) + + # Permute back to image layout: (B, C, H//p, p, W//p, p) + # n=batch, h=grid_height, w=grid_width, c=channels, p=patch_height, q=patch_width + seq = torch.einsum("nhwcpq->nchpwq", seq) + + # Final reshape to (B, C, H, W) + seq = seq.reshape(b, c, h, w) + return seq + + +class PRXTransformer2DModel(ModelMixin, ConfigMixin, AttentionMixin): + r""" + Transformer-based 2D model for text to image generation. + + Args: + in_channels (`int`, *optional*, defaults to 16): + Number of input channels in the latent image. + patch_size (`int`, *optional*, defaults to 2): + Size of the square patches used to flatten the input image. + context_in_dim (`int`, *optional*, defaults to 2304): + Dimensionality of the text conditioning input. + hidden_size (`int`, *optional*, defaults to 1792): + Dimension of the hidden representation. + mlp_ratio (`float`, *optional*, defaults to 3.5): + Expansion ratio for the hidden dimension inside MLP blocks. + num_heads (`int`, *optional*, defaults to 28): + Number of attention heads. + depth (`int`, *optional*, defaults to 16): + Number of transformer blocks. + axes_dim (`list[int]`, *optional*): + list of dimensions for each positional embedding axis. Defaults to `[32, 32]`. + theta (`int`, *optional*, defaults to 10000): + Frequency scaling factor for rotary embeddings. + time_factor (`float`, *optional*, defaults to 1000.0): + Scaling factor applied in timestep embeddings. + time_max_period (`int`, *optional*, defaults to 10000): + Maximum frequency period for timestep embeddings. + + Attributes: + pe_embedder (`EmbedND`): + Multi-axis rotary embedding generator for positional encodings. + img_in (`nn.Linear`): + Projection layer for image patch tokens. + time_in (`MLPEmbedder`): + Embedding layer for timestep embeddings. + txt_in (`nn.Linear`): + Projection layer for text conditioning. + blocks (`nn.ModuleList`): + Stack of transformer blocks (`PRXBlock`). + final_layer (`LastLayer`): + Projection layer mapping hidden tokens back to patch outputs. + + Methods: + attn_processors: + Returns a dictionary of all attention processors in the model. + set_attn_processor(processor): + Replaces attention processors across all attention layers. + process_inputs(image_latent, txt): + Converts inputs into patch tokens, encodes text, and produces positional encodings. + compute_timestep_embedding(timestep, dtype): + Creates a timestep embedding of dimension 256, scaled and projected. + forward_transformers(image_latent, cross_attn_conditioning, timestep, time_embedding, attention_mask, + **block_kwargs): + Runs the sequence of transformer blocks over image and text tokens. + forward(image_latent, timestep, cross_attn_conditioning, micro_conditioning, cross_attn_mask=None, + attention_kwargs=None, return_dict=True): + Full forward pass from latent input to reconstructed output image. + + Returns: + `Transformer2DModelOutput` if `return_dict=True` (default), otherwise a tuple containing: + - `sample` (`torch.Tensor`): Reconstructed image of shape `(B, C, H, W)`. + """ + + config_name = "config.json" + _supports_gradient_checkpointing = True + + @register_to_config + def __init__( + self, + in_channels: int = 16, + patch_size: int = 2, + context_in_dim: int = 2304, + hidden_size: int = 1792, + mlp_ratio: float = 3.5, + num_heads: int = 28, + depth: int = 16, + axes_dim: list = None, + theta: int = 10000, + time_factor: float = 1000.0, + time_max_period: int = 10000, + ): + super().__init__() + + if axes_dim is None: + axes_dim = [32, 32] + + # Store parameters directly + self.in_channels = in_channels + self.patch_size = patch_size + self.out_channels = self.in_channels * self.patch_size**2 + + self.time_factor = time_factor + self.time_max_period = time_max_period + + if hidden_size % num_heads != 0: + raise ValueError(f"Hidden size {hidden_size} must be divisible by num_heads {num_heads}") + + pe_dim = hidden_size // num_heads + + if sum(axes_dim) != pe_dim: + raise ValueError(f"Got {axes_dim} but expected positional dim {pe_dim}") + + self.hidden_size = hidden_size + self.num_heads = num_heads + self.pe_embedder = PRXEmbedND(dim=pe_dim, theta=theta, axes_dim=axes_dim) + self.img_in = nn.Linear(self.in_channels * self.patch_size**2, self.hidden_size, bias=True) + self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) + self.txt_in = nn.Linear(context_in_dim, self.hidden_size) + + self.blocks = nn.ModuleList( + [ + PRXBlock( + self.hidden_size, + self.num_heads, + mlp_ratio=mlp_ratio, + ) + for i in range(depth) + ] + ) + + self.final_layer = FinalLayer(self.hidden_size, 1, self.out_channels) + + self.gradient_checkpointing = False + + def _compute_timestep_embedding(self, timestep: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: + return self.time_in( + get_timestep_embedding( + timesteps=timestep, + embedding_dim=256, + max_period=self.time_max_period, + scale=self.time_factor, + flip_sin_to_cos=True, # Match original cos, sin order + downscale_freq_shift=0.0, + ).to(dtype) + ) + + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor, ...] | Transformer2DModelOutput: + r""" + Forward pass of the PRXTransformer2DModel. + + The latent image is split into patch tokens, combined with text conditioning, and processed through a stack of + transformer blocks modulated by the timestep. The output is reconstructed into the latent image space. + + Args: + hidden_states (`torch.Tensor`): + Input latent image tensor of shape `(B, C, H, W)`. + timestep (`torch.Tensor`): + Timestep tensor of shape `(B,)` or `(1,)`, used for temporal conditioning. + encoder_hidden_states (`torch.Tensor`): + Text conditioning tensor of shape `(B, L_txt, context_in_dim)`. + attention_mask (`torch.Tensor`, *optional*): + Boolean mask of shape `(B, L_txt)`, where `0` marks padding in the text sequence. + attention_kwargs (`dict`, *optional*): + Additional arguments passed to attention layers. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return a `Transformer2DModelOutput` or a tuple. + + Returns: + `Transformer2DModelOutput` if `return_dict=True`, otherwise a tuple: + + - `sample` (`torch.Tensor`): Output latent image of shape `(B, C, H, W)`. + """ + # Process text conditioning + txt = self.txt_in(encoder_hidden_states) + + # Convert image to sequence and embed + img = img2seq(hidden_states, self.patch_size) + img = self.img_in(img) + + # Generate positional embeddings + bs, _, h, w = hidden_states.shape + img_ids = get_image_ids(bs, h, w, patch_size=self.patch_size, device=hidden_states.device) + pe = self.pe_embedder(img_ids) + + # Compute time embedding + vec = self._compute_timestep_embedding(timestep, dtype=img.dtype) + + # Apply transformer blocks + for block in self.blocks: + if torch.is_grad_enabled() and self.gradient_checkpointing: + img = self._gradient_checkpointing_func( + block.__call__, + img, + txt, + vec, + pe, + attention_mask, + ) + else: + img = block( + hidden_states=img, + encoder_hidden_states=txt, + temb=vec, + image_rotary_emb=pe, + attention_mask=attention_mask, + ) + + # Final layer and convert back to image + img = self.final_layer(img, vec) + output = seq2img(img, self.patch_size, hidden_states.shape) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_qwenimage.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_qwenimage.py new file mode 100644 index 0000000000000000000000000000000000000000..a54cb3b8e0920e62186a526e14d23c40483398af --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_qwenimage.py @@ -0,0 +1,978 @@ +# Copyright 2025 Qwen-Image Team, The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import math +from math import prod +from typing import Any + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, deprecate, logging +from ...utils.torch_utils import maybe_allow_in_graph +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..cache_utils import CacheMixin +from ..embeddings import TimestepEmbedding, Timesteps +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def get_timestep_embedding( + timesteps: torch.Tensor, + embedding_dim: int, + flip_sin_to_cos: bool = False, + downscale_freq_shift: float = 1, + scale: float = 1, + max_period: int = 10000, +) -> torch.Tensor: + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + + Args + timesteps (torch.Tensor): + a 1-D Tensor of N indices, one per batch element. These may be fractional. + embedding_dim (int): + the dimension of the output. + flip_sin_to_cos (bool): + Whether the embedding order should be `cos, sin` (if True) or `sin, cos` (if False) + downscale_freq_shift (float): + Controls the delta between frequencies between dimensions + scale (float): + Scaling factor applied to the embeddings. + max_period (int): + Controls the maximum frequency of the embeddings + Returns + torch.Tensor: an [N x dim] Tensor of positional embeddings. + """ + assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" + + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * torch.arange( + start=0, end=half_dim, dtype=torch.float32, device=timesteps.device + ) + exponent = exponent / (half_dim - downscale_freq_shift) + + emb = torch.exp(exponent).to(timesteps.dtype) + emb = timesteps[:, None].float() * emb[None, :] + + # scale embeddings + emb = scale * emb + + # concat sine and cosine embeddings + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) + return emb + + +def apply_rotary_emb_qwen( + x: torch.Tensor, + freqs_cis: torch.Tensor | tuple[torch.Tensor], + use_real: bool = True, + use_real_unbind_dim: int = -1, +) -> tuple[torch.Tensor, torch.Tensor]: + """ + Apply rotary embeddings to input tensors using the given frequency tensor. This function applies rotary embeddings + to the given query or key 'x' tensors using the provided frequency tensor 'freqs_cis'. The input tensors are + reshaped as complex numbers, and the frequency tensor is reshaped for broadcasting compatibility. The resulting + tensors contain rotary embeddings and are returned as real tensors. + + Args: + x (`torch.Tensor`): + Query or key tensor to apply rotary embeddings. [B, S, H, D] xk (torch.Tensor): Key tensor to apply + freqs_cis (`tuple[torch.Tensor]`): Precomputed frequency tensor for complex exponentials. ([S, D], [S, D],) + + Returns: + tuple[torch.Tensor, torch.Tensor]: tuple of modified query tensor and key tensor with rotary embeddings. + """ + if use_real: + cos, sin = freqs_cis # [S, D] + cos = cos[None, None] + sin = sin[None, None] + cos, sin = cos.to(x.device), sin.to(x.device) + + if use_real_unbind_dim == -1: + # Used for flux, cogvideox, hunyuan-dit + x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1) # [B, S, H, D//2] + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3) + elif use_real_unbind_dim == -2: + # Used for Stable Audio, OmniGen, CogView4 and Cosmos + x_real, x_imag = x.reshape(*x.shape[:-1], 2, -1).unbind(-2) # [B, S, H, D//2] + x_rotated = torch.cat([-x_imag, x_real], dim=-1) + else: + raise ValueError(f"`use_real_unbind_dim={use_real_unbind_dim}` but should be -1 or -2.") + + out = (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + + return out + else: + x_rotated = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2)) + freqs_cis = freqs_cis.unsqueeze(1) + x_out = torch.view_as_real(x_rotated * freqs_cis).flatten(3) + + return x_out.type_as(x) + + +def compute_text_seq_len_from_mask( + encoder_hidden_states: torch.Tensor, encoder_hidden_states_mask: torch.Tensor | None +) -> tuple[int, torch.Tensor | None, torch.Tensor | None]: + """ + Compute text sequence length without assuming contiguous masks. Returns length for RoPE and a normalized bool mask. + """ + batch_size, text_seq_len = encoder_hidden_states.shape[:2] + if encoder_hidden_states_mask is None: + return text_seq_len, None, None + + if encoder_hidden_states_mask.shape[:2] != (batch_size, text_seq_len): + raise ValueError( + f"`encoder_hidden_states_mask` shape {encoder_hidden_states_mask.shape} must match " + f"(batch_size, text_seq_len)=({batch_size}, {text_seq_len})." + ) + + if encoder_hidden_states_mask.dtype != torch.bool: + encoder_hidden_states_mask = encoder_hidden_states_mask.to(torch.bool) + + position_ids = torch.arange(text_seq_len, device=encoder_hidden_states.device, dtype=torch.long) + active_positions = torch.where(encoder_hidden_states_mask, position_ids, position_ids.new_zeros(())) + has_active = encoder_hidden_states_mask.any(dim=1) + per_sample_len = torch.where( + has_active, + active_positions.max(dim=1).values + 1, + torch.as_tensor(text_seq_len, device=encoder_hidden_states.device), + ) + return text_seq_len, per_sample_len, encoder_hidden_states_mask + + +class QwenTimestepProjEmbeddings(nn.Module): + def __init__(self, embedding_dim, use_additional_t_cond=False): + super().__init__() + + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0, scale=1000) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + self.use_additional_t_cond = use_additional_t_cond + if use_additional_t_cond: + self.addition_t_embedding = nn.Embedding(2, embedding_dim) + + def forward(self, timestep, hidden_states, addition_t_cond=None): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_states.dtype)) # (N, D) + + conditioning = timesteps_emb + if self.use_additional_t_cond: + if addition_t_cond is None: + raise ValueError("When additional_t_cond is True, addition_t_cond must be provided.") + addition_t_emb = self.addition_t_embedding(addition_t_cond) + addition_t_emb = addition_t_emb.to(dtype=hidden_states.dtype) + conditioning = conditioning + addition_t_emb + + return conditioning + + +class QwenEmbedRope(nn.Module): + def __init__(self, theta: int, axes_dim: list[int], scale_rope=False): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + pos_index = torch.arange(4096) + neg_index = torch.arange(4096).flip(0) * -1 - 1 + self.pos_freqs = torch.cat( + [ + self.rope_params(pos_index, self.axes_dim[0], self.theta), + self.rope_params(pos_index, self.axes_dim[1], self.theta), + self.rope_params(pos_index, self.axes_dim[2], self.theta), + ], + dim=1, + ) + self.neg_freqs = torch.cat( + [ + self.rope_params(neg_index, self.axes_dim[0], self.theta), + self.rope_params(neg_index, self.axes_dim[1], self.theta), + self.rope_params(neg_index, self.axes_dim[2], self.theta), + ], + dim=1, + ) + + # DO NOT USING REGISTER BUFFER HERE, IT WILL CAUSE COMPLEX NUMBERS LOSE ITS IMAGINARY PART + self.scale_rope = scale_rope + + def rope_params(self, index, dim, theta=10000): + """ + Args: + index: [0, 1, 2, 3] 1D Tensor representing the position index of the token + """ + assert dim % 2 == 0 + freqs = torch.outer(index, 1.0 / torch.pow(theta, torch.arange(0, dim, 2).to(torch.float32).div(dim))) + freqs = torch.polar(torch.ones_like(freqs), freqs) + return freqs + + def forward( + self, + video_fhw: tuple[int, int, int, list[tuple[int, int, int]]], + txt_seq_lens: list[int] | None = None, + device: torch.device = None, + max_txt_seq_len: int | torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + """ + Args: + video_fhw (`tuple[int, int, int]` or `list[tuple[int, int, int]]`): + A list of 3 integers [frame, height, width] representing the shape of the video. + txt_seq_lens (`list[int]`, *optional*, **Deprecated**): + Deprecated parameter. Use `max_txt_seq_len` instead. If provided, the maximum value will be used. + device: (`torch.device`, *optional*): + The device on which to perform the RoPE computation. + max_txt_seq_len (`int` or `torch.Tensor`, *optional*): + The maximum text sequence length for RoPE computation. This should match the encoder hidden states + sequence length. Can be either an int or a scalar tensor (for torch.compile compatibility). + """ + # Handle deprecated txt_seq_lens parameter + if txt_seq_lens is not None: + deprecate( + "txt_seq_lens", + "0.39.0", + "Passing `txt_seq_lens` is deprecated and will be removed in version 0.39.0. " + "Please use `max_txt_seq_len` instead. " + "The new parameter accepts a single int or tensor value representing the maximum text sequence length.", + standard_warn=False, + ) + if max_txt_seq_len is None: + # Use max of txt_seq_lens for backward compatibility + max_txt_seq_len = max(txt_seq_lens) if isinstance(txt_seq_lens, list) else txt_seq_lens + + if max_txt_seq_len is None: + raise ValueError("Either `max_txt_seq_len` or `txt_seq_lens` (deprecated) must be provided.") + + # Validate batch inference with variable-sized images + if isinstance(video_fhw, list) and len(video_fhw) > 1: + # Check if all instances have the same size + first_fhw = video_fhw[0] + if not all(fhw == first_fhw for fhw in video_fhw): + logger.warning( + "Batch inference with variable-sized images is not currently supported in QwenEmbedRope. " + "All images in the batch should have the same dimensions (frame, height, width). " + f"Detected sizes: {video_fhw}. Using the first image's dimensions {first_fhw} " + "for RoPE computation, which may lead to incorrect results for other images in the batch." + ) + + if isinstance(video_fhw, list): + video_fhw = video_fhw[0] + if not isinstance(video_fhw, list): + video_fhw = [video_fhw] + + vid_freqs = [] + max_vid_index = 0 + for idx, fhw in enumerate(video_fhw): + frame, height, width = fhw + # RoPE frequencies are cached via a lru_cache decorator on _compute_video_freqs + video_freq = self._compute_video_freqs(frame, height, width, idx, device) + vid_freqs.append(video_freq) + + if self.scale_rope: + max_vid_index = max(height // 2, width // 2, max_vid_index) + else: + max_vid_index = max(height, width, max_vid_index) + + max_txt_seq_len_int = int(max_txt_seq_len) + # Create device-specific copy for text freqs without modifying self.pos_freqs + txt_freqs = self.pos_freqs.to(device)[max_vid_index : max_vid_index + max_txt_seq_len_int, ...] + vid_freqs = torch.cat(vid_freqs, dim=0) + + return vid_freqs, txt_freqs + + @functools.lru_cache(maxsize=128) + def _compute_video_freqs( + self, frame: int, height: int, width: int, idx: int = 0, device: torch.device = None + ) -> torch.Tensor: + seq_lens = frame * height * width + pos_freqs = self.pos_freqs.to(device) if device is not None else self.pos_freqs + neg_freqs = self.neg_freqs.to(device) if device is not None else self.neg_freqs + + freqs_pos = pos_freqs.split([x // 2 for x in self.axes_dim], dim=1) + freqs_neg = neg_freqs.split([x // 2 for x in self.axes_dim], dim=1) + + freqs_frame = freqs_pos[0][idx : idx + frame].view(frame, 1, 1, -1).expand(frame, height, width, -1) + if self.scale_rope: + freqs_height = torch.cat([freqs_neg[1][-(height - height // 2) :], freqs_pos[1][: height // 2]], dim=0) + freqs_height = freqs_height.view(1, height, 1, -1).expand(frame, height, width, -1) + freqs_width = torch.cat([freqs_neg[2][-(width - width // 2) :], freqs_pos[2][: width // 2]], dim=0) + freqs_width = freqs_width.view(1, 1, width, -1).expand(frame, height, width, -1) + else: + freqs_height = freqs_pos[1][:height].view(1, height, 1, -1).expand(frame, height, width, -1) + freqs_width = freqs_pos[2][:width].view(1, 1, width, -1).expand(frame, height, width, -1) + + freqs = torch.cat([freqs_frame, freqs_height, freqs_width], dim=-1).reshape(seq_lens, -1) + return freqs.clone().contiguous() + + +class QwenEmbedLayer3DRope(nn.Module): + def __init__(self, theta: int, axes_dim: list[int], scale_rope=False): + super().__init__() + self.theta = theta + self.axes_dim = axes_dim + pos_index = torch.arange(4096) + neg_index = torch.arange(4096).flip(0) * -1 - 1 + self.pos_freqs = torch.cat( + [ + self.rope_params(pos_index, self.axes_dim[0], self.theta), + self.rope_params(pos_index, self.axes_dim[1], self.theta), + self.rope_params(pos_index, self.axes_dim[2], self.theta), + ], + dim=1, + ) + self.neg_freqs = torch.cat( + [ + self.rope_params(neg_index, self.axes_dim[0], self.theta), + self.rope_params(neg_index, self.axes_dim[1], self.theta), + self.rope_params(neg_index, self.axes_dim[2], self.theta), + ], + dim=1, + ) + + self.scale_rope = scale_rope + + def rope_params(self, index, dim, theta=10000): + """ + Args: + index: [0, 1, 2, 3] 1D Tensor representing the position index of the token + """ + assert dim % 2 == 0 + freqs = torch.outer(index, 1.0 / torch.pow(theta, torch.arange(0, dim, 2).to(torch.float32).div(dim))) + freqs = torch.polar(torch.ones_like(freqs), freqs) + return freqs + + def forward( + self, + video_fhw: tuple[int, int, int, list[tuple[int, int, int]]], + max_txt_seq_len: int | torch.Tensor, + device: torch.device = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + """ + Args: + video_fhw (`tuple[int, int, int]` or `list[tuple[int, int, int]]`): + A list of 3 integers [frame, height, width] representing the shape of the video, or a list of layer + structures. + max_txt_seq_len (`int` or `torch.Tensor`): + The maximum text sequence length for RoPE computation. This should match the encoder hidden states + sequence length. Can be either an int or a scalar tensor (for torch.compile compatibility). + device: (`torch.device`, *optional*): + The device on which to perform the RoPE computation. + """ + # Validate batch inference with variable-sized images + # In Layer3DRope, the outer list represents batch, inner list/tuple represents layers + if isinstance(video_fhw, list) and len(video_fhw) > 1: + # Check if this is batch inference (list of layer lists/tuples) + first_entry = video_fhw[0] + if not all(entry == first_entry for entry in video_fhw): + logger.warning( + "Batch inference with variable-sized images is not currently supported in QwenEmbedLayer3DRope. " + "All images in the batch should have the same layer structure. " + f"Detected sizes: {video_fhw}. Using the first image's layer structure {first_entry} " + "for RoPE computation, which may lead to incorrect results for other images in the batch." + ) + + if isinstance(video_fhw, list): + video_fhw = video_fhw[0] + if not isinstance(video_fhw, list): + video_fhw = [video_fhw] + + vid_freqs = [] + max_vid_index = 0 + layer_num = len(video_fhw) - 1 + for idx, fhw in enumerate(video_fhw): + frame, height, width = fhw + if idx != layer_num: + video_freq = self._compute_video_freqs(frame, height, width, idx, device) + else: + ### For the condition image, we set the layer index to -1 + video_freq = self._compute_condition_freqs(frame, height, width, device) + vid_freqs.append(video_freq) + + if self.scale_rope: + max_vid_index = max(height // 2, width // 2, max_vid_index) + else: + max_vid_index = max(height, width, max_vid_index) + + max_vid_index = max(max_vid_index, layer_num) + max_txt_seq_len_int = int(max_txt_seq_len) + # Create device-specific copy for text freqs without modifying self.pos_freqs + txt_freqs = self.pos_freqs.to(device)[max_vid_index : max_vid_index + max_txt_seq_len_int, ...] + vid_freqs = torch.cat(vid_freqs, dim=0) + + return vid_freqs, txt_freqs + + @functools.lru_cache(maxsize=None) + def _compute_video_freqs(self, frame, height, width, idx=0, device: torch.device = None): + seq_lens = frame * height * width + pos_freqs = self.pos_freqs.to(device) if device is not None else self.pos_freqs + neg_freqs = self.neg_freqs.to(device) if device is not None else self.neg_freqs + + freqs_pos = pos_freqs.split([x // 2 for x in self.axes_dim], dim=1) + freqs_neg = neg_freqs.split([x // 2 for x in self.axes_dim], dim=1) + + freqs_frame = freqs_pos[0][idx : idx + frame].view(frame, 1, 1, -1).expand(frame, height, width, -1) + if self.scale_rope: + freqs_height = torch.cat([freqs_neg[1][-(height - height // 2) :], freqs_pos[1][: height // 2]], dim=0) + freqs_height = freqs_height.view(1, height, 1, -1).expand(frame, height, width, -1) + freqs_width = torch.cat([freqs_neg[2][-(width - width // 2) :], freqs_pos[2][: width // 2]], dim=0) + freqs_width = freqs_width.view(1, 1, width, -1).expand(frame, height, width, -1) + else: + freqs_height = freqs_pos[1][:height].view(1, height, 1, -1).expand(frame, height, width, -1) + freqs_width = freqs_pos[2][:width].view(1, 1, width, -1).expand(frame, height, width, -1) + + freqs = torch.cat([freqs_frame, freqs_height, freqs_width], dim=-1).reshape(seq_lens, -1) + return freqs.clone().contiguous() + + @functools.lru_cache(maxsize=None) + def _compute_condition_freqs(self, frame, height, width, device: torch.device = None): + seq_lens = frame * height * width + pos_freqs = self.pos_freqs.to(device) if device is not None else self.pos_freqs + neg_freqs = self.neg_freqs.to(device) if device is not None else self.neg_freqs + + freqs_pos = pos_freqs.split([x // 2 for x in self.axes_dim], dim=1) + freqs_neg = neg_freqs.split([x // 2 for x in self.axes_dim], dim=1) + + freqs_frame = freqs_neg[0][-1:].view(frame, 1, 1, -1).expand(frame, height, width, -1) + if self.scale_rope: + freqs_height = torch.cat([freqs_neg[1][-(height - height // 2) :], freqs_pos[1][: height // 2]], dim=0) + freqs_height = freqs_height.view(1, height, 1, -1).expand(frame, height, width, -1) + freqs_width = torch.cat([freqs_neg[2][-(width - width // 2) :], freqs_pos[2][: width // 2]], dim=0) + freqs_width = freqs_width.view(1, 1, width, -1).expand(frame, height, width, -1) + else: + freqs_height = freqs_pos[1][:height].view(1, height, 1, -1).expand(frame, height, width, -1) + freqs_width = freqs_pos[2][:width].view(1, 1, width, -1).expand(frame, height, width, -1) + + freqs = torch.cat([freqs_frame, freqs_height, freqs_width], dim=-1).reshape(seq_lens, -1) + return freqs.clone().contiguous() + + +class QwenDoubleStreamAttnProcessor2_0: + """ + Attention processor for Qwen double-stream architecture, matching DoubleStreamLayerMegatron logic. This processor + implements joint attention computation where text and image streams are processed together. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "QwenDoubleStreamAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, # Image stream + encoder_hidden_states: torch.FloatTensor = None, # Text stream + encoder_hidden_states_mask: torch.FloatTensor = None, + attention_mask: torch.FloatTensor | None = None, + image_rotary_emb: torch.Tensor | None = None, + ) -> torch.FloatTensor: + if encoder_hidden_states is None: + raise ValueError("QwenDoubleStreamAttnProcessor2_0 requires encoder_hidden_states (text stream)") + + seq_txt = encoder_hidden_states.shape[1] + + # Compute QKV for image stream (sample projections) + img_query = attn.to_q(hidden_states) + img_key = attn.to_k(hidden_states) + img_value = attn.to_v(hidden_states) + + # Compute QKV for text stream (context projections) + txt_query = attn.add_q_proj(encoder_hidden_states) + txt_key = attn.add_k_proj(encoder_hidden_states) + txt_value = attn.add_v_proj(encoder_hidden_states) + + # Reshape for multi-head attention + img_query = img_query.unflatten(-1, (attn.heads, -1)) + img_key = img_key.unflatten(-1, (attn.heads, -1)) + img_value = img_value.unflatten(-1, (attn.heads, -1)) + + txt_query = txt_query.unflatten(-1, (attn.heads, -1)) + txt_key = txt_key.unflatten(-1, (attn.heads, -1)) + txt_value = txt_value.unflatten(-1, (attn.heads, -1)) + + # Apply QK normalization + if attn.norm_q is not None: + img_query = attn.norm_q(img_query) + if attn.norm_k is not None: + img_key = attn.norm_k(img_key) + if attn.norm_added_q is not None: + txt_query = attn.norm_added_q(txt_query) + if attn.norm_added_k is not None: + txt_key = attn.norm_added_k(txt_key) + + # Apply RoPE + if image_rotary_emb is not None: + img_freqs, txt_freqs = image_rotary_emb + img_query = apply_rotary_emb_qwen(img_query, img_freqs, use_real=False) + img_key = apply_rotary_emb_qwen(img_key, img_freqs, use_real=False) + txt_query = apply_rotary_emb_qwen(txt_query, txt_freqs, use_real=False) + txt_key = apply_rotary_emb_qwen(txt_key, txt_freqs, use_real=False) + + # Concatenate for joint attention + # Order: [text, image] + joint_query = torch.cat([txt_query, img_query], dim=1) + joint_key = torch.cat([txt_key, img_key], dim=1) + joint_value = torch.cat([txt_value, img_value], dim=1) + + joint_hidden_states = dispatch_attention_fn( + joint_query, + joint_key, + joint_value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + # Reshape back + joint_hidden_states = joint_hidden_states.flatten(2, 3) + joint_hidden_states = joint_hidden_states.to(joint_query.dtype) + + # Split attention outputs back + txt_attn_output = joint_hidden_states[:, :seq_txt, :] # Text part + img_attn_output = joint_hidden_states[:, seq_txt:, :] # Image part + + # Apply output projections + img_attn_output = attn.to_out[0](img_attn_output.contiguous()) + if len(attn.to_out) > 1: + img_attn_output = attn.to_out[1](img_attn_output) # dropout + + txt_attn_output = attn.to_add_out(txt_attn_output.contiguous()) + + return img_attn_output, txt_attn_output + + +@maybe_allow_in_graph +class QwenImageTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + qk_norm: str = "rms_norm", + eps: float = 1e-6, + zero_cond_t: bool = False, + ): + super().__init__() + + self.dim = dim + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + + # Image processing modules + self.img_mod = nn.Sequential( + nn.SiLU(), + nn.Linear(dim, 6 * dim, bias=True), # For scale, shift, gate for norm1 and norm2 + ) + self.img_norm1 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + self.attn = Attention( + query_dim=dim, + cross_attention_dim=None, # Enable cross attention for joint computation + added_kv_proj_dim=dim, # Enable added KV projections for text stream + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + context_pre_only=False, + bias=True, + processor=QwenDoubleStreamAttnProcessor2_0(), + qk_norm=qk_norm, + eps=eps, + ) + self.img_norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + self.img_mlp = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + # Text processing modules + self.txt_mod = nn.Sequential( + nn.SiLU(), + nn.Linear(dim, 6 * dim, bias=True), # For scale, shift, gate for norm1 and norm2 + ) + self.txt_norm1 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + # Text doesn't need separate attention - it's handled by img_attn joint computation + self.txt_norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=eps) + self.txt_mlp = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + self.zero_cond_t = zero_cond_t + + def _modulate(self, x, mod_params, index=None): + """Apply modulation to input tensor""" + # x: b l d, shift: b d, scale: b d, gate: b d + shift, scale, gate = mod_params.chunk(3, dim=-1) + + if index is not None: + # Assuming mod_params batch dim is 2*actual_batch (chunked into 2 parts) + # So shift, scale, gate have shape [2*actual_batch, d] + actual_batch = shift.size(0) // 2 + shift_0, shift_1 = shift[:actual_batch], shift[actual_batch:] # each: [actual_batch, d] + scale_0, scale_1 = scale[:actual_batch], scale[actual_batch:] + gate_0, gate_1 = gate[:actual_batch], gate[actual_batch:] + + # index: [b, l] where b is actual batch size + # Expand to [b, l, 1] to match feature dimension + index_expanded = index.unsqueeze(-1) # [b, l, 1] + + # Expand chunks to [b, 1, d] then broadcast to [b, l, d] + shift_0_exp = shift_0.unsqueeze(1) # [b, 1, d] + shift_1_exp = shift_1.unsqueeze(1) # [b, 1, d] + scale_0_exp = scale_0.unsqueeze(1) + scale_1_exp = scale_1.unsqueeze(1) + gate_0_exp = gate_0.unsqueeze(1) + gate_1_exp = gate_1.unsqueeze(1) + + # Use torch.where to select based on index + shift_result = torch.where(index_expanded == 0, shift_0_exp, shift_1_exp) + scale_result = torch.where(index_expanded == 0, scale_0_exp, scale_1_exp) + gate_result = torch.where(index_expanded == 0, gate_0_exp, gate_1_exp) + else: + shift_result = shift.unsqueeze(1) + scale_result = scale.unsqueeze(1) + gate_result = gate.unsqueeze(1) + + return x * (1 + scale_result) + shift_result, gate_result + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_mask: torch.Tensor, + temb: torch.Tensor, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + joint_attention_kwargs: dict[str, Any] | None = None, + modulate_index: list[int] | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # Get modulation parameters for both streams + img_mod_params = self.img_mod(temb) # [B, 6*dim] + + if self.zero_cond_t: + temb = torch.chunk(temb, 2, dim=0)[0] + txt_mod_params = self.txt_mod(temb) # [B, 6*dim] + + # Split modulation parameters for norm1 and norm2 + img_mod1, img_mod2 = img_mod_params.chunk(2, dim=-1) # Each [B, 3*dim] + txt_mod1, txt_mod2 = txt_mod_params.chunk(2, dim=-1) # Each [B, 3*dim] + + # Process image stream - norm1 + modulation + img_normed = self.img_norm1(hidden_states) + img_modulated, img_gate1 = self._modulate(img_normed, img_mod1, modulate_index) + + # Process text stream - norm1 + modulation + txt_normed = self.txt_norm1(encoder_hidden_states) + txt_modulated, txt_gate1 = self._modulate(txt_normed, txt_mod1) + + # Use QwenAttnProcessor2_0 for joint attention computation + # This directly implements the DoubleStreamLayerMegatron logic: + # 1. Computes QKV for both streams + # 2. Applies QK normalization and RoPE + # 3. Concatenates and runs joint attention + # 4. Splits results back to separate streams + joint_attention_kwargs = joint_attention_kwargs or {} + attn_output = self.attn( + hidden_states=img_modulated, # Image stream (will be processed as "sample") + encoder_hidden_states=txt_modulated, # Text stream (will be processed as "context") + encoder_hidden_states_mask=encoder_hidden_states_mask, + image_rotary_emb=image_rotary_emb, + **joint_attention_kwargs, + ) + + # QwenAttnProcessor2_0 returns (img_output, txt_output) when encoder_hidden_states is provided + img_attn_output, txt_attn_output = attn_output + + # Apply attention gates and add residual (like in Megatron) + hidden_states = hidden_states + img_gate1 * img_attn_output + encoder_hidden_states = encoder_hidden_states + txt_gate1 * txt_attn_output + + # Process image stream - norm2 + MLP + img_normed2 = self.img_norm2(hidden_states) + img_modulated2, img_gate2 = self._modulate(img_normed2, img_mod2, modulate_index) + img_mlp_output = self.img_mlp(img_modulated2) + hidden_states = hidden_states + img_gate2 * img_mlp_output + + # Process text stream - norm2 + MLP + txt_normed2 = self.txt_norm2(encoder_hidden_states) + txt_modulated2, txt_gate2 = self._modulate(txt_normed2, txt_mod2) + txt_mlp_output = self.txt_mlp(txt_modulated2) + encoder_hidden_states = encoder_hidden_states + txt_gate2 * txt_mlp_output + + # Clip to prevent overflow for fp16 + if encoder_hidden_states.dtype == torch.float16: + encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504) + if hidden_states.dtype == torch.float16: + hidden_states = hidden_states.clip(-65504, 65504) + + return encoder_hidden_states, hidden_states + + +class QwenImageTransformer2DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + """ + The Transformer model introduced in Qwen. + + Args: + patch_size (`int`, defaults to `2`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `64`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `None`): + The number of channels in the output. If not specified, it defaults to `in_channels`. + num_layers (`int`, defaults to `60`): + The number of layers of dual stream DiT blocks to use. + attention_head_dim (`int`, defaults to `128`): + The number of dimensions to use for each attention head. + num_attention_heads (`int`, defaults to `24`): + The number of attention heads to use. + joint_attention_dim (`int`, defaults to `3584`): + The number of dimensions to use for the joint attention (embedding/channel dimension of + `encoder_hidden_states`). + guidance_embeds (`bool`, defaults to `False`): + Whether to use guidance embeddings for guidance-distilled variant of the model. + axes_dims_rope (`tuple[int]`, defaults to `(16, 56, 56)`): + The dimensions to use for the rotary positional embeddings. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["QwenImageTransformerBlock"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + _repeated_blocks = ["QwenImageTransformerBlock"] + # Make CP plan compatible with https://github.com/huggingface/diffusers/pull/12702 + _cp_plan = { + "transformer_blocks.0": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + "encoder_hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + }, + "transformer_blocks.*": { + "modulate_index": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False), + }, + "pos_embed": { + 0: ContextParallelInput(split_dim=0, expected_dims=2, split_output=True), + 1: ContextParallelInput(split_dim=0, expected_dims=2, split_output=True), + }, + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + } + + @register_to_config + def __init__( + self, + patch_size: int = 2, + in_channels: int = 64, + out_channels: int | None = 16, + num_layers: int = 60, + attention_head_dim: int = 128, + num_attention_heads: int = 24, + joint_attention_dim: int = 3584, + guidance_embeds: bool = False, # TODO: this should probably be removed + axes_dims_rope: tuple[int, int, int] = (16, 56, 56), + zero_cond_t: bool = False, + use_additional_t_cond: bool = False, + use_layer3d_rope: bool = False, + ): + super().__init__() + self.out_channels = out_channels or in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + if not use_layer3d_rope: + self.pos_embed = QwenEmbedRope(theta=10000, axes_dim=list(axes_dims_rope), scale_rope=True) + else: + self.pos_embed = QwenEmbedLayer3DRope(theta=10000, axes_dim=list(axes_dims_rope), scale_rope=True) + + self.time_text_embed = QwenTimestepProjEmbeddings( + embedding_dim=self.inner_dim, use_additional_t_cond=use_additional_t_cond + ) + + self.txt_norm = RMSNorm(joint_attention_dim, eps=1e-6) + + self.img_in = nn.Linear(in_channels, self.inner_dim) + self.txt_in = nn.Linear(joint_attention_dim, self.inner_dim) + + self.transformer_blocks = nn.ModuleList( + [ + QwenImageTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + zero_cond_t=zero_cond_t, + ) + for _ in range(num_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + self.zero_cond_t = zero_cond_t + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + encoder_hidden_states_mask: torch.Tensor = None, + timestep: torch.LongTensor = None, + img_shapes: list[tuple[int, int, int]] | None = None, + txt_seq_lens: list[int] | None = None, + guidance: torch.Tensor = None, # TODO: this should probably be removed + attention_kwargs: dict[str, Any] | None = None, + controlnet_block_samples=None, + additional_t_cond=None, + return_dict: bool = True, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`QwenTransformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch_size, image_sequence_length, in_channels)`): + Input `hidden_states`. + encoder_hidden_states (`torch.Tensor` of shape `(batch_size, text_sequence_length, joint_attention_dim)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + encoder_hidden_states_mask (`torch.Tensor` of shape `(batch_size, text_sequence_length)`, *optional*): + Mask for the encoder hidden states. Expected to have 1.0 for valid tokens and 0.0 for padding tokens. + Used in the attention processor to prevent attending to padding tokens. The mask can have any pattern + (not just contiguous valid tokens followed by padding) since it's applied element-wise in attention. + timestep ( `torch.LongTensor`): + Used to indicate denoising step. + img_shapes (`list[tuple[int, int, int]]`, *optional*): + Image shapes for RoPE computation. + txt_seq_lens (`list[int]`, *optional*, **Deprecated**): + Deprecated parameter. Use `encoder_hidden_states_mask` instead. If provided, the maximum value will be + used to compute RoPE sequence length. + guidance (`torch.Tensor`, *optional*): + Guidance tensor for conditional generation. + attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + controlnet_block_samples (*optional*): + ControlNet block samples to add to the transformer blocks. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + if txt_seq_lens is not None: + deprecate( + "txt_seq_lens", + "0.39.0", + "Passing `txt_seq_lens` is deprecated and will be removed in version 0.39.0. " + "Please use `encoder_hidden_states_mask` instead. " + "The mask-based approach is more flexible and supports variable-length sequences.", + standard_warn=False, + ) + + hidden_states = self.img_in(hidden_states) + + timestep = timestep.to(hidden_states.dtype) + + if self.zero_cond_t: + timestep = torch.cat([timestep, timestep * 0], dim=0) + modulate_index = torch.tensor( + [[0] * prod(sample[0]) + [1] * sum([prod(s) for s in sample[1:]]) for sample in img_shapes], + device=timestep.device, + dtype=torch.int, + ) + else: + modulate_index = None + + encoder_hidden_states = self.txt_norm(encoder_hidden_states) + encoder_hidden_states = self.txt_in(encoder_hidden_states) + + # Use the encoder_hidden_states sequence length for RoPE computation and normalize mask + text_seq_len, _, encoder_hidden_states_mask = compute_text_seq_len_from_mask( + encoder_hidden_states, encoder_hidden_states_mask + ) + + if guidance is not None: + guidance = guidance.to(hidden_states.dtype) * 1000 + + temb = ( + self.time_text_embed(timestep, hidden_states, additional_t_cond) + if guidance is None + else self.time_text_embed(timestep, guidance, hidden_states, additional_t_cond) + ) + + image_rotary_emb = self.pos_embed(img_shapes, max_txt_seq_len=text_seq_len, device=hidden_states.device) + + # Construct joint attention mask once to avoid reconstructing in every block + # This eliminates 60 GPU syncs during training while maintaining torch.compile compatibility + block_attention_kwargs = attention_kwargs.copy() if attention_kwargs is not None else {} + if encoder_hidden_states_mask is not None: + # Build joint mask: [text_mask, all_ones_for_image] + batch_size, image_seq_len = hidden_states.shape[:2] + image_mask = torch.ones((batch_size, image_seq_len), dtype=torch.bool, device=hidden_states.device) + joint_attention_mask = torch.cat([encoder_hidden_states_mask, image_mask], dim=1) + block_attention_kwargs["attention_mask"] = joint_attention_mask + + for index_block, block in enumerate(self.transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + None, # Don't pass encoder_hidden_states_mask (using attention_mask instead) + temb, + image_rotary_emb, + block_attention_kwargs, + modulate_index, + ) + + else: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + encoder_hidden_states_mask=None, # Don't pass (using attention_mask instead) + temb=temb, + image_rotary_emb=image_rotary_emb, + joint_attention_kwargs=block_attention_kwargs, + modulate_index=modulate_index, + ) + + # controlnet residual + if controlnet_block_samples is not None: + interval_control = len(self.transformer_blocks) / len(controlnet_block_samples) + interval_control = int(np.ceil(interval_control)) + hidden_states = hidden_states + controlnet_block_samples[index_block // interval_control] + + if self.zero_cond_t: + temb = temb.chunk(2, dim=0)[0] + # Use only the image part (hidden_states) from the dual-stream blocks + hidden_states = self.norm_out(hidden_states, temb) + output = self.proj_out(hidden_states) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_sana_video.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_sana_video.py new file mode 100644 index 0000000000000000000000000000000000000000..f833c0e842c372ab63262fc33b11f7a165311851 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_sana_video.py @@ -0,0 +1,687 @@ +# Copyright 2025 The HuggingFace Team and SANA-Video Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn.functional as F +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin +from ..attention_dispatch import dispatch_attention_fn +from ..attention_processor import Attention +from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormSingle, RMSNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class GLUMBTempConv(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + expand_ratio: float = 4, + norm_type: str | None = None, + residual_connection: bool = True, + ) -> None: + super().__init__() + + hidden_channels = int(expand_ratio * in_channels) + self.norm_type = norm_type + self.residual_connection = residual_connection + + self.nonlinearity = nn.SiLU() + self.conv_inverted = nn.Conv2d(in_channels, hidden_channels * 2, 1, 1, 0) + self.conv_depth = nn.Conv2d(hidden_channels * 2, hidden_channels * 2, 3, 1, 1, groups=hidden_channels * 2) + self.conv_point = nn.Conv2d(hidden_channels, out_channels, 1, 1, 0, bias=False) + + self.norm = None + if norm_type == "rms_norm": + self.norm = RMSNorm(out_channels, eps=1e-5, elementwise_affine=True, bias=True) + + self.conv_temp = nn.Conv2d( + out_channels, out_channels, kernel_size=(3, 1), stride=1, padding=(1, 0), bias=False + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.residual_connection: + residual = hidden_states + batch_size, num_frames, height, width, num_channels = hidden_states.shape + hidden_states = hidden_states.view(batch_size * num_frames, height, width, num_channels).permute(0, 3, 1, 2) + + hidden_states = self.conv_inverted(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.conv_depth(hidden_states) + hidden_states, gate = torch.chunk(hidden_states, 2, dim=1) + hidden_states = hidden_states * self.nonlinearity(gate) + + hidden_states = self.conv_point(hidden_states) + + # Temporal aggregation + hidden_states_temporal = hidden_states.view(batch_size, num_frames, num_channels, height * width).permute( + 0, 2, 1, 3 + ) + hidden_states = hidden_states_temporal + self.conv_temp(hidden_states_temporal) + hidden_states = hidden_states.permute(0, 2, 3, 1).view(batch_size, num_frames, height, width, num_channels) + + if self.norm_type == "rms_norm": + # move channel to the last dimension so we apply RMSnorm across channel dimension + hidden_states = self.norm(hidden_states.movedim(1, -1)).movedim(-1, 1) + + if self.residual_connection: + hidden_states = hidden_states + residual + + return hidden_states + + +class SanaLinearAttnProcessor3_0: + r""" + Processor for implementing scaled dot-product linear attention. + """ + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + original_dtype = hidden_states.dtype + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + # B,N,H,C + + query = F.relu(query) + key = F.relu(key) + + if rotary_emb is not None: + + def apply_rotary_emb( + hidden_states: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + ): + x1, x2 = hidden_states.unflatten(-1, (-1, 2)).unbind(-1) + cos = freqs_cos[..., 0::2] + sin = freqs_sin[..., 1::2] + out = torch.empty_like(hidden_states) + out[..., 0::2] = x1 * cos - x2 * sin + out[..., 1::2] = x1 * sin + x2 * cos + return out.type_as(hidden_states) + + query_rotate = apply_rotary_emb(query, *rotary_emb) + key_rotate = apply_rotary_emb(key, *rotary_emb) + + # B,H,C,N + query = query.permute(0, 2, 3, 1) + key = key.permute(0, 2, 3, 1) + query_rotate = query_rotate.permute(0, 2, 3, 1) + key_rotate = key_rotate.permute(0, 2, 3, 1) + value = value.permute(0, 2, 3, 1) + + query_rotate, key_rotate, value = query_rotate.float(), key_rotate.float(), value.float() + + z = 1 / (key.sum(dim=-1, keepdim=True).transpose(-2, -1) @ query + 1e-15) + + scores = torch.matmul(value, key_rotate.transpose(-1, -2)) + hidden_states = torch.matmul(scores, query_rotate) + + hidden_states = hidden_states * z + # B,H,C,N + hidden_states = hidden_states.flatten(1, 2).transpose(1, 2) + hidden_states = hidden_states.to(original_dtype) + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + + +class WanRotaryPosEmbed(nn.Module): + def __init__( + self, + attention_head_dim: int, + patch_size: tuple[int, int, int], + max_seq_len: int, + theta: float = 10000.0, + ): + super().__init__() + + self.attention_head_dim = attention_head_dim + self.patch_size = patch_size + self.max_seq_len = max_seq_len + + h_dim = w_dim = 2 * (attention_head_dim // 6) + t_dim = attention_head_dim - h_dim - w_dim + + self.t_dim = t_dim + self.h_dim = h_dim + self.w_dim = w_dim + + freqs_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + + freqs_cos = [] + freqs_sin = [] + + for dim in [t_dim, h_dim, w_dim]: + freq_cos, freq_sin = get_1d_rotary_pos_embed( + dim, + max_seq_len, + theta, + use_real=True, + repeat_interleave_real=True, + freqs_dtype=freqs_dtype, + ) + freqs_cos.append(freq_cos) + freqs_sin.append(freq_sin) + + self.register_buffer("freqs_cos", torch.cat(freqs_cos, dim=1), persistent=False) + self.register_buffer("freqs_sin", torch.cat(freqs_sin, dim=1), persistent=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.patch_size + ppf, pph, ppw = num_frames // p_t, height // p_h, width // p_w + + split_sizes = [self.t_dim, self.h_dim, self.w_dim] + + freqs_cos = self.freqs_cos.split(split_sizes, dim=1) + freqs_sin = self.freqs_sin.split(split_sizes, dim=1) + + freqs_cos_f = freqs_cos[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_h = freqs_cos[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_w = freqs_cos[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_sin_f = freqs_sin[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_h = freqs_sin[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_w = freqs_sin[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_cos = torch.cat([freqs_cos_f, freqs_cos_h, freqs_cos_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + freqs_sin = torch.cat([freqs_sin_f, freqs_sin_h, freqs_sin_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + + return freqs_cos, freqs_sin + + +class SanaModulatedNorm(nn.Module): + def __init__(self, dim: int, elementwise_affine: bool = False, eps: float = 1e-6): + super().__init__() + self.norm = nn.LayerNorm(dim, elementwise_affine=elementwise_affine, eps=eps) + + def forward( + self, hidden_states: torch.Tensor, temb: torch.Tensor, scale_shift_table: torch.Tensor + ) -> torch.Tensor: + hidden_states = self.norm(hidden_states) + shift, scale = (scale_shift_table[None, None] + temb[:, :, None].to(scale_shift_table.device)).unbind(dim=2) + hidden_states = hidden_states * (1 + scale) + shift + return hidden_states + + +class SanaCombinedTimestepGuidanceEmbeddings(nn.Module): + def __init__(self, embedding_dim): + super().__init__() + self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.guidance_condition_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0) + self.guidance_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim) + + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, 6 * embedding_dim, bias=True) + + def forward(self, timestep: torch.Tensor, guidance: torch.Tensor = None, hidden_dtype: torch.dtype = None): + timesteps_proj = self.time_proj(timestep) + timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D) + + guidance_proj = self.guidance_condition_proj(guidance) + guidance_emb = self.guidance_embedder(guidance_proj.to(dtype=hidden_dtype)) + conditioning = timesteps_emb + guidance_emb + + return self.linear(self.silu(conditioning)), conditioning + + +class SanaAttnProcessor2_0: + r""" + Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("SanaAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + # scaled_dot_product_attention expects attention_mask shape to be + # (batch, heads, source_length, target_length) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + query = attn.to_q(hidden_states) + + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim) + key = key.view(batch_size, -1, attn.heads, head_dim) + value = value.view(batch_size, -1, attn.heads, head_dim) + + # the output of sdp = (batch, num_heads, seq_len, head_dim) + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + hidden_states = hidden_states / attn.rescale_output_factor + + return hidden_states + + +class SanaVideoTransformerBlock(nn.Module): + r""" + Transformer block introduced in [Sana-Video](https://huggingface.co/papers/2509.24695). + """ + + def __init__( + self, + dim: int = 2240, + num_attention_heads: int = 20, + attention_head_dim: int = 112, + dropout: float = 0.0, + num_cross_attention_heads: int | None = 20, + cross_attention_head_dim: int | None = 112, + cross_attention_dim: int | None = 2240, + attention_bias: bool = True, + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + attention_out_bias: bool = True, + mlp_ratio: float = 3.0, + qk_norm: str | None = "rms_norm_across_heads", + rope_max_seq_len: int = 1024, + ) -> None: + super().__init__() + + # 1. Self Attention + self.norm1 = nn.LayerNorm(dim, elementwise_affine=False, eps=norm_eps) + self.attn1 = Attention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + kv_heads=num_attention_heads if qk_norm is not None else None, + qk_norm=qk_norm, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=None, + processor=SanaLinearAttnProcessor3_0(), + ) + + # 2. Cross Attention + if cross_attention_dim is not None: + self.norm2 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps) + self.attn2 = Attention( + query_dim=dim, + qk_norm=qk_norm, + kv_heads=num_cross_attention_heads if qk_norm is not None else None, + cross_attention_dim=cross_attention_dim, + heads=num_cross_attention_heads, + dim_head=cross_attention_head_dim, + dropout=dropout, + bias=True, + out_bias=attention_out_bias, + processor=SanaAttnProcessor2_0(), + ) + + # 3. Feed-forward + self.ff = GLUMBTempConv(dim, dim, mlp_ratio, norm_type=None, residual_connection=False) + + self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor | None = None, + encoder_hidden_states: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + timestep: torch.LongTensor | None = None, + frames: int = None, + height: int = None, + width: int = None, + rotary_emb: torch.Tensor | None = None, + ) -> torch.Tensor: + batch_size = hidden_states.shape[0] + + # 1. Modulation + shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = ( + self.scale_shift_table[None, None] + timestep.reshape(batch_size, timestep.shape[1], 6, -1) + ).unbind(dim=2) + + # 2. Self Attention + norm_hidden_states = self.norm1(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa + norm_hidden_states = norm_hidden_states.to(hidden_states.dtype) + + attn_output = self.attn1(norm_hidden_states, rotary_emb=rotary_emb) + hidden_states = hidden_states + gate_msa * attn_output + + # 3. Cross Attention + if self.attn2 is not None: + attn_output = self.attn2( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + ) + hidden_states = attn_output + hidden_states + + # 4. Feed-forward + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp + + norm_hidden_states = norm_hidden_states.unflatten(1, (frames, height, width)) + ff_output = self.ff(norm_hidden_states) + ff_output = ff_output.flatten(1, 3) + hidden_states = hidden_states + gate_mlp * ff_output + + return hidden_states + + +class SanaVideoTransformer3DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, AttentionMixin): + r""" + A 3D Transformer model introduced in [Sana-Video](https://huggingface.co/papers/2509.24695) family of models. + + Args: + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, *optional*, defaults to `16`): + The number of channels in the output. + num_attention_heads (`int`, defaults to `20`): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, defaults to `112`): + The number of channels in each head. + num_layers (`int`, defaults to `20`): + The number of layers of Transformer blocks to use. + num_cross_attention_heads (`int`, *optional*, defaults to `20`): + The number of heads to use for cross-attention. + cross_attention_head_dim (`int`, *optional*, defaults to `112`): + The number of channels in each head for cross-attention. + cross_attention_dim (`int`, *optional*, defaults to `2240`): + The number of channels in the cross-attention output. + caption_channels (`int`, defaults to `2304`): + The number of channels in the caption embeddings. + mlp_ratio (`float`, defaults to `2.5`): + The expansion ratio to use in the GLUMBConv layer. + dropout (`float`, defaults to `0.0`): + The dropout probability. + attention_bias (`bool`, defaults to `False`): + Whether to use bias in the attention layer. + sample_size (`int`, defaults to `32`): + The base size of the input latent. + patch_size (`int`, defaults to `1`): + The size of the patches to use in the patch embedding layer. + norm_elementwise_affine (`bool`, defaults to `False`): + Whether to use elementwise affinity in the normalization layer. + norm_eps (`float`, defaults to `1e-6`): + The epsilon value for the normalization layer. + qk_norm (`str`, *optional*, defaults to `None`): + The normalization to use for the query and key. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["SanaVideoTransformerBlock", "SanaModulatedNorm"] + _skip_layerwise_casting_patterns = ["patch_embedding", "norm"] + + @register_to_config + def __init__( + self, + in_channels: int = 16, + out_channels: int | None = 16, + num_attention_heads: int = 20, + attention_head_dim: int = 112, + num_layers: int = 20, + num_cross_attention_heads: int | None = 20, + cross_attention_head_dim: int | None = 112, + cross_attention_dim: int | None = 2240, + caption_channels: int = 2304, + mlp_ratio: float = 2.5, + dropout: float = 0.0, + attention_bias: bool = False, + sample_size: int = 30, + patch_size: tuple[int, int, int] = (1, 2, 2), + norm_elementwise_affine: bool = False, + norm_eps: float = 1e-6, + interpolation_scale: int | None = None, + guidance_embeds: bool = False, + guidance_embeds_scale: float = 0.1, + qk_norm: str | None = "rms_norm_across_heads", + rope_max_seq_len: int = 1024, + ) -> None: + super().__init__() + + out_channels = out_channels or in_channels + inner_dim = num_attention_heads * attention_head_dim + + # 1. Patch & position embedding + self.rope = WanRotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Additional condition embeddings + if guidance_embeds: + self.time_embed = SanaCombinedTimestepGuidanceEmbeddings(inner_dim) + else: + self.time_embed = AdaLayerNormSingle(inner_dim) + + self.caption_projection = PixArtAlphaTextProjection(in_features=caption_channels, hidden_size=inner_dim) + self.caption_norm = RMSNorm(inner_dim, eps=1e-5, elementwise_affine=True) + + # 3. Transformer blocks + self.transformer_blocks = nn.ModuleList( + [ + SanaVideoTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + num_cross_attention_heads=num_cross_attention_heads, + cross_attention_head_dim=cross_attention_head_dim, + cross_attention_dim=cross_attention_dim, + attention_bias=attention_bias, + norm_elementwise_affine=norm_elementwise_affine, + norm_eps=norm_eps, + mlp_ratio=mlp_ratio, + qk_norm=qk_norm, + ) + for _ in range(num_layers) + ] + ) + + # 4. Output blocks + self.scale_shift_table = nn.Parameter(torch.randn(2, inner_dim) / inner_dim**0.5) + self.norm_out = SanaModulatedNorm(inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(inner_dim, math.prod(patch_size) * out_channels) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: torch.Tensor, + guidance: torch.Tensor | None = None, + encoder_attention_mask: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + attention_kwargs: dict[str, Any] | None = None, + controlnet_block_samples: tuple[torch.Tensor] | None = None, + return_dict: bool = True, + ) -> tuple[torch.Tensor, ...] | Transformer2DModelOutput: + # ensure attention_mask is a bias, and give it a singleton query_tokens dimension. + # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward. + # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias. + # expects mask of shape: + # [batch, key_tokens] + # adds singleton query_tokens dimension: + # [batch, 1, key_tokens] + # this helps to broadcast it as a bias over attention scores, which will be in one of the following shapes: + # [batch, heads, query_tokens, key_tokens] (e.g. torch sdp attn) + # [batch * heads, query_tokens, key_tokens] (e.g. xformers or classic attn) + if attention_mask is not None and attention_mask.ndim == 2: + # assume that mask is expressed as: + # (1 = keep, 0 = discard) + # convert mask into a bias that can be added to attention scores: + # (keep = +0, discard = -10000.0) + attention_mask = (1 - attention_mask.to(hidden_states.dtype)) * -10000.0 + attention_mask = attention_mask.unsqueeze(1) + + # convert encoder_attention_mask to a bias the same way we do for attention_mask + if encoder_attention_mask is not None and encoder_attention_mask.ndim == 2: + encoder_attention_mask = (1 - encoder_attention_mask.to(hidden_states.dtype)) * -10000.0 + encoder_attention_mask = encoder_attention_mask.unsqueeze(1) + + # 1. Input + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + rotary_emb = self.rope(hidden_states) + + hidden_states = self.patch_embedding(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + + if guidance is not None: + timestep, embedded_timestep = self.time_embed( + timestep.flatten(), guidance=guidance, hidden_dtype=hidden_states.dtype + ) + else: + timestep, embedded_timestep = self.time_embed( + timestep.flatten(), batch_size=batch_size, hidden_dtype=hidden_states.dtype + ) + + timestep = timestep.view(batch_size, -1, timestep.size(-1)) + embedded_timestep = embedded_timestep.view(batch_size, -1, embedded_timestep.size(-1)) + + encoder_hidden_states = self.caption_projection(encoder_hidden_states) + encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.shape[-1]) + + encoder_hidden_states = self.caption_norm(encoder_hidden_states) + + # 2. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for index_block, block in enumerate(self.transformer_blocks): + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + post_patch_num_frames, + post_patch_height, + post_patch_width, + rotary_emb, + ) + if controlnet_block_samples is not None and 0 < index_block <= len(controlnet_block_samples): + hidden_states = hidden_states + controlnet_block_samples[index_block - 1] + + else: + for index_block, block in enumerate(self.transformer_blocks): + hidden_states = block( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + timestep, + post_patch_num_frames, + post_patch_height, + post_patch_width, + rotary_emb, + ) + if controlnet_block_samples is not None and 0 < index_block <= len(controlnet_block_samples): + hidden_states = hidden_states + controlnet_block_samples[index_block - 1] + + # 3. Normalization + hidden_states = self.norm_out(hidden_states, embedded_timestep, self.scale_shift_table) + + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_sd3.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_sd3.py new file mode 100644 index 0000000000000000000000000000000000000000..ead657d0cfd209413e2b9331f5ec70930e6cebff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_sd3.py @@ -0,0 +1,345 @@ +# Copyright 2025 Stability AI, The HuggingFace Team and The InstantX Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin, SD3Transformer2DLoadersMixin +from ...utils import apply_lora_scale, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, FeedForward, JointTransformerBlock +from ..attention_processor import ( + Attention, + FusedJointAttnProcessor2_0, + JointAttnProcessor2_0, +) +from ..embeddings import CombinedTimestepTextProjEmbeddings, PatchEmbed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@maybe_allow_in_graph +class SD3SingleTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + ): + super().__init__() + + self.norm1 = AdaLayerNormZero(dim) + self.attn = Attention( + query_dim=dim, + dim_head=attention_head_dim, + heads=num_attention_heads, + out_dim=dim, + bias=True, + processor=JointAttnProcessor2_0(), + eps=1e-6, + ) + + self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6) + self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate") + + def forward(self, hidden_states: torch.Tensor, temb: torch.Tensor): + # 1. Attention + norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb) + attn_output = self.attn(hidden_states=norm_hidden_states, encoder_hidden_states=None) + attn_output = gate_msa.unsqueeze(1) * attn_output + hidden_states = hidden_states + attn_output + + # 2. Feed Forward + norm_hidden_states = self.norm2(hidden_states) + norm_hidden_states = norm_hidden_states * (1 + scale_mlp.unsqueeze(1)) + shift_mlp.unsqueeze(1) + ff_output = self.ff(norm_hidden_states) + ff_output = gate_mlp.unsqueeze(1) * ff_output + hidden_states = hidden_states + ff_output + + return hidden_states + + +class SD3Transformer2DModel( + ModelMixin, AttentionMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, SD3Transformer2DLoadersMixin +): + """ + The Transformer model introduced in [Stable Diffusion 3](https://huggingface.co/papers/2403.03206). + + Parameters: + sample_size (`int`, defaults to `128`): + The width/height of the latents. This is fixed during training since it is used to learn a number of + position embeddings. + patch_size (`int`, defaults to `2`): + Patch size to turn the input data into small patches. + in_channels (`int`, defaults to `16`): + The number of latent channels in the input. + num_layers (`int`, defaults to `18`): + The number of layers of transformer blocks to use. + attention_head_dim (`int`, defaults to `64`): + The number of channels in each head. + num_attention_heads (`int`, defaults to `18`): + The number of heads to use for multi-head attention. + joint_attention_dim (`int`, defaults to `4096`): + The embedding dimension to use for joint text-image attention. + caption_projection_dim (`int`, defaults to `1152`): + The embedding dimension of caption embeddings. + pooled_projection_dim (`int`, defaults to `2048`): + The embedding dimension of pooled text projections. + out_channels (`int`, defaults to `16`): + The number of latent channels in the output. + pos_embed_max_size (`int`, defaults to `96`): + The maximum latent height/width of positional embeddings. + dual_attention_layers (`tuple[int, ...]`, defaults to `()`): + The number of dual-stream transformer blocks to use. + qk_norm (`str`, *optional*, defaults to `None`): + The normalization to use for query and key in the attention layer. If `None`, no normalization is used. + """ + + _supports_gradient_checkpointing = True + _no_split_modules = ["JointTransformerBlock"] + _skip_layerwise_casting_patterns = ["pos_embed", "norm"] + + @register_to_config + def __init__( + self, + sample_size: int = 128, + patch_size: int = 2, + in_channels: int = 16, + num_layers: int = 18, + attention_head_dim: int = 64, + num_attention_heads: int = 18, + joint_attention_dim: int = 4096, + caption_projection_dim: int = 1152, + pooled_projection_dim: int = 2048, + out_channels: int = 16, + pos_embed_max_size: int = 96, + dual_attention_layers: tuple[ + int, ... + ] = (), # () for sd3.0; (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) for sd3.5 + qk_norm: str | None = None, + ): + super().__init__() + self.out_channels = out_channels if out_channels is not None else in_channels + self.inner_dim = num_attention_heads * attention_head_dim + + self.pos_embed = PatchEmbed( + height=sample_size, + width=sample_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dim=self.inner_dim, + pos_embed_max_size=pos_embed_max_size, # hard-code for now. + ) + self.time_text_embed = CombinedTimestepTextProjEmbeddings( + embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim + ) + self.context_embedder = nn.Linear(joint_attention_dim, caption_projection_dim) + + self.transformer_blocks = nn.ModuleList( + [ + JointTransformerBlock( + dim=self.inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + context_pre_only=i == num_layers - 1, + qk_norm=qk_norm, + use_dual_attention=True if i in dual_attention_layers else False, + ) + for i in range(num_layers) + ] + ) + + self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6) + self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True) + + self.gradient_checkpointing = False + + # Copied from diffusers.models.unets.unet_3d_condition.UNet3DConditionModel.enable_forward_chunking + def enable_forward_chunking(self, chunk_size: int | None = None, dim: int = 0) -> None: + """ + Sets the attention processor to use [feed forward + chunking](https://huggingface.co/blog/reformer#2-chunked-feed-forward-layers). + + Parameters: + chunk_size (`int`, *optional*): + The chunk size of the feed-forward layers. If not specified, will run feed-forward layer individually + over each tensor of dim=`dim`. + dim (`int`, *optional*, defaults to `0`): + The dimension over which the feed-forward computation should be chunked. Choose between dim=0 (batch) + or dim=1 (sequence length). + """ + if dim not in [0, 1]: + raise ValueError(f"Make sure to set `dim` to either 0 or 1, not {dim}") + + # By default chunk size is 1 + chunk_size = chunk_size or 1 + + def fn_recursive_feed_forward(module: torch.nn.Module, chunk_size: int, dim: int): + if hasattr(module, "set_chunk_feed_forward"): + module.set_chunk_feed_forward(chunk_size=chunk_size, dim=dim) + + for child in module.children(): + fn_recursive_feed_forward(child, chunk_size, dim) + + for module in self.children(): + fn_recursive_feed_forward(module, chunk_size, dim) + + # Copied from diffusers.models.unets.unet_3d_condition.UNet3DConditionModel.disable_forward_chunking + def disable_forward_chunking(self): + def fn_recursive_feed_forward(module: torch.nn.Module, chunk_size: int, dim: int): + if hasattr(module, "set_chunk_feed_forward"): + module.set_chunk_feed_forward(chunk_size=chunk_size, dim=dim) + + for child in module.children(): + fn_recursive_feed_forward(child, chunk_size, dim) + + for module in self.children(): + fn_recursive_feed_forward(module, None, 0) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections with FusedAttnProcessor2_0->FusedJointAttnProcessor2_0 + def fuse_qkv_projections(self): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + """ + self.original_attn_processors = None + + for _, attn_processor in self.attn_processors.items(): + if "Added" in str(attn_processor.__class__.__name__): + raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.") + + self.original_attn_processors = self.attn_processors + + for module in self.modules(): + if isinstance(module, Attention): + module.fuse_projections(fuse=True) + + self.set_attn_processor(FusedJointAttnProcessor2_0()) + + # Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections + def unfuse_qkv_projections(self): + """Disables the fused QKV projection if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + """ + if self.original_attn_processors is not None: + self.set_attn_processor(self.original_attn_processors) + + @apply_lora_scale("joint_attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + pooled_projections: torch.Tensor = None, + timestep: torch.LongTensor = None, + block_controlnet_hidden_states: list = None, + joint_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + skip_layers: list[int] | None = None, + ) -> torch.Tensor | Transformer2DModelOutput: + """ + The [`SD3Transformer2DModel`] forward method. + + Args: + hidden_states (`torch.Tensor` of shape `(batch size, channel, height, width)`): + Input `hidden_states`. + encoder_hidden_states (`torch.Tensor` of shape `(batch size, sequence_len, embed_dims)`): + Conditional embeddings (embeddings computed from the input conditions such as prompts) to use. + pooled_projections (`torch.Tensor` of shape `(batch_size, projection_dim)`): + Embeddings projected from the embeddings of input conditions. + timestep (`torch.LongTensor`): + Used to indicate denoising step. + block_controlnet_hidden_states (`list` of `torch.Tensor`): + A list of tensors that if specified are added to the residuals of transformer blocks. + joint_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformer_2d.Transformer2DModelOutput`] instead of a plain + tuple. + skip_layers (`list` of `int`, *optional*): + A list of layer indices to skip during the forward pass. + + Returns: + If `return_dict` is True, an [`~models.transformer_2d.Transformer2DModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + + height, width = hidden_states.shape[-2:] + + hidden_states = self.pos_embed(hidden_states) # takes care of adding positional embeddings too. + temb = self.time_text_embed(timestep, pooled_projections) + encoder_hidden_states = self.context_embedder(encoder_hidden_states) + + if joint_attention_kwargs is not None and "ip_adapter_image_embeds" in joint_attention_kwargs: + ip_adapter_image_embeds = joint_attention_kwargs.pop("ip_adapter_image_embeds") + ip_hidden_states, ip_temb = self.image_proj(ip_adapter_image_embeds, timestep) + + joint_attention_kwargs.update(ip_hidden_states=ip_hidden_states, temb=ip_temb) + + for index_block, block in enumerate(self.transformer_blocks): + # Skip specified layers + is_skip = True if skip_layers is not None and index_block in skip_layers else False + + if torch.is_grad_enabled() and self.gradient_checkpointing and not is_skip: + encoder_hidden_states, hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + temb, + joint_attention_kwargs, + ) + elif not is_skip: + encoder_hidden_states, hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=temb, + joint_attention_kwargs=joint_attention_kwargs, + ) + + # controlnet residual + if block_controlnet_hidden_states is not None and block.context_pre_only is False: + interval_control = len(self.transformer_blocks) / len(block_controlnet_hidden_states) + hidden_states = hidden_states + block_controlnet_hidden_states[int(index_block / interval_control)] + + hidden_states = self.norm_out(hidden_states, temb) + hidden_states = self.proj_out(hidden_states) + + # unpatchify + patch_size = self.config.patch_size + height = height // patch_size + width = width // patch_size + + hidden_states = hidden_states.reshape( + shape=(hidden_states.shape[0], height, width, patch_size, patch_size, self.out_channels) + ) + hidden_states = torch.einsum("nhwpqc->nchpwq", hidden_states) + output = hidden_states.reshape( + shape=(hidden_states.shape[0], self.out_channels, height * patch_size, width * patch_size) + ) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_skyreels_v2.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_skyreels_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..9067e32ea5c32a1d5c3f58da9abd9ed38b693677 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_skyreels_v2.py @@ -0,0 +1,766 @@ +# Copyright 2025 The SkyReels Team, The Wan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, deprecate, logging +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import ( + PixArtAlphaTextProjection, + TimestepEmbedding, + get_1d_rotary_pos_embed, + get_1d_sincos_pos_embed_from_grid, +) +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin, get_parameter_dtype +from ..normalization import FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_qkv_projections( + attn: "SkyReelsV2Attention", hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor +): + # encoder_hidden_states is only passed for cross-attention + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + if attn.fused_projections: + if attn.cross_attention_dim_head is None: + # In self-attention layers, we can fuse the entire QKV projection into a single linear + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + else: + # In cross-attention layers, we can only fuse the KV projections into a single linear + query = attn.to_q(hidden_states) + key, value = attn.to_kv(encoder_hidden_states).chunk(2, dim=-1) + else: + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + return query, key, value + + +def _get_added_kv_projections(attn: "SkyReelsV2Attention", encoder_hidden_states_img: torch.Tensor): + if attn.fused_projections: + key_img, value_img = attn.to_added_kv(encoder_hidden_states_img).chunk(2, dim=-1) + else: + key_img = attn.add_k_proj(encoder_hidden_states_img) + value_img = attn.add_v_proj(encoder_hidden_states_img) + return key_img, value_img + + +class SkyReelsV2AttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "SkyReelsV2AttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to 2.0." + ) + + def __call__( + self, + attn: "SkyReelsV2Attention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> torch.Tensor: + encoder_hidden_states_img = None + if attn.add_k_proj is not None: + # 512 is the context length of the text encoder, hardcoded for now + image_context_length = encoder_hidden_states.shape[1] - 512 + encoder_hidden_states_img = encoder_hidden_states[:, :image_context_length] + encoder_hidden_states = encoder_hidden_states[:, image_context_length:] + + query, key, value = _get_qkv_projections(attn, hidden_states, encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + if rotary_emb is not None: + + def apply_rotary_emb( + hidden_states: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + ): + x1, x2 = hidden_states.unflatten(-1, (-1, 2)).unbind(-1) + cos = freqs_cos[..., 0::2] + sin = freqs_sin[..., 1::2] + out = torch.empty_like(hidden_states) + out[..., 0::2] = x1 * cos - x2 * sin + out[..., 1::2] = x1 * sin + x2 * cos + return out.type_as(hidden_states) + + query = apply_rotary_emb(query, *rotary_emb) + key = apply_rotary_emb(key, *rotary_emb) + + # I2V task + hidden_states_img = None + if encoder_hidden_states_img is not None: + key_img, value_img = _get_added_kv_projections(attn, encoder_hidden_states_img) + key_img = attn.norm_added_k(key_img) + + key_img = key_img.unflatten(2, (attn.heads, -1)) + value_img = value_img.unflatten(2, (attn.heads, -1)) + + hidden_states_img = dispatch_attention_fn( + query, + key_img, + value_img, + attn_mask=None, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + hidden_states_img = hidden_states_img.flatten(2, 3) + hidden_states_img = hidden_states_img.type_as(query) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + if hidden_states_img is not None: + hidden_states = hidden_states + hidden_states_img + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class SkyReelsV2AttnProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = ( + "The SkyReelsV2AttnProcessor2_0 class is deprecated and will be removed in a future version. " + "Please use SkyReelsV2AttnProcessor instead. " + ) + deprecate("SkyReelsV2AttnProcessor2_0", "1.0.0", deprecation_message, standard_warn=False) + return SkyReelsV2AttnProcessor(*args, **kwargs) + + +class SkyReelsV2Attention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = SkyReelsV2AttnProcessor + _available_processors = [SkyReelsV2AttnProcessor] + + def __init__( + self, + dim: int, + heads: int = 8, + dim_head: int = 64, + eps: float = 1e-5, + dropout: float = 0.0, + added_kv_proj_dim: int | None = None, + cross_attention_dim_head: int | None = None, + processor=None, + is_cross_attention=None, + ): + super().__init__() + + self.inner_dim = dim_head * heads + self.heads = heads + self.added_kv_proj_dim = added_kv_proj_dim + self.cross_attention_dim_head = cross_attention_dim_head + self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads + + self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=True) + self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_out = torch.nn.ModuleList( + [ + torch.nn.Linear(self.inner_dim, dim, bias=True), + torch.nn.Dropout(dropout), + ] + ) + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + self.norm_k = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + + self.add_k_proj = self.add_v_proj = None + if added_kv_proj_dim is not None: + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps) + + self.is_cross_attention = cross_attention_dim_head is not None + + self.set_processor(processor) + + def fuse_projections(self): + if getattr(self, "fused_projections", False): + return + + if self.cross_attention_dim_head is None: + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_qkv = nn.Linear(in_features, out_features, bias=True) + self.to_qkv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + else: + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_kv = nn.Linear(in_features, out_features, bias=True) + self.to_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + if self.added_kv_proj_dim is not None: + concatenated_weights = torch.cat([self.add_k_proj.weight.data, self.add_v_proj.weight.data]) + concatenated_bias = torch.cat([self.add_k_proj.bias.data, self.add_v_proj.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_added_kv = nn.Linear(in_features, out_features, bias=True) + self.to_added_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + self.fused_projections = True + + @torch.no_grad() + def unfuse_projections(self): + if not getattr(self, "fused_projections", False): + return + + if hasattr(self, "to_qkv"): + delattr(self, "to_qkv") + if hasattr(self, "to_kv"): + delattr(self, "to_kv") + if hasattr(self, "to_added_kv"): + delattr(self, "to_added_kv") + + self.fused_projections = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + **kwargs, + ) -> torch.Tensor: + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, rotary_emb, **kwargs) + + +class SkyReelsV2ImageEmbedding(torch.nn.Module): + def __init__(self, in_features: int, out_features: int, pos_embed_seq_len=None): + super().__init__() + + self.norm1 = FP32LayerNorm(in_features) + self.ff = FeedForward(in_features, out_features, mult=1, activation_fn="gelu") + self.norm2 = FP32LayerNorm(out_features) + if pos_embed_seq_len is not None: + self.pos_embed = nn.Parameter(torch.zeros(1, pos_embed_seq_len, in_features)) + else: + self.pos_embed = None + + def forward(self, encoder_hidden_states_image: torch.Tensor) -> torch.Tensor: + if self.pos_embed is not None: + batch_size, seq_len, embed_dim = encoder_hidden_states_image.shape + encoder_hidden_states_image = encoder_hidden_states_image.view(-1, 2 * seq_len, embed_dim) + encoder_hidden_states_image = encoder_hidden_states_image + self.pos_embed + + hidden_states = self.norm1(encoder_hidden_states_image) + hidden_states = self.ff(hidden_states) + hidden_states = self.norm2(hidden_states) + return hidden_states + + +class SkyReelsV2Timesteps(nn.Module): + def __init__(self, num_channels: int, flip_sin_to_cos: bool, output_type: str = "pt"): + super().__init__() + self.num_channels = num_channels + self.output_type = output_type + self.flip_sin_to_cos = flip_sin_to_cos + + def forward(self, timesteps: torch.Tensor) -> torch.Tensor: + original_shape = timesteps.shape + t_emb = get_1d_sincos_pos_embed_from_grid( + self.num_channels, + timesteps, + output_type=self.output_type, + flip_sin_to_cos=self.flip_sin_to_cos, + ) + # Reshape back to maintain batch structure + if len(original_shape) > 1: + t_emb = t_emb.reshape(*original_shape, self.num_channels) + return t_emb + + +class SkyReelsV2TimeTextImageEmbedding(nn.Module): + def __init__( + self, + dim: int, + time_freq_dim: int, + time_proj_dim: int, + text_embed_dim: int, + image_embed_dim: int | None = None, + pos_embed_seq_len: int | None = None, + ): + super().__init__() + + self.timesteps_proj = SkyReelsV2Timesteps(num_channels=time_freq_dim, flip_sin_to_cos=True) + self.time_embedder = TimestepEmbedding(in_channels=time_freq_dim, time_embed_dim=dim) + self.act_fn = nn.SiLU() + self.time_proj = nn.Linear(dim, time_proj_dim) + self.text_embedder = PixArtAlphaTextProjection(text_embed_dim, dim, act_fn="gelu_tanh") + + self.image_embedder = None + if image_embed_dim is not None: + self.image_embedder = SkyReelsV2ImageEmbedding(image_embed_dim, dim, pos_embed_seq_len=pos_embed_seq_len) + + def forward( + self, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + ): + timestep = self.timesteps_proj(timestep) + + time_embedder_dtype = get_parameter_dtype(self.time_embedder) + if timestep.dtype != time_embedder_dtype and time_embedder_dtype != torch.int8: + timestep = timestep.to(time_embedder_dtype) + temb = self.time_embedder(timestep).type_as(encoder_hidden_states) + timestep_proj = self.time_proj(self.act_fn(temb)) + + encoder_hidden_states = self.text_embedder(encoder_hidden_states) + if encoder_hidden_states_image is not None: + encoder_hidden_states_image = self.image_embedder(encoder_hidden_states_image) + + return temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image + + +class SkyReelsV2RotaryPosEmbed(nn.Module): + def __init__( + self, + attention_head_dim: int, + patch_size: tuple[int, int, int], + max_seq_len: int, + theta: float = 10000.0, + ): + super().__init__() + + self.attention_head_dim = attention_head_dim + self.patch_size = patch_size + self.max_seq_len = max_seq_len + + h_dim = w_dim = 2 * (attention_head_dim // 6) + t_dim = attention_head_dim - h_dim - w_dim + freqs_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + + self.t_dim = t_dim + self.h_dim = h_dim + self.w_dim = w_dim + + freqs_cos = [] + freqs_sin = [] + + for dim in [t_dim, h_dim, w_dim]: + freq_cos, freq_sin = get_1d_rotary_pos_embed( + dim, + max_seq_len, + theta, + use_real=True, + repeat_interleave_real=True, + freqs_dtype=freqs_dtype, + ) + freqs_cos.append(freq_cos) + freqs_sin.append(freq_sin) + + self.register_buffer("freqs_cos", torch.cat(freqs_cos, dim=1), persistent=False) + self.register_buffer("freqs_sin", torch.cat(freqs_sin, dim=1), persistent=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.patch_size + ppf, pph, ppw = num_frames // p_t, height // p_h, width // p_w + + split_sizes = [self.t_dim, self.h_dim, self.w_dim] + + freqs_cos = self.freqs_cos.split(split_sizes, dim=1) + freqs_sin = self.freqs_sin.split(split_sizes, dim=1) + + freqs_cos_f = freqs_cos[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_h = freqs_cos[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_w = freqs_cos[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_sin_f = freqs_sin[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_h = freqs_sin[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_w = freqs_sin[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_cos = torch.cat([freqs_cos_f, freqs_cos_h, freqs_cos_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + freqs_sin = torch.cat([freqs_sin_f, freqs_sin_h, freqs_sin_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + + return freqs_cos, freqs_sin + + +@maybe_allow_in_graph +class SkyReelsV2TransformerBlock(nn.Module): + def __init__( + self, + dim: int, + ffn_dim: int, + num_heads: int, + qk_norm: str = "rms_norm_across_heads", + cross_attn_norm: bool = False, + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + ): + super().__init__() + + # 1. Self-attention + self.norm1 = FP32LayerNorm(dim, eps, elementwise_affine=False) + self.attn1 = SkyReelsV2Attention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + cross_attention_dim_head=None, + processor=SkyReelsV2AttnProcessor(), + ) + + # 2. Cross-attention + self.attn2 = SkyReelsV2Attention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + cross_attention_dim_head=dim // num_heads, + processor=SkyReelsV2AttnProcessor(), + ) + self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + + # 3. Feed-forward + self.ffn = FeedForward(dim, inner_dim=ffn_dim, activation_fn="gelu-approximate") + self.norm3 = FP32LayerNorm(dim, eps, elementwise_affine=False) + + self.scale_shift_table = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + rotary_emb: torch.Tensor, + attention_mask: torch.Tensor, + ) -> torch.Tensor: + if temb.dim() == 3: + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table + temb.float() + ).chunk(6, dim=1) + elif temb.dim() == 4: + # For 4D temb in Diffusion Forcing framework, we assume the shape is (b, 6, f * pp_h * pp_w, inner_dim) + e = (self.scale_shift_table.unsqueeze(2) + temb.float()).chunk(6, dim=1) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = [ei.squeeze(1) for ei in e] + + # 1. Self-attention + norm_hidden_states = (self.norm1(hidden_states.float()) * (1 + scale_msa) + shift_msa).type_as(hidden_states) + attn_output = self.attn1(norm_hidden_states, None, attention_mask, rotary_emb) + hidden_states = (hidden_states.float() + attn_output * gate_msa).type_as(hidden_states) + + # 2. Cross-attention + norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states) + attn_output = self.attn2(norm_hidden_states, encoder_hidden_states, None, None) + hidden_states = hidden_states + attn_output + + # 3. Feed-forward + norm_hidden_states = (self.norm3(hidden_states.float()) * (1 + c_scale_msa) + c_shift_msa).type_as( + hidden_states + ) + ff_output = self.ffn(norm_hidden_states) + hidden_states = (hidden_states.float() + ff_output.float() * c_gate_msa).type_as(hidden_states) + + return hidden_states + + +class SkyReelsV2Transformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in the Wan-based SkyReels-V2 model. + + Args: + patch_size (`tuple[int]`, defaults to `(1, 2, 2)`): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch). + num_attention_heads (`int`, defaults to `16`): + Fixed length for text embeddings. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_dim (`int`, defaults to `4096`): + Input dimension for text embeddings. + freq_dim (`int`, defaults to `256`): + Dimension for sinusoidal time embeddings. + ffn_dim (`int`, defaults to `8192`): + Intermediate dimension in feed-forward network. + num_layers (`int`, defaults to `32`): + The number of layers of transformer blocks to use. + window_size (`tuple[int]`, defaults to `(-1, -1)`): + Window size for local attention (-1 indicates global attention). + cross_attn_norm (`bool`, defaults to `True`): + Enable cross-attention normalization. + qk_norm (`str`, *optional*, defaults to `"rms_norm_across_heads"`): + Enable query/key normalization. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + inject_sample_info (`bool`, defaults to `False`): + Whether to inject sample information into the model. + image_dim (`int`, *optional*): + The dimension of the image embeddings. + added_kv_proj_dim (`int`, *optional*): + The dimension of the added key/value projection. + rope_max_seq_len (`int`, defaults to `1024`): + The maximum sequence length for the rotary embeddings. + pos_embed_seq_len (`int`, *optional*): + The sequence length for the positional embeddings. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embedding", "condition_embedder", "norm"] + _no_split_modules = ["SkyReelsV2TransformerBlock"] + _keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"] + _keys_to_ignore_on_load_unexpected = ["norm_added_q"] + _repeated_blocks = ["SkyReelsV2TransformerBlock"] + + @register_to_config + def __init__( + self, + patch_size: tuple[int] = (1, 2, 2), + num_attention_heads: int = 16, + attention_head_dim: int = 128, + in_channels: int = 16, + out_channels: int = 16, + text_dim: int = 4096, + freq_dim: int = 256, + ffn_dim: int = 8192, + num_layers: int = 32, + cross_attn_norm: bool = True, + qk_norm: str | None = "rms_norm_across_heads", + eps: float = 1e-6, + image_dim: int | None = None, + added_kv_proj_dim: int | None = None, + rope_max_seq_len: int = 1024, + pos_embed_seq_len: int | None = None, + inject_sample_info: bool = False, + num_frame_per_block: int = 1, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Patch & position embedding + self.rope = SkyReelsV2RotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Condition embeddings + # image_embedding_dim=1280 for I2V model + self.condition_embedder = SkyReelsV2TimeTextImageEmbedding( + dim=inner_dim, + time_freq_dim=freq_dim, + time_proj_dim=inner_dim * 6, + text_embed_dim=text_dim, + image_embed_dim=image_dim, + pos_embed_seq_len=pos_embed_seq_len, + ) + + # 3. Transformer blocks + self.blocks = nn.ModuleList( + [ + SkyReelsV2TransformerBlock( + inner_dim, ffn_dim, num_attention_heads, qk_norm, cross_attn_norm, eps, added_kv_proj_dim + ) + for _ in range(num_layers) + ] + ) + + # 4. Output norm & projection + self.norm_out = FP32LayerNorm(inner_dim, eps, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels * math.prod(patch_size)) + self.scale_shift_table = nn.Parameter(torch.randn(1, 2, inner_dim) / inner_dim**0.5) + + if inject_sample_info: + self.fps_embedding = nn.Embedding(2, inner_dim) + self.fps_projection = FeedForward(inner_dim, inner_dim * 6, mult=1, activation_fn="linear-silu") + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + enable_diffusion_forcing: bool = False, + fps: torch.Tensor | None = None, + return_dict: bool = True, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor | dict[str, torch.Tensor]: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + rotary_emb = self.rope(hidden_states) + + hidden_states = self.patch_embedding(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + + causal_mask = None + if self.config.num_frame_per_block > 1: + block_num = post_patch_num_frames // self.config.num_frame_per_block + range_tensor = torch.arange(block_num, device=hidden_states.device).repeat_interleave( + self.config.num_frame_per_block + ) + causal_mask = range_tensor.unsqueeze(0) <= range_tensor.unsqueeze(1) # f, f + causal_mask = causal_mask.view(post_patch_num_frames, 1, 1, post_patch_num_frames, 1, 1) + causal_mask = causal_mask.repeat( + 1, post_patch_height, post_patch_width, 1, post_patch_height, post_patch_width + ) + causal_mask = causal_mask.reshape( + post_patch_num_frames * post_patch_height * post_patch_width, + post_patch_num_frames * post_patch_height * post_patch_width, + ) + causal_mask = causal_mask.unsqueeze(0).unsqueeze(0) + + temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder( + timestep, encoder_hidden_states, encoder_hidden_states_image + ) + + timestep_proj = timestep_proj.unflatten(-1, (6, -1)) + + if encoder_hidden_states_image is not None: + encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1) + + if self.config.inject_sample_info: + fps = torch.tensor(fps, dtype=torch.long, device=hidden_states.device) + + fps_emb = self.fps_embedding(fps) + if enable_diffusion_forcing: + timestep_proj = timestep_proj + self.fps_projection(fps_emb).unflatten(1, (6, -1)).repeat( + timestep.shape[1], 1, 1 + ) + else: + timestep_proj = timestep_proj + self.fps_projection(fps_emb).unflatten(1, (6, -1)) + + if enable_diffusion_forcing: + b, f = timestep.shape + temb = temb.view(b, f, 1, 1, -1) + timestep_proj = timestep_proj.view(b, f, 1, 1, 6, -1) # (b, f, 1, 1, 6, inner_dim) + temb = temb.repeat(1, 1, post_patch_height, post_patch_width, 1).flatten(1, 3) + timestep_proj = timestep_proj.repeat(1, 1, post_patch_height, post_patch_width, 1, 1).flatten( + 1, 3 + ) # (b, f, pp_h, pp_w, 6, inner_dim) -> (b, f * pp_h * pp_w, 6, inner_dim) + timestep_proj = timestep_proj.transpose(1, 2).contiguous() # (b, 6, f * pp_h * pp_w, inner_dim) + + # 4. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.blocks: + hidden_states = self._gradient_checkpointing_func( + block, + hidden_states, + encoder_hidden_states, + timestep_proj, + rotary_emb, + causal_mask, + ) + else: + for block in self.blocks: + hidden_states = block( + hidden_states, + encoder_hidden_states, + timestep_proj, + rotary_emb, + causal_mask, + ) + + if temb.dim() == 2: + # If temb is 2D, we assume it has time 1-D time embedding values for each batch. + # For models: + # - Skywork/SkyReels-V2-T2V-14B-540P-Diffusers + # - Skywork/SkyReels-V2-T2V-14B-720P-Diffusers + # - Skywork/SkyReels-V2-I2V-1.3B-540P-Diffusers + # - Skywork/SkyReels-V2-I2V-14B-540P-Diffusers + # - Skywork/SkyReels-V2-I2V-14B-720P-Diffusers + shift, scale = (self.scale_shift_table + temb.unsqueeze(1)).chunk(2, dim=1) + elif temb.dim() == 3: + # If temb is 3D, we assume it has 2-D time embedding values for each batch. + # Each time embedding tensor includes values for each latent frame; thus Diffusion Forcing. + # For models: + # - Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers + # - Skywork/SkyReels-V2-DF-14B-540P-Diffusers + # - Skywork/SkyReels-V2-DF-14B-720P-Diffusers + shift, scale = (self.scale_shift_table.unsqueeze(2) + temb.unsqueeze(1)).chunk(2, dim=1) + shift, scale = shift.squeeze(1), scale.squeeze(1) + + # Move the shift and scale tensors to the same device as hidden_states. + # When using multi-GPU inference via accelerate these will be on the + # first device rather than the last device, which hidden_states ends up + # on. + shift = shift.to(hidden_states.device) + scale = scale.to(hidden_states.device) + + hidden_states = (self.norm_out(hidden_states.float()) * (1 + scale) + shift).type_as(hidden_states) + + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) + + def _set_ar_attention(self, causal_block_size: int): + self.register_to_config(num_frame_per_block=causal_block_size) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_temporal.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_temporal.py new file mode 100644 index 0000000000000000000000000000000000000000..b6fedcb26cc8061ff0d6ec948a17bff76ac7d504 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_temporal.py @@ -0,0 +1,375 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from typing import Any + +import torch +from torch import nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import BaseOutput +from ..attention import BasicTransformerBlock, TemporalBasicTransformerBlock +from ..embeddings import TimestepEmbedding, Timesteps +from ..modeling_utils import ModelMixin +from ..resnet import AlphaBlender + + +@dataclass +class TransformerTemporalModelOutput(BaseOutput): + """ + The output of [`TransformerTemporalModel`]. + + Args: + sample (`torch.Tensor` of shape `(batch_size x num_frames, num_channels, height, width)`): + The hidden states output conditioned on `encoder_hidden_states` input. + """ + + sample: torch.Tensor + + +class TransformerTemporalModel(ModelMixin, ConfigMixin): + """ + A Transformer model for video-like data. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head. + in_channels (`int`, *optional*): + The number of channels in the input and output (specify if the input is **continuous**). + num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + attention_bias (`bool`, *optional*): + Configure if the `TransformerBlock` attention should contain a bias parameter. + sample_size (`int`, *optional*): The width of the latent images (specify if the input is **discrete**). + This is fixed during training since it is used to learn a number of position embeddings. + activation_fn (`str`, *optional*, defaults to `"geglu"`): + Activation function to use in feed-forward. See `diffusers.models.activations.get_activation` for supported + activation functions. + norm_elementwise_affine (`bool`, *optional*): + Configure if the `TransformerBlock` should use learnable elementwise affine parameters for normalization. + double_self_attention (`bool`, *optional*): + Configure if each `TransformerBlock` should contain two self-attention layers. + positional_embeddings: (`str`, *optional*): + The type of positional embeddings to apply to the sequence input before passing use. + num_positional_embeddings: (`int`, *optional*): + The maximum length of the sequence over which to apply positional embeddings. + """ + + _skip_layerwise_casting_patterns = ["norm"] + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int | None = None, + out_channels: int | None = None, + num_layers: int = 1, + dropout: float = 0.0, + norm_num_groups: int = 32, + cross_attention_dim: int | None = None, + attention_bias: bool = False, + sample_size: int | None = None, + activation_fn: str = "geglu", + norm_elementwise_affine: bool = True, + double_self_attention: bool = True, + positional_embeddings: str | None = None, + num_positional_embeddings: int | None = None, + ): + super().__init__() + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + inner_dim = num_attention_heads * attention_head_dim + + self.in_channels = in_channels + + self.norm = torch.nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True) + self.proj_in = nn.Linear(in_channels, inner_dim) + + # 3. Define transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + cross_attention_dim=cross_attention_dim, + activation_fn=activation_fn, + attention_bias=attention_bias, + double_self_attention=double_self_attention, + norm_elementwise_affine=norm_elementwise_affine, + positional_embeddings=positional_embeddings, + num_positional_embeddings=num_positional_embeddings, + ) + for d in range(num_layers) + ] + ) + + self.proj_out = nn.Linear(inner_dim, in_channels) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.LongTensor | None = None, + timestep: torch.LongTensor | None = None, + class_labels: torch.LongTensor = None, + num_frames: int = 1, + cross_attention_kwargs: dict[str, Any] | None = None, + return_dict: bool = True, + ) -> TransformerTemporalModelOutput: + """ + The [`TransformerTemporal`] forward method. + + Args: + hidden_states (`torch.LongTensor` of shape `(batch size, num latent pixels)` if discrete, `torch.Tensor` of shape `(batch size, channel, height, width)` if continuous): + Input hidden_states. + encoder_hidden_states ( `torch.LongTensor` of shape `(batch size, encoder_hidden_states dim)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + timestep ( `torch.LongTensor`, *optional*): + Used to indicate denoising step. Optional timestep to be applied as an embedding in `AdaLayerNorm`. + class_labels ( `torch.LongTensor` of shape `(batch size, num classes)`, *optional*): + Used to indicate class labels conditioning. Optional class labels to be applied as an embedding in + `AdaLayerZeroNorm`. + num_frames (`int`, *optional*, defaults to 1): + The number of frames to be processed per batch. This is used to reshape the hidden states. + cross_attention_kwargs (`dict`, *optional*): + A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under + `self.processor` in + [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformers.transformer_temporal.TransformerTemporalModelOutput`] + instead of a plain tuple. + + Returns: + [`~models.transformers.transformer_temporal.TransformerTemporalModelOutput`] or `tuple`: + If `return_dict` is True, an + [`~models.transformers.transformer_temporal.TransformerTemporalModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + # 1. Input + batch_frames, channel, height, width = hidden_states.shape + batch_size = batch_frames // num_frames + + residual = hidden_states + + hidden_states = hidden_states[None, :].reshape(batch_size, num_frames, channel, height, width) + hidden_states = hidden_states.permute(0, 2, 1, 3, 4) + + hidden_states = self.norm(hidden_states) + hidden_states = hidden_states.permute(0, 3, 4, 2, 1).reshape(batch_size * height * width, num_frames, channel) + + hidden_states = self.proj_in(hidden_states) + + # 2. Blocks + for block in self.transformer_blocks: + hidden_states = block( + hidden_states, + encoder_hidden_states=encoder_hidden_states, + timestep=timestep, + cross_attention_kwargs=cross_attention_kwargs, + class_labels=class_labels, + ) + + # 3. Output + hidden_states = self.proj_out(hidden_states) + hidden_states = ( + hidden_states[None, None, :] + .reshape(batch_size, height, width, num_frames, channel) + .permute(0, 3, 4, 1, 2) + .contiguous() + ) + hidden_states = hidden_states.reshape(batch_frames, channel, height, width) + + output = hidden_states + residual + + if not return_dict: + return (output,) + + return TransformerTemporalModelOutput(sample=output) + + +class TransformerSpatioTemporalModel(nn.Module): + """ + A Transformer model for video-like data. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head. + in_channels (`int`, *optional*): + The number of channels in the input and output (specify if the input is **continuous**). + out_channels (`int`, *optional*): + The number of channels in the output (specify if the input is **continuous**). + num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. + cross_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use. + """ + + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: int = 320, + out_channels: int | None = None, + num_layers: int = 1, + cross_attention_dim: int | None = None, + ): + super().__init__() + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + + inner_dim = num_attention_heads * attention_head_dim + self.inner_dim = inner_dim + + # 2. Define input layers + self.in_channels = in_channels + self.norm = torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6) + self.proj_in = nn.Linear(in_channels, inner_dim) + + # 3. Define transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + cross_attention_dim=cross_attention_dim, + ) + for d in range(num_layers) + ] + ) + + time_mix_inner_dim = inner_dim + self.temporal_transformer_blocks = nn.ModuleList( + [ + TemporalBasicTransformerBlock( + inner_dim, + time_mix_inner_dim, + num_attention_heads, + attention_head_dim, + cross_attention_dim=cross_attention_dim, + ) + for _ in range(num_layers) + ] + ) + + time_embed_dim = in_channels * 4 + self.time_pos_embed = TimestepEmbedding(in_channels, time_embed_dim, out_dim=in_channels) + self.time_proj = Timesteps(in_channels, True, 0) + self.time_mixer = AlphaBlender(alpha=0.5, merge_strategy="learned_with_images") + + # 4. Define output layers + self.out_channels = in_channels if out_channels is None else out_channels + # TODO: should use out_channels for continuous projections + self.proj_out = nn.Linear(inner_dim, in_channels) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + image_only_indicator: torch.Tensor | None = None, + return_dict: bool = True, + ): + """ + Args: + hidden_states (`torch.Tensor` of shape `(batch size, channel, height, width)`): + Input hidden_states. + num_frames (`int`): + The number of frames to be processed per batch. This is used to reshape the hidden states. + encoder_hidden_states ( `torch.LongTensor` of shape `(batch size, encoder_hidden_states dim)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + image_only_indicator (`torch.LongTensor` of shape `(batch size, num_frames)`, *optional*): + A tensor indicating whether the input contains only images. 1 indicates that the input contains only + images, 0 indicates that the input contains video frames. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.transformers.transformer_temporal.TransformerTemporalModelOutput`] + instead of a plain tuple. + + Returns: + [`~models.transformers.transformer_temporal.TransformerTemporalModelOutput`] or `tuple`: + If `return_dict` is True, an + [`~models.transformers.transformer_temporal.TransformerTemporalModelOutput`] is returned, otherwise a + `tuple` where the first element is the sample tensor. + """ + # 1. Input + batch_frames, _, height, width = hidden_states.shape + num_frames = image_only_indicator.shape[-1] + batch_size = batch_frames // num_frames + + time_context = encoder_hidden_states + time_context_first_timestep = time_context[None, :].reshape( + batch_size, num_frames, -1, time_context.shape[-1] + )[:, 0] + time_context = time_context_first_timestep[:, None].broadcast_to( + batch_size, height * width, time_context.shape[-2], time_context.shape[-1] + ) + time_context = time_context.reshape(batch_size * height * width, -1, time_context.shape[-1]) + + residual = hidden_states + + hidden_states = self.norm(hidden_states) + inner_dim = hidden_states.shape[1] + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch_frames, height * width, inner_dim) + hidden_states = self.proj_in(hidden_states) + + num_frames_emb = torch.arange(num_frames, device=hidden_states.device) + num_frames_emb = num_frames_emb.repeat(batch_size, 1) + num_frames_emb = num_frames_emb.reshape(-1) + t_emb = self.time_proj(num_frames_emb) + + # `Timesteps` does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=hidden_states.dtype) + + emb = self.time_pos_embed(t_emb) + emb = emb[:, None, :] + + # 2. Blocks + for block, temporal_block in zip(self.transformer_blocks, self.temporal_transformer_blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, hidden_states, None, encoder_hidden_states, None + ) + else: + hidden_states = block(hidden_states, encoder_hidden_states=encoder_hidden_states) + + hidden_states_mix = hidden_states + hidden_states_mix = hidden_states_mix + emb + + hidden_states_mix = temporal_block( + hidden_states_mix, + num_frames=num_frames, + encoder_hidden_states=time_context, + ) + hidden_states = self.time_mixer( + x_spatial=hidden_states, + x_temporal=hidden_states_mix, + image_only_indicator=image_only_indicator, + ) + + # 3. Output + hidden_states = self.proj_out(hidden_states) + hidden_states = hidden_states.reshape(batch_frames, height, width, inner_dim).permute(0, 3, 1, 2).contiguous() + + output = hidden_states + residual + + if not return_dict: + return (output,) + + return TransformerTemporalModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan.py new file mode 100644 index 0000000000000000000000000000000000000000..5926bbb8e713b1f49699c706cbdd64459fcf5f3e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan.py @@ -0,0 +1,708 @@ +# Copyright 2025 The Wan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, deprecate, logging +from ...utils.torch_utils import maybe_allow_in_graph +from .._modeling_parallel import ContextParallelInput, ContextParallelOutput +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def _get_qkv_projections(attn: "WanAttention", hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor): + # encoder_hidden_states is only passed for cross-attention + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + if attn.fused_projections: + if not attn.is_cross_attention: + # In self-attention layers, we can fuse the entire QKV projection into a single linear + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + else: + # In cross-attention layers, we can only fuse the KV projections into a single linear + query = attn.to_q(hidden_states) + key, value = attn.to_kv(encoder_hidden_states).chunk(2, dim=-1) + else: + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + return query, key, value + + +def _get_added_kv_projections(attn: "WanAttention", encoder_hidden_states_img: torch.Tensor): + if attn.fused_projections: + key_img, value_img = attn.to_added_kv(encoder_hidden_states_img).chunk(2, dim=-1) + else: + key_img = attn.add_k_proj(encoder_hidden_states_img) + value_img = attn.add_v_proj(encoder_hidden_states_img) + return key_img, value_img + + +class WanAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "WanAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or higher." + ) + + def __call__( + self, + attn: "WanAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> torch.Tensor: + encoder_hidden_states_img = None + if attn.add_k_proj is not None: + # 512 is the context length of the text encoder, hardcoded for now + image_context_length = encoder_hidden_states.shape[1] - 512 + encoder_hidden_states_img = encoder_hidden_states[:, :image_context_length] + encoder_hidden_states = encoder_hidden_states[:, image_context_length:] + + query, key, value = _get_qkv_projections(attn, hidden_states, encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + if rotary_emb is not None: + + def apply_rotary_emb( + hidden_states: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + ): + x1, x2 = hidden_states.unflatten(-1, (-1, 2)).unbind(-1) + cos = freqs_cos[..., 0::2] + sin = freqs_sin[..., 1::2] + out = torch.empty_like(hidden_states) + out[..., 0::2] = x1 * cos - x2 * sin + out[..., 1::2] = x1 * sin + x2 * cos + return out.type_as(hidden_states) + + query = apply_rotary_emb(query, *rotary_emb) + key = apply_rotary_emb(key, *rotary_emb) + + # I2V task + hidden_states_img = None + if encoder_hidden_states_img is not None: + key_img, value_img = _get_added_kv_projections(attn, encoder_hidden_states_img) + key_img = attn.norm_added_k(key_img) + + key_img = key_img.unflatten(2, (attn.heads, -1)) + value_img = value_img.unflatten(2, (attn.heads, -1)) + + hidden_states_img = dispatch_attention_fn( + query, + key_img, + value_img, + attn_mask=None, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12909 + parallel_config=None, + ) + hidden_states_img = hidden_states_img.flatten(2, 3) + hidden_states_img = hidden_states_img.type_as(query) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12909 + parallel_config=(self._parallel_config if encoder_hidden_states is None else None), + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + if hidden_states_img is not None: + hidden_states = hidden_states + hidden_states_img + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +class WanAttnProcessor2_0: + def __new__(cls, *args, **kwargs): + deprecation_message = ( + "The WanAttnProcessor2_0 class is deprecated and will be removed in a future version. " + "Please use WanAttnProcessor instead. " + ) + deprecate("WanAttnProcessor2_0", "1.0.0", deprecation_message, standard_warn=False) + return WanAttnProcessor(*args, **kwargs) + + +class WanAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = WanAttnProcessor + _available_processors = [WanAttnProcessor] + + def __init__( + self, + dim: int, + heads: int = 8, + dim_head: int = 64, + eps: float = 1e-5, + dropout: float = 0.0, + added_kv_proj_dim: int | None = None, + cross_attention_dim_head: int | None = None, + processor=None, + is_cross_attention=None, + ): + super().__init__() + + self.inner_dim = dim_head * heads + self.heads = heads + self.added_kv_proj_dim = added_kv_proj_dim + self.cross_attention_dim_head = cross_attention_dim_head + self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads + + self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=True) + self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_out = torch.nn.ModuleList( + [ + torch.nn.Linear(self.inner_dim, dim, bias=True), + torch.nn.Dropout(dropout), + ] + ) + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + self.norm_k = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + + self.add_k_proj = self.add_v_proj = None + if added_kv_proj_dim is not None: + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps) + + if is_cross_attention is not None: + self.is_cross_attention = is_cross_attention + else: + self.is_cross_attention = cross_attention_dim_head is not None + + self.set_processor(processor) + + def fuse_projections(self): + if getattr(self, "fused_projections", False): + return + + if not self.is_cross_attention: + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_qkv = nn.Linear(in_features, out_features, bias=True) + self.to_qkv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + else: + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_kv = nn.Linear(in_features, out_features, bias=True) + self.to_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + if self.added_kv_proj_dim is not None: + concatenated_weights = torch.cat([self.add_k_proj.weight.data, self.add_v_proj.weight.data]) + concatenated_bias = torch.cat([self.add_k_proj.bias.data, self.add_v_proj.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_added_kv = nn.Linear(in_features, out_features, bias=True) + self.to_added_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + self.fused_projections = True + + @torch.no_grad() + def unfuse_projections(self): + if not getattr(self, "fused_projections", False): + return + + if hasattr(self, "to_qkv"): + delattr(self, "to_qkv") + if hasattr(self, "to_kv"): + delattr(self, "to_kv") + if hasattr(self, "to_added_kv"): + delattr(self, "to_added_kv") + + self.fused_projections = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + **kwargs, + ) -> torch.Tensor: + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, rotary_emb, **kwargs) + + +class WanImageEmbedding(torch.nn.Module): + def __init__(self, in_features: int, out_features: int, pos_embed_seq_len=None): + super().__init__() + + self.norm1 = FP32LayerNorm(in_features) + self.ff = FeedForward(in_features, out_features, mult=1, activation_fn="gelu") + self.norm2 = FP32LayerNorm(out_features) + if pos_embed_seq_len is not None: + self.pos_embed = nn.Parameter(torch.zeros(1, pos_embed_seq_len, in_features)) + else: + self.pos_embed = None + + def forward(self, encoder_hidden_states_image: torch.Tensor) -> torch.Tensor: + if self.pos_embed is not None: + batch_size, seq_len, embed_dim = encoder_hidden_states_image.shape + encoder_hidden_states_image = encoder_hidden_states_image.view(-1, 2 * seq_len, embed_dim) + encoder_hidden_states_image = encoder_hidden_states_image + self.pos_embed + + hidden_states = self.norm1(encoder_hidden_states_image) + hidden_states = self.ff(hidden_states) + hidden_states = self.norm2(hidden_states) + return hidden_states + + +class WanTimeTextImageEmbedding(nn.Module): + def __init__( + self, + dim: int, + time_freq_dim: int, + time_proj_dim: int, + text_embed_dim: int, + image_embed_dim: int | None = None, + pos_embed_seq_len: int | None = None, + ): + super().__init__() + + self.timesteps_proj = Timesteps(num_channels=time_freq_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.time_embedder = TimestepEmbedding(in_channels=time_freq_dim, time_embed_dim=dim) + self.act_fn = nn.SiLU() + self.time_proj = nn.Linear(dim, time_proj_dim) + self.text_embedder = PixArtAlphaTextProjection(text_embed_dim, dim, act_fn="gelu_tanh") + + self.image_embedder = None + if image_embed_dim is not None: + self.image_embedder = WanImageEmbedding(image_embed_dim, dim, pos_embed_seq_len=pos_embed_seq_len) + + def forward( + self, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + timestep_seq_len: int | None = None, + ): + timestep = self.timesteps_proj(timestep) + if timestep_seq_len is not None: + timestep = timestep.unflatten(0, (-1, timestep_seq_len)) + + time_embedder_dtype = next(iter(self.time_embedder.parameters())).dtype + if timestep.dtype != time_embedder_dtype and time_embedder_dtype != torch.int8: + timestep = timestep.to(time_embedder_dtype) + temb = self.time_embedder(timestep).type_as(encoder_hidden_states) + timestep_proj = self.time_proj(self.act_fn(temb)) + + encoder_hidden_states = self.text_embedder(encoder_hidden_states) + if encoder_hidden_states_image is not None: + encoder_hidden_states_image = self.image_embedder(encoder_hidden_states_image) + + return temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image + + +class WanRotaryPosEmbed(nn.Module): + def __init__( + self, + attention_head_dim: int, + patch_size: tuple[int, int, int], + max_seq_len: int, + theta: float = 10000.0, + ): + super().__init__() + + self.attention_head_dim = attention_head_dim + self.patch_size = patch_size + self.max_seq_len = max_seq_len + + h_dim = w_dim = 2 * (attention_head_dim // 6) + t_dim = attention_head_dim - h_dim - w_dim + + self.t_dim = t_dim + self.h_dim = h_dim + self.w_dim = w_dim + + freqs_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + + freqs_cos = [] + freqs_sin = [] + + for dim in [t_dim, h_dim, w_dim]: + freq_cos, freq_sin = get_1d_rotary_pos_embed( + dim, + max_seq_len, + theta, + use_real=True, + repeat_interleave_real=True, + freqs_dtype=freqs_dtype, + ) + freqs_cos.append(freq_cos) + freqs_sin.append(freq_sin) + + self.register_buffer("freqs_cos", torch.cat(freqs_cos, dim=1), persistent=False) + self.register_buffer("freqs_sin", torch.cat(freqs_sin, dim=1), persistent=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.patch_size + ppf, pph, ppw = num_frames // p_t, height // p_h, width // p_w + + split_sizes = [self.t_dim, self.h_dim, self.w_dim] + + freqs_cos = self.freqs_cos.split(split_sizes, dim=1) + freqs_sin = self.freqs_sin.split(split_sizes, dim=1) + + freqs_cos_f = freqs_cos[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_h = freqs_cos[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_w = freqs_cos[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_sin_f = freqs_sin[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_h = freqs_sin[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_w = freqs_sin[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_cos = torch.cat([freqs_cos_f, freqs_cos_h, freqs_cos_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + freqs_sin = torch.cat([freqs_sin_f, freqs_sin_h, freqs_sin_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + + return freqs_cos, freqs_sin + + +@maybe_allow_in_graph +class WanTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + ffn_dim: int, + num_heads: int, + qk_norm: str = "rms_norm_across_heads", + cross_attn_norm: bool = False, + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + ): + super().__init__() + + # 1. Self-attention + self.norm1 = FP32LayerNorm(dim, eps, elementwise_affine=False) + self.attn1 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + cross_attention_dim_head=None, + processor=WanAttnProcessor(), + ) + + # 2. Cross-attention + self.attn2 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + cross_attention_dim_head=dim // num_heads, + processor=WanAttnProcessor(), + ) + self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + + # 3. Feed-forward + self.ffn = FeedForward(dim, inner_dim=ffn_dim, activation_fn="gelu-approximate") + self.norm3 = FP32LayerNorm(dim, eps, elementwise_affine=False) + + self.scale_shift_table = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + rotary_emb: torch.Tensor, + ) -> torch.Tensor: + if temb.ndim == 4: + # temb: batch_size, seq_len, 6, inner_dim (wan2.2 ti2v) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table.unsqueeze(0) + temb.float() + ).chunk(6, dim=2) + # batch_size, seq_len, 1, inner_dim + shift_msa = shift_msa.squeeze(2) + scale_msa = scale_msa.squeeze(2) + gate_msa = gate_msa.squeeze(2) + c_shift_msa = c_shift_msa.squeeze(2) + c_scale_msa = c_scale_msa.squeeze(2) + c_gate_msa = c_gate_msa.squeeze(2) + else: + # temb: batch_size, 6, inner_dim (wan2.1/wan2.2 14B) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table + temb.float() + ).chunk(6, dim=1) + + # 1. Self-attention + norm_hidden_states = (self.norm1(hidden_states.float()) * (1 + scale_msa) + shift_msa).type_as(hidden_states) + attn_output = self.attn1(norm_hidden_states, None, None, rotary_emb) + hidden_states = (hidden_states.float() + attn_output * gate_msa).type_as(hidden_states) + + # 2. Cross-attention + norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states) + attn_output = self.attn2(norm_hidden_states, encoder_hidden_states, None, None) + hidden_states = hidden_states + attn_output + + # 3. Feed-forward + norm_hidden_states = (self.norm3(hidden_states.float()) * (1 + c_scale_msa) + c_shift_msa).type_as( + hidden_states + ) + ff_output = self.ffn(norm_hidden_states) + hidden_states = (hidden_states.float() + ff_output.float() * c_gate_msa).type_as(hidden_states) + + return hidden_states + + +class WanTransformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in the Wan model. + + Args: + patch_size (`tuple[int]`, defaults to `(1, 2, 2)`): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch). + num_attention_heads (`int`, defaults to `40`): + Fixed length for text embeddings. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_dim (`int`, defaults to `512`): + Input dimension for text embeddings. + freq_dim (`int`, defaults to `256`): + Dimension for sinusoidal time embeddings. + ffn_dim (`int`, defaults to `13824`): + Intermediate dimension in feed-forward network. + num_layers (`int`, defaults to `40`): + The number of layers of transformer blocks to use. + window_size (`tuple[int]`, defaults to `(-1, -1)`): + Window size for local attention (-1 indicates global attention). + cross_attn_norm (`bool`, defaults to `True`): + Enable cross-attention normalization. + qk_norm (`bool`, defaults to `True`): + Enable query/key normalization. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + add_img_emb (`bool`, defaults to `False`): + Whether to use img_emb. + added_kv_proj_dim (`int`, *optional*, defaults to `None`): + The number of channels to use for the added key and value projections. If `None`, no projection is used. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embedding", "condition_embedder", "norm"] + _no_split_modules = ["WanTransformerBlock"] + _keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"] + _keys_to_ignore_on_load_unexpected = ["norm_added_q"] + _repeated_blocks = ["WanTransformerBlock"] + _cp_plan = { + "rope": { + 0: ContextParallelInput(split_dim=1, expected_dims=4, split_output=True), + 1: ContextParallelInput(split_dim=1, expected_dims=4, split_output=True), + }, + "blocks.0": { + "hidden_states": ContextParallelInput(split_dim=1, expected_dims=3, split_output=False), + }, + # Reference: https://github.com/huggingface/diffusers/pull/12909 + # We need to disable the splitting of encoder_hidden_states because the image_encoder + # (Wan 2.1 I2V) consistently generates 257 tokens for image_embed. This causes the shape + # of encoder_hidden_states—whose token count is always 769 (512 + 257) after concatenation + # —to be indivisible by the number of devices in the CP. + "proj_out": ContextParallelOutput(gather_dim=1, expected_dims=3), + "": { + "timestep": ContextParallelInput(split_dim=1, expected_dims=2, split_output=False), + }, + } + + @register_to_config + def __init__( + self, + patch_size: tuple[int, ...] = (1, 2, 2), + num_attention_heads: int = 40, + attention_head_dim: int = 128, + in_channels: int = 16, + out_channels: int = 16, + text_dim: int = 4096, + freq_dim: int = 256, + ffn_dim: int = 13824, + num_layers: int = 40, + cross_attn_norm: bool = True, + qk_norm: str | None = "rms_norm_across_heads", + eps: float = 1e-6, + image_dim: int | None = None, + added_kv_proj_dim: int | None = None, + rope_max_seq_len: int = 1024, + pos_embed_seq_len: int | None = None, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + # 1. Patch & position embedding + self.rope = WanRotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Condition embeddings + # image_embedding_dim=1280 for I2V model + self.condition_embedder = WanTimeTextImageEmbedding( + dim=inner_dim, + time_freq_dim=freq_dim, + time_proj_dim=inner_dim * 6, + text_embed_dim=text_dim, + image_embed_dim=image_dim, + pos_embed_seq_len=pos_embed_seq_len, + ) + + # 3. Transformer blocks + self.blocks = nn.ModuleList( + [ + WanTransformerBlock( + inner_dim, ffn_dim, num_attention_heads, qk_norm, cross_attn_norm, eps, added_kv_proj_dim + ) + for _ in range(num_layers) + ] + ) + + # 4. Output norm & projection + self.norm_out = FP32LayerNorm(inner_dim, eps, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels * math.prod(patch_size)) + self.scale_shift_table = nn.Parameter(torch.randn(1, 2, inner_dim) / inner_dim**0.5) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + return_dict: bool = True, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor | dict[str, torch.Tensor]: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + rotary_emb = self.rope(hidden_states) + + hidden_states = self.patch_embedding(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + + # timestep shape: batch_size, or batch_size, seq_len (wan 2.2 ti2v) + if timestep.ndim == 2: + ts_seq_len = timestep.shape[1] + timestep = timestep.flatten() # batch_size * seq_len + else: + ts_seq_len = None + + temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder( + timestep, encoder_hidden_states, encoder_hidden_states_image, timestep_seq_len=ts_seq_len + ) + if ts_seq_len is not None: + # batch_size, seq_len, 6, inner_dim + timestep_proj = timestep_proj.unflatten(2, (6, -1)) + else: + # batch_size, 6, inner_dim + timestep_proj = timestep_proj.unflatten(1, (6, -1)) + + if encoder_hidden_states_image is not None: + encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1) + + # 4. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + for block in self.blocks: + hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, timestep_proj, rotary_emb + ) + else: + for block in self.blocks: + hidden_states = block(hidden_states, encoder_hidden_states, timestep_proj, rotary_emb) + + # 5. Output norm, projection & unpatchify + if temb.ndim == 3: + # batch_size, seq_len, inner_dim (wan 2.2 ti2v) + shift, scale = (self.scale_shift_table.unsqueeze(0).to(temb.device) + temb.unsqueeze(2)).chunk(2, dim=2) + shift = shift.squeeze(2) + scale = scale.squeeze(2) + else: + # batch_size, inner_dim + shift, scale = (self.scale_shift_table.to(temb.device) + temb.unsqueeze(1)).chunk(2, dim=1) + + # Move the shift and scale tensors to the same device as hidden_states. + # When using multi-GPU inference via accelerate these will be on the + # first device rather than the last device, which hidden_states ends up + # on. + shift = shift.to(hidden_states.device) + scale = scale.to(hidden_states.device) + + hidden_states = (self.norm_out(hidden_states.float()) * (1 + scale) + shift).type_as(hidden_states) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan_animate.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan_animate.py new file mode 100644 index 0000000000000000000000000000000000000000..b5b15fe060994204569d55d705f2686884dcbaa8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan_animate.py @@ -0,0 +1,1292 @@ +# Copyright 2025 The Wan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward +from ..attention_dispatch import dispatch_attention_fn +from ..cache_utils import CacheMixin +from ..embeddings import PixArtAlphaTextProjection, TimestepEmbedding, Timesteps, get_1d_rotary_pos_embed +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import FP32LayerNorm + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +WAN_ANIMATE_MOTION_ENCODER_CHANNEL_SIZES = { + "4": 512, + "8": 512, + "16": 512, + "32": 512, + "64": 256, + "128": 128, + "256": 64, + "512": 32, + "1024": 16, +} + + +# Copied from diffusers.models.transformers.transformer_wan._get_qkv_projections +def _get_qkv_projections(attn: "WanAttention", hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor): + # encoder_hidden_states is only passed for cross-attention + if encoder_hidden_states is None: + encoder_hidden_states = hidden_states + + if attn.fused_projections: + if not attn.is_cross_attention: + # In self-attention layers, we can fuse the entire QKV projection into a single linear + query, key, value = attn.to_qkv(hidden_states).chunk(3, dim=-1) + else: + # In cross-attention layers, we can only fuse the KV projections into a single linear + query = attn.to_q(hidden_states) + key, value = attn.to_kv(encoder_hidden_states).chunk(2, dim=-1) + else: + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + return query, key, value + + +# Copied from diffusers.models.transformers.transformer_wan._get_added_kv_projections +def _get_added_kv_projections(attn: "WanAttention", encoder_hidden_states_img: torch.Tensor): + if attn.fused_projections: + key_img, value_img = attn.to_added_kv(encoder_hidden_states_img).chunk(2, dim=-1) + else: + key_img = attn.add_k_proj(encoder_hidden_states_img) + value_img = attn.add_v_proj(encoder_hidden_states_img) + return key_img, value_img + + +class FusedLeakyReLU(nn.Module): + """ + Fused LeakyRelu with scale factor and channel-wise bias. + """ + + def __init__(self, negative_slope: float = 0.2, scale: float = 2**0.5, bias_channels: int | None = None): + super().__init__() + self.negative_slope = negative_slope + self.scale = scale + self.channels = bias_channels + + if self.channels is not None: + self.bias = nn.Parameter( + torch.zeros( + self.channels, + ) + ) + else: + self.bias = None + + def forward(self, x: torch.Tensor, channel_dim: int = 1) -> torch.Tensor: + if self.bias is not None: + # Expand self.bias to have all singleton dims except at self.channel_dim + expanded_shape = [1] * x.ndim + expanded_shape[channel_dim] = self.bias.shape[0] + bias = self.bias.reshape(*expanded_shape) + x = x + bias + return F.leaky_relu(x, self.negative_slope) * self.scale + + +class MotionConv2d(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + bias: bool = True, + blur_kernel: tuple[int, ...] | None = None, + blur_upsample_factor: int = 1, + use_activation: bool = True, + ): + super().__init__() + self.use_activation = use_activation + self.in_channels = in_channels + + # Handle blurring (applying a FIR filter with the given kernel) if available + self.blur = False + if blur_kernel is not None: + p = (len(blur_kernel) - stride) + (kernel_size - 1) + self.blur_padding = ((p + 1) // 2, p // 2) + + kernel = torch.tensor(blur_kernel) + # Convert kernel to 2D if necessary + if kernel.ndim == 1: + kernel = kernel[None, :] * kernel[:, None] + # Normalize kernel + kernel = kernel / kernel.sum() + if blur_upsample_factor > 1: + kernel = kernel * (blur_upsample_factor**2) + self.register_buffer("blur_kernel", kernel, persistent=False) + self.blur = True + + # Main Conv2d parameters (with scale factor) + self.weight = nn.Parameter(torch.randn(out_channels, in_channels, kernel_size, kernel_size)) + self.scale = 1 / math.sqrt(in_channels * kernel_size**2) + + self.stride = stride + self.padding = padding + + # If using an activation function, the bias will be fused into the activation + if bias and not self.use_activation: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + + if self.use_activation: + self.act_fn = FusedLeakyReLU(bias_channels=out_channels) + else: + self.act_fn = None + + def forward(self, x: torch.Tensor, channel_dim: int = 1) -> torch.Tensor: + # Apply blur if using + if self.blur: + # NOTE: the original implementation uses a 2D upfirdn operation with the upsampling and downsampling rates + # set to 1, which should be equivalent to a 2D convolution + expanded_kernel = self.blur_kernel[None, None, :, :].expand(self.in_channels, 1, -1, -1) + x = x.to(expanded_kernel.dtype) + x = F.conv2d(x, expanded_kernel, padding=self.blur_padding, groups=self.in_channels) + + # Main Conv2D with scaling + x = x.to(self.weight.dtype) + x = F.conv2d(x, self.weight * self.scale, bias=self.bias, stride=self.stride, padding=self.padding) + + # Activation with fused bias, if using + if self.use_activation: + x = self.act_fn(x, channel_dim=channel_dim) + return x + + def __repr__(self): + return ( + f"{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]}," + f" kernel_size={self.weight.shape[2]}, stride={self.stride}, padding={self.padding})" + ) + + +class MotionLinear(nn.Module): + def __init__( + self, + in_dim: int, + out_dim: int, + bias: bool = True, + use_activation: bool = False, + ): + super().__init__() + self.use_activation = use_activation + + # Linear weight with scale factor + self.weight = nn.Parameter(torch.randn(out_dim, in_dim)) + self.scale = 1 / math.sqrt(in_dim) + + # If an activation is present, the bias will be fused to it + if bias and not self.use_activation: + self.bias = nn.Parameter(torch.zeros(out_dim)) + else: + self.bias = None + + if self.use_activation: + self.act_fn = FusedLeakyReLU(bias_channels=out_dim) + else: + self.act_fn = None + + def forward(self, input: torch.Tensor, channel_dim: int = 1) -> torch.Tensor: + out = F.linear(input, self.weight * self.scale, bias=self.bias) + if self.use_activation: + out = self.act_fn(out, channel_dim=channel_dim) + return out + + def __repr__(self): + return ( + f"{self.__class__.__name__}(in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}," + f" bias={self.bias is not None})" + ) + + +class MotionEncoderResBlock(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + kernel_size_skip: int = 1, + blur_kernel: tuple[int, ...] = (1, 3, 3, 1), + downsample_factor: int = 2, + ): + super().__init__() + self.downsample_factor = downsample_factor + + # 3 x 3 Conv + fused leaky ReLU + self.conv1 = MotionConv2d( + in_channels, + in_channels, + kernel_size, + stride=1, + padding=kernel_size // 2, + use_activation=True, + ) + + # 3 x 3 Conv that downsamples 2x + fused leaky ReLU + self.conv2 = MotionConv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=self.downsample_factor, + padding=0, + blur_kernel=blur_kernel, + use_activation=True, + ) + + # 1 x 1 Conv that downsamples 2x in skip connection + self.conv_skip = MotionConv2d( + in_channels, + out_channels, + kernel_size=kernel_size_skip, + stride=self.downsample_factor, + padding=0, + bias=False, + blur_kernel=blur_kernel, + use_activation=False, + ) + + def forward(self, x: torch.Tensor, channel_dim: int = 1) -> torch.Tensor: + x_out = self.conv1(x, channel_dim) + x_out = self.conv2(x_out, channel_dim) + + x_skip = self.conv_skip(x, channel_dim) + + x_out = (x_out + x_skip) / math.sqrt(2) + return x_out + + +class WanAnimateMotionEncoder(nn.Module): + def __init__( + self, + size: int = 512, + style_dim: int = 512, + motion_dim: int = 20, + out_dim: int = 512, + motion_blocks: int = 5, + channels: dict[str, int] | None = None, + ): + super().__init__() + self.size = size + + # Appearance encoder: conv layers + if channels is None: + channels = WAN_ANIMATE_MOTION_ENCODER_CHANNEL_SIZES + + self.conv_in = MotionConv2d(3, channels[str(size)], 1, use_activation=True) + + self.res_blocks = nn.ModuleList() + in_channels = channels[str(size)] + log_size = int(math.log(size, 2)) + for i in range(log_size, 2, -1): + out_channels = channels[str(2 ** (i - 1))] + self.res_blocks.append(MotionEncoderResBlock(in_channels, out_channels)) + in_channels = out_channels + + self.conv_out = MotionConv2d(in_channels, style_dim, 4, padding=0, bias=False, use_activation=False) + + # Motion encoder: linear layers + # NOTE: there are no activations in between the linear layers here, which is weird but I believe matches the + # original code. + linears = [MotionLinear(style_dim, style_dim) for _ in range(motion_blocks - 1)] + linears.append(MotionLinear(style_dim, motion_dim)) + self.motion_network = nn.ModuleList(linears) + + self.motion_synthesis_weight = nn.Parameter(torch.randn(out_dim, motion_dim)) + + def forward(self, face_image: torch.Tensor, channel_dim: int = 1) -> torch.Tensor: + if (face_image.shape[-2] != self.size) or (face_image.shape[-1] != self.size): + raise ValueError( + f"Face pixel values has resolution ({face_image.shape[-1]}, {face_image.shape[-2]}) but is expected" + f" to have resolution ({self.size}, {self.size})" + ) + + # Appearance encoding through convs + face_image = self.conv_in(face_image, channel_dim) + for block in self.res_blocks: + face_image = block(face_image, channel_dim) + face_image = self.conv_out(face_image, channel_dim) + motion_feat = face_image.squeeze(-1).squeeze(-1) + + # Motion feature extraction + for linear_layer in self.motion_network: + motion_feat = linear_layer(motion_feat, channel_dim=channel_dim) + + # Motion synthesis via Linear Motion Decomposition + weight = self.motion_synthesis_weight + 1e-8 + # Upcast the QR orthogonalization operation to FP32 + original_motion_dtype = motion_feat.dtype + motion_feat = motion_feat.to(torch.float32) + weight = weight.to(torch.float32) + + Q = torch.linalg.qr(weight)[0].to(device=motion_feat.device) + + motion_feat_diag = torch.diag_embed(motion_feat) # Alpha, diagonal matrix + motion_decomposition = torch.matmul(motion_feat_diag, Q.T) + motion_vec = torch.sum(motion_decomposition, dim=1) + + motion_vec = motion_vec.to(dtype=original_motion_dtype) + + return motion_vec + + +class WanAnimateFaceEncoder(nn.Module): + def __init__( + self, + in_dim: int, + out_dim: int, + hidden_dim: int = 1024, + num_heads: int = 4, + kernel_size: int = 3, + eps: float = 1e-6, + pad_mode: str = "replicate", + ): + super().__init__() + self.num_heads = num_heads + self.time_causal_padding = (kernel_size - 1, 0) + self.pad_mode = pad_mode + + self.act = nn.SiLU() + + self.conv1_local = nn.Conv1d(in_dim, hidden_dim * num_heads, kernel_size=kernel_size, stride=1) + self.conv2 = nn.Conv1d(hidden_dim, hidden_dim, kernel_size, stride=2) + self.conv3 = nn.Conv1d(hidden_dim, hidden_dim, kernel_size, stride=2) + + self.norm1 = nn.LayerNorm(hidden_dim, eps, elementwise_affine=False) + self.norm2 = nn.LayerNorm(hidden_dim, eps, elementwise_affine=False) + self.norm3 = nn.LayerNorm(hidden_dim, eps, elementwise_affine=False) + + self.out_proj = nn.Linear(hidden_dim, out_dim) + + self.padding_tokens = nn.Parameter(torch.zeros(1, 1, 1, out_dim)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + batch_size = x.shape[0] + + # Reshape to channels-first to apply causal Conv1d over frame dim + x = x.permute(0, 2, 1) + x = F.pad(x, self.time_causal_padding, mode=self.pad_mode) + x = self.conv1_local(x) # [B, C, T_padded] --> [B, N * C, T] + x = x.unflatten(1, (self.num_heads, -1)).flatten(0, 1) # [B, N * C, T] --> [B * N, C, T] + # Reshape back to channels-last to apply LayerNorm over channel dim + x = x.permute(0, 2, 1) + x = self.norm1(x) + x = self.act(x) + + x = x.permute(0, 2, 1) + x = F.pad(x, self.time_causal_padding, mode=self.pad_mode) + x = self.conv2(x) + x = x.permute(0, 2, 1) + x = self.norm2(x) + x = self.act(x) + + x = x.permute(0, 2, 1) + x = F.pad(x, self.time_causal_padding, mode=self.pad_mode) + x = self.conv3(x) + x = x.permute(0, 2, 1) + x = self.norm3(x) + x = self.act(x) + + x = self.out_proj(x) + x = x.unflatten(0, (batch_size, -1)).permute(0, 2, 1, 3) # [B * N, T, C_out] --> [B, T, N, C_out] + + padding = self.padding_tokens.repeat(batch_size, x.shape[1], 1, 1).to(device=x.device) + x = torch.cat([x, padding], dim=-2) # [B, T, N, C_out] --> [B, T, N + 1, C_out] + + return x + + +class WanAnimateFaceBlockAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + f"{self.__class__.__name__} requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or" + f" higher." + ) + + def __call__( + self, + attn: "WanAnimateFaceBlockCrossAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + ) -> torch.Tensor: + # encoder_hidden_states corresponds to the motion vec + # attention_mask corresponds to the motion mask (if any) + hidden_states = attn.pre_norm_q(hidden_states) + encoder_hidden_states = attn.pre_norm_kv(encoder_hidden_states) + + # B --> batch_size, T --> reduced inference segment len, N --> face_encoder_num_heads + 1, C --> attn.dim + B, T, N, C = encoder_hidden_states.shape + + query, key, value = _get_qkv_projections(attn, hidden_states, encoder_hidden_states) + + query = query.unflatten(2, (attn.heads, -1)) # [B, S, H * D] --> [B, S, H, D] + key = key.view(B, T, N, attn.heads, -1) # [B, T, N, H * D_kv] --> [B, T, N, H, D_kv] + value = value.view(B, T, N, attn.heads, -1) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + # NOTE: the below line (which follows the official code) means that in practice, the number of frames T in + # encoder_hidden_states (the motion vector after applying the face encoder) must evenly divide the + # post-patchify sequence length S of the transformer hidden_states. Is it possible to remove this dependency? + query = query.unflatten(1, (T, -1)).flatten(0, 1) # [B, S, H, D] --> [B * T, S / T, H, D] + key = key.flatten(0, 1) # [B, T, N, H, D_kv] --> [B * T, N, H, D_kv] + value = value.flatten(0, 1) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=None, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + hidden_states = hidden_states.unflatten(0, (B, T)).flatten(1, 2) + + hidden_states = attn.to_out(hidden_states) + + if attention_mask is not None: + # NOTE: attention_mask is assumed to be a multiplicative mask + attention_mask = attention_mask.flatten(start_dim=1) + hidden_states = hidden_states * attention_mask + + return hidden_states + + +class WanAnimateFaceBlockCrossAttention(nn.Module, AttentionModuleMixin): + """ + Temporally-aligned cross attention with the face motion signal in the Wan Animate Face Blocks. + """ + + _default_processor_cls = WanAnimateFaceBlockAttnProcessor + _available_processors = [WanAnimateFaceBlockAttnProcessor] + + def __init__( + self, + dim: int, + heads: int = 8, + dim_head: int = 64, + eps: float = 1e-6, + cross_attention_dim_head: int | None = None, + bias: bool = True, + processor=None, + ): + super().__init__() + self.inner_dim = dim_head * heads + self.heads = heads + self.cross_attention_dim_head = cross_attention_dim_head + self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads + self.use_bias = bias + self.is_cross_attention = cross_attention_dim_head is not None + + # 1. Pre-Attention Norms for the hidden_states (video latents) and encoder_hidden_states (motion vector). + # NOTE: this is not used in "vanilla" WanAttention + self.pre_norm_q = nn.LayerNorm(dim, eps, elementwise_affine=False) + self.pre_norm_kv = nn.LayerNorm(dim, eps, elementwise_affine=False) + + # 2. QKV and Output Projections + self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=bias) + self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=bias) + self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=bias) + self.to_out = torch.nn.Linear(self.inner_dim, dim, bias=bias) + + # 3. QK Norm + # NOTE: this is applied after the reshape, so only over dim_head rather than dim_head * heads + self.norm_q = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=True) + self.norm_k = torch.nn.RMSNorm(dim_head, eps=eps, elementwise_affine=True) + + # 4. Set attention processor + if processor is None: + processor = self._default_processor_cls() + self.set_processor(processor) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask) + + +# Copied from diffusers.models.transformers.transformer_wan.WanAttnProcessor +class WanAttnProcessor: + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "WanAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or higher." + ) + + def __call__( + self, + attn: "WanAttention", + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + ) -> torch.Tensor: + encoder_hidden_states_img = None + if attn.add_k_proj is not None: + # 512 is the context length of the text encoder, hardcoded for now + image_context_length = encoder_hidden_states.shape[1] - 512 + encoder_hidden_states_img = encoder_hidden_states[:, :image_context_length] + encoder_hidden_states = encoder_hidden_states[:, image_context_length:] + + query, key, value = _get_qkv_projections(attn, hidden_states, encoder_hidden_states) + + query = attn.norm_q(query) + key = attn.norm_k(key) + + query = query.unflatten(2, (attn.heads, -1)) + key = key.unflatten(2, (attn.heads, -1)) + value = value.unflatten(2, (attn.heads, -1)) + + if rotary_emb is not None: + + def apply_rotary_emb( + hidden_states: torch.Tensor, + freqs_cos: torch.Tensor, + freqs_sin: torch.Tensor, + ): + x1, x2 = hidden_states.unflatten(-1, (-1, 2)).unbind(-1) + cos = freqs_cos[..., 0::2] + sin = freqs_sin[..., 1::2] + out = torch.empty_like(hidden_states) + out[..., 0::2] = x1 * cos - x2 * sin + out[..., 1::2] = x1 * sin + x2 * cos + return out.type_as(hidden_states) + + query = apply_rotary_emb(query, *rotary_emb) + key = apply_rotary_emb(key, *rotary_emb) + + # I2V task + hidden_states_img = None + if encoder_hidden_states_img is not None: + key_img, value_img = _get_added_kv_projections(attn, encoder_hidden_states_img) + key_img = attn.norm_added_k(key_img) + + key_img = key_img.unflatten(2, (attn.heads, -1)) + value_img = value_img.unflatten(2, (attn.heads, -1)) + + hidden_states_img = dispatch_attention_fn( + query, + key_img, + value_img, + attn_mask=None, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12909 + parallel_config=None, + ) + hidden_states_img = hidden_states_img.flatten(2, 3) + hidden_states_img = hidden_states_img.type_as(query) + + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + # Reference: https://github.com/huggingface/diffusers/pull/12909 + parallel_config=(self._parallel_config if encoder_hidden_states is None else None), + ) + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.type_as(query) + + if hidden_states_img is not None: + hidden_states = hidden_states + hidden_states_img + + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + return hidden_states + + +# Copied from diffusers.models.transformers.transformer_wan.WanAttention +class WanAttention(torch.nn.Module, AttentionModuleMixin): + _default_processor_cls = WanAttnProcessor + _available_processors = [WanAttnProcessor] + + def __init__( + self, + dim: int, + heads: int = 8, + dim_head: int = 64, + eps: float = 1e-5, + dropout: float = 0.0, + added_kv_proj_dim: int | None = None, + cross_attention_dim_head: int | None = None, + processor=None, + is_cross_attention=None, + ): + super().__init__() + + self.inner_dim = dim_head * heads + self.heads = heads + self.added_kv_proj_dim = added_kv_proj_dim + self.cross_attention_dim_head = cross_attention_dim_head + self.kv_inner_dim = self.inner_dim if cross_attention_dim_head is None else cross_attention_dim_head * heads + + self.to_q = torch.nn.Linear(dim, self.inner_dim, bias=True) + self.to_k = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_v = torch.nn.Linear(dim, self.kv_inner_dim, bias=True) + self.to_out = torch.nn.ModuleList( + [ + torch.nn.Linear(self.inner_dim, dim, bias=True), + torch.nn.Dropout(dropout), + ] + ) + self.norm_q = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + self.norm_k = torch.nn.RMSNorm(dim_head * heads, eps=eps, elementwise_affine=True) + + self.add_k_proj = self.add_v_proj = None + if added_kv_proj_dim is not None: + self.add_k_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True) + self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps) + + if is_cross_attention is not None: + self.is_cross_attention = is_cross_attention + else: + self.is_cross_attention = cross_attention_dim_head is not None + + self.set_processor(processor) + + def fuse_projections(self): + if getattr(self, "fused_projections", False): + return + + if not self.is_cross_attention: + concatenated_weights = torch.cat([self.to_q.weight.data, self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_q.bias.data, self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_qkv = nn.Linear(in_features, out_features, bias=True) + self.to_qkv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + else: + concatenated_weights = torch.cat([self.to_k.weight.data, self.to_v.weight.data]) + concatenated_bias = torch.cat([self.to_k.bias.data, self.to_v.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_kv = nn.Linear(in_features, out_features, bias=True) + self.to_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + if self.added_kv_proj_dim is not None: + concatenated_weights = torch.cat([self.add_k_proj.weight.data, self.add_v_proj.weight.data]) + concatenated_bias = torch.cat([self.add_k_proj.bias.data, self.add_v_proj.bias.data]) + out_features, in_features = concatenated_weights.shape + with torch.device("meta"): + self.to_added_kv = nn.Linear(in_features, out_features, bias=True) + self.to_added_kv.load_state_dict( + {"weight": concatenated_weights, "bias": concatenated_bias}, strict=True, assign=True + ) + + self.fused_projections = True + + @torch.no_grad() + def unfuse_projections(self): + if not getattr(self, "fused_projections", False): + return + + if hasattr(self, "to_qkv"): + delattr(self, "to_qkv") + if hasattr(self, "to_kv"): + delattr(self, "to_kv") + if hasattr(self, "to_added_kv"): + delattr(self, "to_added_kv") + + self.fused_projections = False + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None, + **kwargs, + ) -> torch.Tensor: + return self.processor(self, hidden_states, encoder_hidden_states, attention_mask, rotary_emb, **kwargs) + + +# Copied from diffusers.models.transformers.transformer_wan.WanImageEmbedding +class WanImageEmbedding(torch.nn.Module): + def __init__(self, in_features: int, out_features: int, pos_embed_seq_len=None): + super().__init__() + + self.norm1 = FP32LayerNorm(in_features) + self.ff = FeedForward(in_features, out_features, mult=1, activation_fn="gelu") + self.norm2 = FP32LayerNorm(out_features) + if pos_embed_seq_len is not None: + self.pos_embed = nn.Parameter(torch.zeros(1, pos_embed_seq_len, in_features)) + else: + self.pos_embed = None + + def forward(self, encoder_hidden_states_image: torch.Tensor) -> torch.Tensor: + if self.pos_embed is not None: + batch_size, seq_len, embed_dim = encoder_hidden_states_image.shape + encoder_hidden_states_image = encoder_hidden_states_image.view(-1, 2 * seq_len, embed_dim) + encoder_hidden_states_image = encoder_hidden_states_image + self.pos_embed + + hidden_states = self.norm1(encoder_hidden_states_image) + hidden_states = self.ff(hidden_states) + hidden_states = self.norm2(hidden_states) + return hidden_states + + +# Modified from diffusers.models.transformers.transformer_wan.WanTimeTextImageEmbedding +class WanTimeTextImageEmbedding(nn.Module): + def __init__( + self, + dim: int, + time_freq_dim: int, + time_proj_dim: int, + text_embed_dim: int, + image_embed_dim: int | None = None, + pos_embed_seq_len: int | None = None, + ): + super().__init__() + + self.timesteps_proj = Timesteps(num_channels=time_freq_dim, flip_sin_to_cos=True, downscale_freq_shift=0) + self.time_embedder = TimestepEmbedding(in_channels=time_freq_dim, time_embed_dim=dim) + self.act_fn = nn.SiLU() + self.time_proj = nn.Linear(dim, time_proj_dim) + self.text_embedder = PixArtAlphaTextProjection(text_embed_dim, dim, act_fn="gelu_tanh") + + self.image_embedder = None + if image_embed_dim is not None: + self.image_embedder = WanImageEmbedding(image_embed_dim, dim, pos_embed_seq_len=pos_embed_seq_len) + + def forward( + self, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + timestep_seq_len: int | None = None, + ): + timestep = self.timesteps_proj(timestep) + if timestep_seq_len is not None: + timestep = timestep.unflatten(0, (-1, timestep_seq_len)) + + if self.time_embedder.linear_1.weight.dtype.is_floating_point: + time_embedder_dtype = self.time_embedder.linear_1.weight.dtype + else: + time_embedder_dtype = encoder_hidden_states.dtype + + temb = self.time_embedder(timestep.to(time_embedder_dtype)).type_as(encoder_hidden_states) + timestep_proj = self.time_proj(self.act_fn(temb)) + + encoder_hidden_states = self.text_embedder(encoder_hidden_states) + if encoder_hidden_states_image is not None: + encoder_hidden_states_image = self.image_embedder(encoder_hidden_states_image) + + return temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image + + +# Copied from diffusers.models.transformers.transformer_wan.WanRotaryPosEmbed +class WanRotaryPosEmbed(nn.Module): + def __init__( + self, + attention_head_dim: int, + patch_size: tuple[int, int, int], + max_seq_len: int, + theta: float = 10000.0, + ): + super().__init__() + + self.attention_head_dim = attention_head_dim + self.patch_size = patch_size + self.max_seq_len = max_seq_len + + h_dim = w_dim = 2 * (attention_head_dim // 6) + t_dim = attention_head_dim - h_dim - w_dim + + self.t_dim = t_dim + self.h_dim = h_dim + self.w_dim = w_dim + + freqs_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + + freqs_cos = [] + freqs_sin = [] + + for dim in [t_dim, h_dim, w_dim]: + freq_cos, freq_sin = get_1d_rotary_pos_embed( + dim, + max_seq_len, + theta, + use_real=True, + repeat_interleave_real=True, + freqs_dtype=freqs_dtype, + ) + freqs_cos.append(freq_cos) + freqs_sin.append(freq_sin) + + self.register_buffer("freqs_cos", torch.cat(freqs_cos, dim=1), persistent=False) + self.register_buffer("freqs_sin", torch.cat(freqs_sin, dim=1), persistent=False) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.patch_size + ppf, pph, ppw = num_frames // p_t, height // p_h, width // p_w + + split_sizes = [self.t_dim, self.h_dim, self.w_dim] + + freqs_cos = self.freqs_cos.split(split_sizes, dim=1) + freqs_sin = self.freqs_sin.split(split_sizes, dim=1) + + freqs_cos_f = freqs_cos[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_h = freqs_cos[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_cos_w = freqs_cos[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_sin_f = freqs_sin[0][:ppf].view(ppf, 1, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_h = freqs_sin[1][:pph].view(1, pph, 1, -1).expand(ppf, pph, ppw, -1) + freqs_sin_w = freqs_sin[2][:ppw].view(1, 1, ppw, -1).expand(ppf, pph, ppw, -1) + + freqs_cos = torch.cat([freqs_cos_f, freqs_cos_h, freqs_cos_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + freqs_sin = torch.cat([freqs_sin_f, freqs_sin_h, freqs_sin_w], dim=-1).reshape(1, ppf * pph * ppw, 1, -1) + + return freqs_cos, freqs_sin + + +# Copied from diffusers.models.transformers.transformer_wan.WanTransformerBlock +class WanTransformerBlock(nn.Module): + def __init__( + self, + dim: int, + ffn_dim: int, + num_heads: int, + qk_norm: str = "rms_norm_across_heads", + cross_attn_norm: bool = False, + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + ): + super().__init__() + + # 1. Self-attention + self.norm1 = FP32LayerNorm(dim, eps, elementwise_affine=False) + self.attn1 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + cross_attention_dim_head=None, + processor=WanAttnProcessor(), + ) + + # 2. Cross-attention + self.attn2 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + cross_attention_dim_head=dim // num_heads, + processor=WanAttnProcessor(), + ) + self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + + # 3. Feed-forward + self.ffn = FeedForward(dim, inner_dim=ffn_dim, activation_fn="gelu-approximate") + self.norm3 = FP32LayerNorm(dim, eps, elementwise_affine=False) + + self.scale_shift_table = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + temb: torch.Tensor, + rotary_emb: torch.Tensor, + ) -> torch.Tensor: + if temb.ndim == 4: + # temb: batch_size, seq_len, 6, inner_dim (wan2.2 ti2v) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table.unsqueeze(0) + temb.float() + ).chunk(6, dim=2) + # batch_size, seq_len, 1, inner_dim + shift_msa = shift_msa.squeeze(2) + scale_msa = scale_msa.squeeze(2) + gate_msa = gate_msa.squeeze(2) + c_shift_msa = c_shift_msa.squeeze(2) + c_scale_msa = c_scale_msa.squeeze(2) + c_gate_msa = c_gate_msa.squeeze(2) + else: + # temb: batch_size, 6, inner_dim (wan2.1/wan2.2 14B) + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table + temb.float() + ).chunk(6, dim=1) + + # 1. Self-attention + norm_hidden_states = (self.norm1(hidden_states.float()) * (1 + scale_msa) + shift_msa).type_as(hidden_states) + attn_output = self.attn1(norm_hidden_states, None, None, rotary_emb) + hidden_states = (hidden_states.float() + attn_output * gate_msa).type_as(hidden_states) + + # 2. Cross-attention + norm_hidden_states = self.norm2(hidden_states.float()).type_as(hidden_states) + attn_output = self.attn2(norm_hidden_states, encoder_hidden_states, None, None) + hidden_states = hidden_states + attn_output + + # 3. Feed-forward + norm_hidden_states = (self.norm3(hidden_states.float()) * (1 + c_scale_msa) + c_shift_msa).type_as( + hidden_states + ) + ff_output = self.ffn(norm_hidden_states) + hidden_states = (hidden_states.float() + ff_output.float() * c_gate_msa).type_as(hidden_states) + + return hidden_states + + +class WanAnimateTransformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in the WanAnimate model. + + Args: + patch_size (`tuple[int]`, defaults to `(1, 2, 2)`): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch). + num_attention_heads (`int`, defaults to `40`): + Fixed length for text embeddings. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_dim (`int`, defaults to `512`): + Input dimension for text embeddings. + freq_dim (`int`, defaults to `256`): + Dimension for sinusoidal time embeddings. + ffn_dim (`int`, defaults to `13824`): + Intermediate dimension in feed-forward network. + num_layers (`int`, defaults to `40`): + The number of layers of transformer blocks to use. + window_size (`tuple[int]`, defaults to `(-1, -1)`): + Window size for local attention (-1 indicates global attention). + cross_attn_norm (`bool`, defaults to `True`): + Enable cross-attention normalization. + qk_norm (`bool`, defaults to `True`): + Enable query/key normalization. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + image_dim (`int`, *optional*, defaults to `1280`): + The number of channels to use for the image embedding. If `None`, no projection is used. + added_kv_proj_dim (`int`, *optional*, defaults to `5120`): + The number of channels to use for the added key and value projections. If `None`, no projection is used. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embedding", "condition_embedder", "norm"] + _no_split_modules = ["WanTransformerBlock", "MotionEncoderResBlock"] + _keep_in_fp32_modules = [ + "time_embedder", + "scale_shift_table", + "norm1", + "norm2", + "norm3", + "motion_synthesis_weight", + ] + _keys_to_ignore_on_load_unexpected = ["norm_added_q"] + _repeated_blocks = ["WanTransformerBlock"] + + @register_to_config + def __init__( + self, + patch_size: tuple[int] = (1, 2, 2), + num_attention_heads: int = 40, + attention_head_dim: int = 128, + in_channels: int | None = 36, + latent_channels: int | None = 16, + out_channels: int | None = 16, + text_dim: int = 4096, + freq_dim: int = 256, + ffn_dim: int = 13824, + num_layers: int = 40, + cross_attn_norm: bool = True, + qk_norm: str | None = "rms_norm_across_heads", + eps: float = 1e-6, + image_dim: int | None = 1280, + added_kv_proj_dim: int | None = None, + rope_max_seq_len: int = 1024, + pos_embed_seq_len: int | None = None, + motion_encoder_channel_sizes: dict[str, int] | None = None, # Start of Wan Animate-specific args + motion_encoder_size: int = 512, + motion_style_dim: int = 512, + motion_dim: int = 20, + motion_encoder_dim: int = 512, + face_encoder_hidden_dim: int = 1024, + face_encoder_num_heads: int = 4, + inject_face_latents_blocks: int = 5, + motion_encoder_batch_size: int = 8, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + # Allow either only in_channels or only latent_channels to be set for convenience + if in_channels is None and latent_channels is not None: + in_channels = 2 * latent_channels + 4 + elif in_channels is not None and latent_channels is None: + latent_channels = (in_channels - 4) // 2 + elif in_channels is not None and latent_channels is not None: + # TODO: should this always be true? + assert in_channels == 2 * latent_channels + 4, "in_channels should be 2 * latent_channels + 4" + else: + raise ValueError("At least one of `in_channels` and `latent_channels` must be supplied.") + out_channels = out_channels or latent_channels + + # 1. Patch & position embedding + self.rope = WanRotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + self.pose_patch_embedding = nn.Conv3d(latent_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Condition embeddings + self.condition_embedder = WanTimeTextImageEmbedding( + dim=inner_dim, + time_freq_dim=freq_dim, + time_proj_dim=inner_dim * 6, + text_embed_dim=text_dim, + image_embed_dim=image_dim, + pos_embed_seq_len=pos_embed_seq_len, + ) + + # Motion encoder + self.motion_encoder = WanAnimateMotionEncoder( + size=motion_encoder_size, + style_dim=motion_style_dim, + motion_dim=motion_dim, + out_dim=motion_encoder_dim, + channels=motion_encoder_channel_sizes, + ) + + # Face encoder + self.face_encoder = WanAnimateFaceEncoder( + in_dim=motion_encoder_dim, + out_dim=inner_dim, + hidden_dim=face_encoder_hidden_dim, + num_heads=face_encoder_num_heads, + ) + + # 3. Transformer blocks + self.blocks = nn.ModuleList( + [ + WanTransformerBlock( + dim=inner_dim, + ffn_dim=ffn_dim, + num_heads=num_attention_heads, + qk_norm=qk_norm, + cross_attn_norm=cross_attn_norm, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + ) + for _ in range(num_layers) + ] + ) + + self.face_adapter = nn.ModuleList( + [ + WanAnimateFaceBlockCrossAttention( + dim=inner_dim, + heads=num_attention_heads, + dim_head=inner_dim // num_attention_heads, + eps=eps, + cross_attention_dim_head=inner_dim // num_attention_heads, + processor=WanAnimateFaceBlockAttnProcessor(), + ) + for _ in range(num_layers // inject_face_latents_blocks) + ] + ) + + # 4. Output norm & projection + self.norm_out = FP32LayerNorm(inner_dim, eps, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels * math.prod(patch_size)) + self.scale_shift_table = nn.Parameter(torch.randn(1, 2, inner_dim) / inner_dim**0.5) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + pose_hidden_states: torch.Tensor | None = None, + face_pixel_values: torch.Tensor | None = None, + motion_encode_batch_size: int | None = None, + return_dict: bool = True, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor | dict[str, torch.Tensor]: + """ + Forward pass of Wan2.2-Animate transformer model. + + Args: + hidden_states (`torch.Tensor` of shape `(B, 2C + 4, T + 1, H, W)`): + Input noisy video latents of shape `(B, 2C + 4, T + 1, H, W)`, where B is the batch size, C is the + number of latent channels (16 for Wan VAE), T is the number of latent frames in an inference segment, H + is the latent height, and W is the latent width. + timestep: (`torch.LongTensor`): + The current timestep in the denoising loop. + encoder_hidden_states (`torch.Tensor`): + Text embeddings from the text encoder (umT5 for Wan Animate). + encoder_hidden_states_image (`torch.Tensor`): + CLIP visual features of the reference (character) image. + pose_hidden_states (`torch.Tensor` of shape `(B, C, T, H, W)`): + Pose video latents. TODO: description + face_pixel_values (`torch.Tensor` of shape `(B, C', S, H', W')`): + Face video in pixel space (not latent space). Typically C' = 3 and H' and W' are the height/width of + the face video in pixels. Here S is the inference segment length, usually set to 77. + motion_encode_batch_size (`int`, *optional*): + The batch size for batched encoding of the face video via the motion encoder. Will default to + `self.config.motion_encoder_batch_size` if not set. + return_dict (`bool`, *optional*, defaults to `True`): + Whether to return the output as a dict or tuple. + """ + + # Check that shapes match up + if pose_hidden_states is not None and pose_hidden_states.shape[2] + 1 != hidden_states.shape[2]: + raise ValueError( + f"pose_hidden_states frame dim (dim 2) is {pose_hidden_states.shape[2]} but must be one less than the" + f" hidden_states's corresponding frame dim: {hidden_states.shape[2]}" + ) + + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + # 1. Rotary position embedding + rotary_emb = self.rope(hidden_states) + + # 2. Patch embedding + hidden_states = self.patch_embedding(hidden_states) + pose_hidden_states = self.pose_patch_embedding(pose_hidden_states) + # Add pose embeddings to hidden states + hidden_states[:, :, 1:] = hidden_states[:, :, 1:] + pose_hidden_states + # Calling contiguous() here is important so that we don't recompile when performing regional compilation + hidden_states = hidden_states.flatten(2).transpose(1, 2).contiguous() + + # 3. Condition embeddings (time, text, image) + # Wan Animate is based on Wan 2.1 and thus uses Wan 2.1's timestep logic + temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder( + timestep, encoder_hidden_states, encoder_hidden_states_image, timestep_seq_len=None + ) + + # batch_size, 6, inner_dim + timestep_proj = timestep_proj.unflatten(1, (6, -1)) + + if encoder_hidden_states_image is not None: + encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1) + + # 4. Get motion features from the face video + # Motion vector computation from face pixel values + batch_size, channels, num_face_frames, height, width = face_pixel_values.shape + # Rearrange from (B, C, T, H, W) to (B*T, C, H, W) + face_pixel_values = face_pixel_values.permute(0, 2, 1, 3, 4).reshape(-1, channels, height, width) + + # Extract motion features using motion encoder + # Perform batched motion encoder inference to allow trading off inference speed for memory usage + motion_encode_batch_size = motion_encode_batch_size or self.config.motion_encoder_batch_size + face_batches = torch.split(face_pixel_values, motion_encode_batch_size) + motion_vec_batches = [] + for face_batch in face_batches: + motion_vec_batch = self.motion_encoder(face_batch) + motion_vec_batches.append(motion_vec_batch) + motion_vec = torch.cat(motion_vec_batches) + motion_vec = motion_vec.view(batch_size, num_face_frames, -1) + + # Now get face features from the motion vector + motion_vec = self.face_encoder(motion_vec) + + # Add padding at the beginning (prepend zeros) + pad_face = torch.zeros_like(motion_vec[:, :1]) + motion_vec = torch.cat([pad_face, motion_vec], dim=1) + + # 5. Transformer blocks with face adapter integration + for block_idx, block in enumerate(self.blocks): + if torch.is_grad_enabled() and self.gradient_checkpointing: + hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, timestep_proj, rotary_emb + ) + else: + hidden_states = block(hidden_states, encoder_hidden_states, timestep_proj, rotary_emb) + + # Face adapter integration: apply after every 5th block (0, 5, 10, 15, ...) + if block_idx % self.config.inject_face_latents_blocks == 0: + face_adapter_block_idx = block_idx // self.config.inject_face_latents_blocks + face_adapter_output = self.face_adapter[face_adapter_block_idx](hidden_states, motion_vec) + # In case the face adapter and main transformer blocks are on different devices, which can happen when + # using model parallelism + face_adapter_output = face_adapter_output.to(device=hidden_states.device) + hidden_states = face_adapter_output + hidden_states + + # 6. Output norm, projection & unpatchify + # batch_size, inner_dim + shift, scale = (self.scale_shift_table.to(temb.device) + temb.unsqueeze(1)).chunk(2, dim=1) + + hidden_states_original_dtype = hidden_states.dtype + hidden_states = self.norm_out(hidden_states.float()) + # Move the shift and scale tensors to the same device as hidden_states. + # When using multi-GPU inference via accelerate these will be on the + # first device rather than the last device, which hidden_states ends up + # on. + shift = shift.to(hidden_states.device) + scale = scale.to(hidden_states.device) + hidden_states = (hidden_states * (1 + scale) + shift).to(dtype=hidden_states_original_dtype) + + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan_vace.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan_vace.py new file mode 100644 index 0000000000000000000000000000000000000000..7c2e205ee3ed7ba030812cbda277b748ceafb10e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_wan_vace.py @@ -0,0 +1,373 @@ +# Copyright 2025 The Wan Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Any + +import torch +import torch.nn as nn + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...utils import apply_lora_scale, logging +from ..attention import AttentionMixin, FeedForward +from ..cache_utils import CacheMixin +from ..modeling_outputs import Transformer2DModelOutput +from ..modeling_utils import ModelMixin +from ..normalization import FP32LayerNorm +from .transformer_wan import ( + WanAttention, + WanAttnProcessor, + WanRotaryPosEmbed, + WanTimeTextImageEmbedding, + WanTransformerBlock, +) + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class WanVACETransformerBlock(nn.Module): + def __init__( + self, + dim: int, + ffn_dim: int, + num_heads: int, + qk_norm: str = "rms_norm_across_heads", + cross_attn_norm: bool = False, + eps: float = 1e-6, + added_kv_proj_dim: int | None = None, + apply_input_projection: bool = False, + apply_output_projection: bool = False, + ): + super().__init__() + + # 1. Input projection + self.proj_in = None + if apply_input_projection: + self.proj_in = nn.Linear(dim, dim) + + # 2. Self-attention + self.norm1 = FP32LayerNorm(dim, eps, elementwise_affine=False) + self.attn1 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + processor=WanAttnProcessor(), + ) + + # 3. Cross-attention + self.attn2 = WanAttention( + dim=dim, + heads=num_heads, + dim_head=dim // num_heads, + eps=eps, + added_kv_proj_dim=added_kv_proj_dim, + processor=WanAttnProcessor(), + is_cross_attention=True, + ) + self.norm2 = FP32LayerNorm(dim, eps, elementwise_affine=True) if cross_attn_norm else nn.Identity() + + # 4. Feed-forward + self.ffn = FeedForward(dim, inner_dim=ffn_dim, activation_fn="gelu-approximate") + self.norm3 = FP32LayerNorm(dim, eps, elementwise_affine=False) + + # 5. Output projection + self.proj_out = None + if apply_output_projection: + self.proj_out = nn.Linear(dim, dim) + + self.scale_shift_table = nn.Parameter(torch.randn(1, 6, dim) / dim**0.5) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + control_hidden_states: torch.Tensor, + temb: torch.Tensor, + rotary_emb: torch.Tensor, + ) -> torch.Tensor: + if self.proj_in is not None: + control_hidden_states = self.proj_in(control_hidden_states) + control_hidden_states = control_hidden_states + hidden_states + + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = ( + self.scale_shift_table.to(temb.device) + temb.float() + ).chunk(6, dim=1) + + # 1. Self-attention + norm_hidden_states = (self.norm1(control_hidden_states.float()) * (1 + scale_msa) + shift_msa).type_as( + control_hidden_states + ) + attn_output = self.attn1(norm_hidden_states, None, None, rotary_emb) + control_hidden_states = (control_hidden_states.float() + attn_output * gate_msa).type_as(control_hidden_states) + + # 2. Cross-attention + norm_hidden_states = self.norm2(control_hidden_states.float()).type_as(control_hidden_states) + attn_output = self.attn2(norm_hidden_states, encoder_hidden_states, None, None) + control_hidden_states = control_hidden_states + attn_output + + # 3. Feed-forward + norm_hidden_states = (self.norm3(control_hidden_states.float()) * (1 + c_scale_msa) + c_shift_msa).type_as( + control_hidden_states + ) + ff_output = self.ffn(norm_hidden_states) + control_hidden_states = (control_hidden_states.float() + ff_output.float() * c_gate_msa).type_as( + control_hidden_states + ) + + conditioning_states = None + if self.proj_out is not None: + conditioning_states = self.proj_out(control_hidden_states) + + return conditioning_states, control_hidden_states + + +class WanVACETransformer3DModel( + ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, CacheMixin, AttentionMixin +): + r""" + A Transformer model for video-like data used in the Wan model. + + Args: + patch_size (`tuple[int]`, defaults to `(1, 2, 2)`): + 3D patch dimensions for video embedding (t_patch, h_patch, w_patch). + num_attention_heads (`int`, defaults to `40`): + Fixed length for text embeddings. + attention_head_dim (`int`, defaults to `128`): + The number of channels in each head. + in_channels (`int`, defaults to `16`): + The number of channels in the input. + out_channels (`int`, defaults to `16`): + The number of channels in the output. + text_dim (`int`, defaults to `512`): + Input dimension for text embeddings. + freq_dim (`int`, defaults to `256`): + Dimension for sinusoidal time embeddings. + ffn_dim (`int`, defaults to `13824`): + Intermediate dimension in feed-forward network. + num_layers (`int`, defaults to `40`): + The number of layers of transformer blocks to use. + window_size (`tuple[int]`, defaults to `(-1, -1)`): + Window size for local attention (-1 indicates global attention). + cross_attn_norm (`bool`, defaults to `True`): + Enable cross-attention normalization. + qk_norm (`bool`, defaults to `True`): + Enable query/key normalization. + eps (`float`, defaults to `1e-6`): + Epsilon value for normalization layers. + add_img_emb (`bool`, defaults to `False`): + Whether to use img_emb. + added_kv_proj_dim (`int`, *optional*, defaults to `None`): + The number of channels to use for the added key and value projections. If `None`, no projection is used. + """ + + _supports_gradient_checkpointing = True + _skip_layerwise_casting_patterns = ["patch_embedding", "vace_patch_embedding", "condition_embedder", "norm"] + _no_split_modules = ["WanTransformerBlock", "WanVACETransformerBlock"] + _keep_in_fp32_modules = ["time_embedder", "scale_shift_table", "norm1", "norm2", "norm3"] + _keys_to_ignore_on_load_unexpected = ["norm_added_q"] + _repeated_blocks = ["WanTransformerBlock", "WanVACETransformerBlock"] + + @register_to_config + def __init__( + self, + patch_size: tuple[int, ...] = (1, 2, 2), + num_attention_heads: int = 40, + attention_head_dim: int = 128, + in_channels: int = 16, + out_channels: int = 16, + text_dim: int = 4096, + freq_dim: int = 256, + ffn_dim: int = 13824, + num_layers: int = 40, + cross_attn_norm: bool = True, + qk_norm: str | None = "rms_norm_across_heads", + eps: float = 1e-6, + image_dim: int | None = None, + added_kv_proj_dim: int | None = None, + rope_max_seq_len: int = 1024, + pos_embed_seq_len: int | None = None, + vace_layers: list[int] = [0, 5, 10, 15, 20, 25, 30, 35], + vace_in_channels: int = 96, + ) -> None: + super().__init__() + + inner_dim = num_attention_heads * attention_head_dim + out_channels = out_channels or in_channels + + if max(vace_layers) >= num_layers: + raise ValueError(f"VACE layers {vace_layers} exceed the number of transformer layers {num_layers}.") + if 0 not in vace_layers: + raise ValueError("VACE layers must include layer 0.") + + # 1. Patch & position embedding + self.rope = WanRotaryPosEmbed(attention_head_dim, patch_size, rope_max_seq_len) + self.patch_embedding = nn.Conv3d(in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + self.vace_patch_embedding = nn.Conv3d(vace_in_channels, inner_dim, kernel_size=patch_size, stride=patch_size) + + # 2. Condition embeddings + # image_embedding_dim=1280 for I2V model + self.condition_embedder = WanTimeTextImageEmbedding( + dim=inner_dim, + time_freq_dim=freq_dim, + time_proj_dim=inner_dim * 6, + text_embed_dim=text_dim, + image_embed_dim=image_dim, + pos_embed_seq_len=pos_embed_seq_len, + ) + + # 3. Transformer blocks + self.blocks = nn.ModuleList( + [ + WanTransformerBlock( + inner_dim, ffn_dim, num_attention_heads, qk_norm, cross_attn_norm, eps, added_kv_proj_dim + ) + for _ in range(num_layers) + ] + ) + + self.vace_blocks = nn.ModuleList( + [ + WanVACETransformerBlock( + inner_dim, + ffn_dim, + num_attention_heads, + qk_norm, + cross_attn_norm, + eps, + added_kv_proj_dim, + apply_input_projection=i == 0, # Layer 0 always has input projection and is in vace_layers + apply_output_projection=True, + ) + for i in range(len(vace_layers)) + ] + ) + + # 4. Output norm & projection + self.norm_out = FP32LayerNorm(inner_dim, eps, elementwise_affine=False) + self.proj_out = nn.Linear(inner_dim, out_channels * math.prod(patch_size)) + self.scale_shift_table = nn.Parameter(torch.randn(1, 2, inner_dim) / inner_dim**0.5) + + self.gradient_checkpointing = False + + @apply_lora_scale("attention_kwargs") + def forward( + self, + hidden_states: torch.Tensor, + timestep: torch.LongTensor, + encoder_hidden_states: torch.Tensor, + encoder_hidden_states_image: torch.Tensor | None = None, + control_hidden_states: torch.Tensor = None, + control_hidden_states_scale: torch.Tensor = None, + return_dict: bool = True, + attention_kwargs: dict[str, Any] | None = None, + ) -> torch.Tensor | dict[str, torch.Tensor]: + batch_size, num_channels, num_frames, height, width = hidden_states.shape + p_t, p_h, p_w = self.config.patch_size + post_patch_num_frames = num_frames // p_t + post_patch_height = height // p_h + post_patch_width = width // p_w + + if control_hidden_states_scale is None: + control_hidden_states_scale = control_hidden_states.new_ones(len(self.config.vace_layers)) + control_hidden_states_scale = torch.unbind(control_hidden_states_scale) + if len(control_hidden_states_scale) != len(self.config.vace_layers): + raise ValueError( + f"Length of `control_hidden_states_scale` {len(control_hidden_states_scale)} should be " + f"equal to {len(self.config.vace_layers)}." + ) + + # 1. Rotary position embedding + rotary_emb = self.rope(hidden_states) + + # 2. Patch embedding + hidden_states = self.patch_embedding(hidden_states) + hidden_states = hidden_states.flatten(2).transpose(1, 2) + + control_hidden_states = self.vace_patch_embedding(control_hidden_states) + control_hidden_states = control_hidden_states.flatten(2).transpose(1, 2) + control_hidden_states_padding = control_hidden_states.new_zeros( + batch_size, hidden_states.size(1) - control_hidden_states.size(1), control_hidden_states.size(2) + ) + control_hidden_states = torch.cat([control_hidden_states, control_hidden_states_padding], dim=1) + + # 3. Time embedding + temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image = self.condition_embedder( + timestep, encoder_hidden_states, encoder_hidden_states_image + ) + timestep_proj = timestep_proj.unflatten(1, (6, -1)) + + # 4. Image embedding + if encoder_hidden_states_image is not None: + encoder_hidden_states = torch.concat([encoder_hidden_states_image, encoder_hidden_states], dim=1) + + # 5. Transformer blocks + if torch.is_grad_enabled() and self.gradient_checkpointing: + # Prepare VACE hints + control_hidden_states_list = [] + for i, block in enumerate(self.vace_blocks): + conditioning_states, control_hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, control_hidden_states, timestep_proj, rotary_emb + ) + control_hidden_states_list.append((conditioning_states, control_hidden_states_scale[i])) + control_hidden_states_list = control_hidden_states_list[::-1] + + for i, block in enumerate(self.blocks): + hidden_states = self._gradient_checkpointing_func( + block, hidden_states, encoder_hidden_states, timestep_proj, rotary_emb + ) + if i in self.config.vace_layers: + control_hint, scale = control_hidden_states_list.pop() + hidden_states = hidden_states + control_hint * scale + else: + # Prepare VACE hints + control_hidden_states_list = [] + for i, block in enumerate(self.vace_blocks): + conditioning_states, control_hidden_states = block( + hidden_states, encoder_hidden_states, control_hidden_states, timestep_proj, rotary_emb + ) + control_hidden_states_list.append((conditioning_states, control_hidden_states_scale[i])) + control_hidden_states_list = control_hidden_states_list[::-1] + + for i, block in enumerate(self.blocks): + hidden_states = block(hidden_states, encoder_hidden_states, timestep_proj, rotary_emb) + if i in self.config.vace_layers: + control_hint, scale = control_hidden_states_list.pop() + hidden_states = hidden_states + control_hint * scale + + # 6. Output norm, projection & unpatchify + shift, scale = (self.scale_shift_table.to(temb.device) + temb.unsqueeze(1)).chunk(2, dim=1) + + # Move the shift and scale tensors to the same device as hidden_states. + # When using multi-GPU inference via accelerate these will be on the + # first device rather than the last device, which hidden_states ends up + # on. + shift = shift.to(hidden_states.device) + scale = scale.to(hidden_states.device) + + hidden_states = (self.norm_out(hidden_states.float()) * (1 + scale) + shift).type_as(hidden_states) + hidden_states = self.proj_out(hidden_states) + + hidden_states = hidden_states.reshape( + batch_size, post_patch_num_frames, post_patch_height, post_patch_width, p_t, p_h, p_w, -1 + ) + hidden_states = hidden_states.permute(0, 7, 1, 4, 2, 5, 3, 6) + output = hidden_states.flatten(6, 7).flatten(4, 5).flatten(2, 3) + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_z_image.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_z_image.py new file mode 100644 index 0000000000000000000000000000000000000000..3bbf78bc5e0149b63bc8f64112d44a8ac0efaacb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/transformers/transformer_z_image.py @@ -0,0 +1,1039 @@ +# Copyright 2025 Alibaba Z-Image Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils.rnn import pad_sequence + +from ...configuration_utils import ConfigMixin, register_to_config +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin +from ...models.attention_processor import Attention +from ...models.modeling_utils import ModelMixin +from ...models.normalization import RMSNorm +from ...utils.torch_utils import maybe_allow_in_graph +from ..attention_dispatch import dispatch_attention_fn +from ..modeling_outputs import Transformer2DModelOutput + + +ADALN_EMBED_DIM = 256 +SEQ_MULTI_OF = 32 +X_PAD_DIM = 64 + + +class TimestepEmbedder(nn.Module): + def __init__(self, out_size, mid_size=None, frequency_embedding_size=256): + super().__init__() + if mid_size is None: + mid_size = out_size + self.mlp = nn.Sequential( + nn.Linear(frequency_embedding_size, mid_size, bias=True), + nn.SiLU(), + nn.Linear(mid_size, out_size, bias=True), + ) + + self.frequency_embedding_size = frequency_embedding_size + + @staticmethod + def timestep_embedding(t, dim, max_period=10000): + with torch.amp.autocast("cuda", enabled=False): + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=t.device) / half + ) + args = t[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + def forward(self, t): + t_freq = self.timestep_embedding(t, self.frequency_embedding_size) + weight_dtype = self.mlp[0].weight.dtype + compute_dtype = getattr(self.mlp[0], "compute_dtype", None) + if weight_dtype.is_floating_point: + t_freq = t_freq.to(weight_dtype) + elif compute_dtype is not None: + t_freq = t_freq.to(compute_dtype) + t_emb = self.mlp(t_freq) + return t_emb + + +class ZSingleStreamAttnProcessor: + """ + Processor for Z-Image single stream attention that adapts the existing Attention class to match the behavior of the + original Z-ImageAttention module. + """ + + _attention_backend = None + _parallel_config = None + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError( + "ZSingleStreamAttnProcessor requires PyTorch 2.0. To use it, please upgrade PyTorch to version 2.0 or higher." + ) + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor | None = None, + attention_mask: torch.Tensor | None = None, + freqs_cis: torch.Tensor | None = None, + ) -> torch.Tensor: + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + query = query.unflatten(-1, (attn.heads, -1)) + key = key.unflatten(-1, (attn.heads, -1)) + value = value.unflatten(-1, (attn.heads, -1)) + + # Apply Norms + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE + def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor: + with torch.amp.autocast("cuda", enabled=False): + x = torch.view_as_complex(x_in.float().reshape(*x_in.shape[:-1], -1, 2)) + freqs_cis = freqs_cis.unsqueeze(2) + x_out = torch.view_as_real(x * freqs_cis).flatten(3) + return x_out.type_as(x_in) # todo + + if freqs_cis is not None: + query = apply_rotary_emb(query, freqs_cis) + key = apply_rotary_emb(key, freqs_cis) + + # Cast to correct dtype + dtype = query.dtype + query, key = query.to(dtype), key.to(dtype) + + # From [batch, seq_len] to [batch, 1, 1, seq_len] -> broadcast to [batch, heads, seq_len, seq_len] + if attention_mask is not None and attention_mask.ndim == 2: + attention_mask = attention_mask[:, None, None, :] + + # Compute joint attention + hidden_states = dispatch_attention_fn( + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_causal=False, + backend=self._attention_backend, + parallel_config=self._parallel_config, + ) + + # Reshape back + hidden_states = hidden_states.flatten(2, 3) + hidden_states = hidden_states.to(dtype) + + output = attn.to_out[0](hidden_states) + if len(attn.to_out) > 1: # dropout + output = attn.to_out[1](output) + + return output + + +def select_per_token( + value_noisy: torch.Tensor, + value_clean: torch.Tensor, + noise_mask: torch.Tensor, + seq_len: int, +) -> torch.Tensor: + noise_mask_expanded = noise_mask.unsqueeze(-1) # (batch, seq_len, 1) + return torch.where( + noise_mask_expanded == 1, + value_noisy.unsqueeze(1).expand(-1, seq_len, -1), + value_clean.unsqueeze(1).expand(-1, seq_len, -1), + ) + + +class FeedForward(nn.Module): + def __init__(self, dim: int, hidden_dim: int): + super().__init__() + self.w1 = nn.Linear(dim, hidden_dim, bias=False) + self.w2 = nn.Linear(hidden_dim, dim, bias=False) + self.w3 = nn.Linear(dim, hidden_dim, bias=False) + + def _forward_silu_gating(self, x1, x3): + return F.silu(x1) * x3 + + def forward(self, x): + return self.w2(self._forward_silu_gating(self.w1(x), self.w3(x))) + + +@maybe_allow_in_graph +class ZImageTransformerBlock(nn.Module): + def __init__( + self, + layer_id: int, + dim: int, + n_heads: int, + n_kv_heads: int, + norm_eps: float, + qk_norm: bool, + modulation=True, + ): + super().__init__() + self.dim = dim + self.head_dim = dim // n_heads + + # Refactored to use diffusers Attention with custom processor + # Original Z-Image params: dim, n_heads, n_kv_heads, qk_norm + self.attention = Attention( + query_dim=dim, + cross_attention_dim=None, + dim_head=dim // n_heads, + heads=n_heads, + qk_norm="rms_norm" if qk_norm else None, + eps=1e-5, + bias=False, + out_bias=False, + processor=ZSingleStreamAttnProcessor(), + ) + + self.feed_forward = FeedForward(dim=dim, hidden_dim=int(dim / 3 * 8)) + self.layer_id = layer_id + + self.attention_norm1 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm1 = RMSNorm(dim, eps=norm_eps) + + self.attention_norm2 = RMSNorm(dim, eps=norm_eps) + self.ffn_norm2 = RMSNorm(dim, eps=norm_eps) + + self.modulation = modulation + if modulation: + self.adaLN_modulation = nn.Sequential(nn.Linear(min(dim, ADALN_EMBED_DIM), 4 * dim, bias=True)) + + def forward( + self, + x: torch.Tensor, + attn_mask: torch.Tensor, + freqs_cis: torch.Tensor, + adaln_input: torch.Tensor | None = None, + noise_mask: torch.Tensor | None = None, + adaln_noisy: torch.Tensor | None = None, + adaln_clean: torch.Tensor | None = None, + ): + if self.modulation: + seq_len = x.shape[1] + + if noise_mask is not None: + # Per-token modulation: different modulation for noisy/clean tokens + mod_noisy = self.adaLN_modulation(adaln_noisy) + mod_clean = self.adaLN_modulation(adaln_clean) + + scale_msa_noisy, gate_msa_noisy, scale_mlp_noisy, gate_mlp_noisy = mod_noisy.chunk(4, dim=1) + scale_msa_clean, gate_msa_clean, scale_mlp_clean, gate_mlp_clean = mod_clean.chunk(4, dim=1) + + gate_msa_noisy, gate_mlp_noisy = gate_msa_noisy.tanh(), gate_mlp_noisy.tanh() + gate_msa_clean, gate_mlp_clean = gate_msa_clean.tanh(), gate_mlp_clean.tanh() + + scale_msa_noisy, scale_mlp_noisy = 1.0 + scale_msa_noisy, 1.0 + scale_mlp_noisy + scale_msa_clean, scale_mlp_clean = 1.0 + scale_msa_clean, 1.0 + scale_mlp_clean + + scale_msa = select_per_token(scale_msa_noisy, scale_msa_clean, noise_mask, seq_len) + scale_mlp = select_per_token(scale_mlp_noisy, scale_mlp_clean, noise_mask, seq_len) + gate_msa = select_per_token(gate_msa_noisy, gate_msa_clean, noise_mask, seq_len) + gate_mlp = select_per_token(gate_mlp_noisy, gate_mlp_clean, noise_mask, seq_len) + else: + # Global modulation: same modulation for all tokens (avoid double select) + mod = self.adaLN_modulation(adaln_input) + scale_msa, gate_msa, scale_mlp, gate_mlp = mod.unsqueeze(1).chunk(4, dim=2) + gate_msa, gate_mlp = gate_msa.tanh(), gate_mlp.tanh() + scale_msa, scale_mlp = 1.0 + scale_msa, 1.0 + scale_mlp + + # Attention block + attn_out = self.attention( + self.attention_norm1(x) * scale_msa, attention_mask=attn_mask, freqs_cis=freqs_cis + ) + x = x + gate_msa * self.attention_norm2(attn_out) + + # FFN block + x = x + gate_mlp * self.ffn_norm2(self.feed_forward(self.ffn_norm1(x) * scale_mlp)) + else: + # Attention block + attn_out = self.attention(self.attention_norm1(x), attention_mask=attn_mask, freqs_cis=freqs_cis) + x = x + self.attention_norm2(attn_out) + + # FFN block + x = x + self.ffn_norm2(self.feed_forward(self.ffn_norm1(x))) + + return x + + +class FinalLayer(nn.Module): + def __init__(self, hidden_size, out_channels): + super().__init__() + self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.linear = nn.Linear(hidden_size, out_channels, bias=True) + + self.adaLN_modulation = nn.Sequential( + nn.SiLU(), + nn.Linear(min(hidden_size, ADALN_EMBED_DIM), hidden_size, bias=True), + ) + + def forward(self, x, c=None, noise_mask=None, c_noisy=None, c_clean=None): + seq_len = x.shape[1] + + if noise_mask is not None: + # Per-token modulation + scale_noisy = 1.0 + self.adaLN_modulation(c_noisy) + scale_clean = 1.0 + self.adaLN_modulation(c_clean) + scale = select_per_token(scale_noisy, scale_clean, noise_mask, seq_len) + else: + # Original global modulation + assert c is not None, "Either c or (c_noisy, c_clean) must be provided" + scale = 1.0 + self.adaLN_modulation(c) + scale = scale.unsqueeze(1) + + x = self.norm_final(x) * scale + x = self.linear(x) + return x + + +class RopeEmbedder: + def __init__( + self, + theta: float = 256.0, + axes_dims: list[int] = (16, 56, 56), + axes_lens: list[int] = (64, 128, 128), + ): + self.theta = theta + self.axes_dims = axes_dims + self.axes_lens = axes_lens + assert len(axes_dims) == len(axes_lens), "axes_dims and axes_lens must have the same length" + self.freqs_cis = None + + @staticmethod + def precompute_freqs_cis(dim: list[int], end: list[int], theta: float = 256.0): + with torch.device("cpu"): + freqs_cis = [] + for i, (d, e) in enumerate(zip(dim, end)): + freqs = 1.0 / (theta ** (torch.arange(0, d, 2, dtype=torch.float64, device="cpu") / d)) + timestep = torch.arange(e, device=freqs.device, dtype=torch.float64) + freqs = torch.outer(timestep, freqs).float() + freqs_cis_i = torch.polar(torch.ones_like(freqs), freqs).to(torch.complex64) # complex64 + freqs_cis.append(freqs_cis_i) + + return freqs_cis + + def __call__(self, ids: torch.Tensor): + assert ids.ndim == 2 + assert ids.shape[-1] == len(self.axes_dims) + device = ids.device + + if self.freqs_cis is None: + self.freqs_cis = self.precompute_freqs_cis(self.axes_dims, self.axes_lens, theta=self.theta) + self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis] + else: + # Ensure freqs_cis are on the same device as ids + if self.freqs_cis[0].device != device: + self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis] + + result = [] + for i in range(len(self.axes_dims)): + index = ids[:, i] + result.append(self.freqs_cis[i][index]) + return torch.cat(result, dim=-1) + + +class ZImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): + _supports_gradient_checkpointing = True + _no_split_modules = ["ZImageTransformerBlock"] + _repeated_blocks = ["ZImageTransformerBlock"] + _skip_layerwise_casting_patterns = ["t_embedder", "cap_embedder"] # precision sensitive layers + + @register_to_config + def __init__( + self, + all_patch_size=(2,), + all_f_patch_size=(1,), + in_channels=16, + dim=3840, + n_layers=30, + n_refiner_layers=2, + n_heads=30, + n_kv_heads=30, + norm_eps=1e-5, + qk_norm=True, + cap_feat_dim=2560, + siglip_feat_dim=None, # Optional: set to enable SigLIP support for Omni + rope_theta=256.0, + t_scale=1000.0, + axes_dims=[32, 48, 48], + axes_lens=[1024, 512, 512], + ) -> None: + super().__init__() + self.in_channels = in_channels + self.out_channels = in_channels + self.all_patch_size = all_patch_size + self.all_f_patch_size = all_f_patch_size + self.dim = dim + self.n_heads = n_heads + + self.rope_theta = rope_theta + self.t_scale = t_scale + self.gradient_checkpointing = False + + assert len(all_patch_size) == len(all_f_patch_size) + + all_x_embedder = {} + all_final_layer = {} + for patch_idx, (patch_size, f_patch_size) in enumerate(zip(all_patch_size, all_f_patch_size)): + x_embedder = nn.Linear(f_patch_size * patch_size * patch_size * in_channels, dim, bias=True) + all_x_embedder[f"{patch_size}-{f_patch_size}"] = x_embedder + + final_layer = FinalLayer(dim, patch_size * patch_size * f_patch_size * self.out_channels) + all_final_layer[f"{patch_size}-{f_patch_size}"] = final_layer + + self.all_x_embedder = nn.ModuleDict(all_x_embedder) + self.all_final_layer = nn.ModuleDict(all_final_layer) + self.noise_refiner = nn.ModuleList( + [ + ZImageTransformerBlock( + 1000 + layer_id, + dim, + n_heads, + n_kv_heads, + norm_eps, + qk_norm, + modulation=True, + ) + for layer_id in range(n_refiner_layers) + ] + ) + self.context_refiner = nn.ModuleList( + [ + ZImageTransformerBlock( + layer_id, + dim, + n_heads, + n_kv_heads, + norm_eps, + qk_norm, + modulation=False, + ) + for layer_id in range(n_refiner_layers) + ] + ) + self.t_embedder = TimestepEmbedder(min(dim, ADALN_EMBED_DIM), mid_size=1024) + self.cap_embedder = nn.Sequential(RMSNorm(cap_feat_dim, eps=norm_eps), nn.Linear(cap_feat_dim, dim, bias=True)) + + # Optional SigLIP components (for Omni variant) + if siglip_feat_dim is not None: + self.siglip_embedder = nn.Sequential( + RMSNorm(siglip_feat_dim, eps=norm_eps), nn.Linear(siglip_feat_dim, dim, bias=True) + ) + self.siglip_refiner = nn.ModuleList( + [ + ZImageTransformerBlock( + 2000 + layer_id, + dim, + n_heads, + n_kv_heads, + norm_eps, + qk_norm, + modulation=False, + ) + for layer_id in range(n_refiner_layers) + ] + ) + self.siglip_pad_token = nn.Parameter(torch.empty((1, dim))) + else: + self.siglip_embedder = None + self.siglip_refiner = None + self.siglip_pad_token = None + + self.x_pad_token = nn.Parameter(torch.empty((1, dim))) + self.cap_pad_token = nn.Parameter(torch.empty((1, dim))) + + self.layers = nn.ModuleList( + [ + ZImageTransformerBlock(layer_id, dim, n_heads, n_kv_heads, norm_eps, qk_norm) + for layer_id in range(n_layers) + ] + ) + head_dim = dim // n_heads + assert head_dim == sum(axes_dims) + self.axes_dims = axes_dims + self.axes_lens = axes_lens + + self.rope_embedder = RopeEmbedder(theta=rope_theta, axes_dims=axes_dims, axes_lens=axes_lens) + + def unpatchify( + self, + x: list[torch.Tensor], + size: list[tuple], + patch_size, + f_patch_size, + x_pos_offsets: list[tuple[int, int]] | None = None, + ) -> list[torch.Tensor]: + pH = pW = patch_size + pF = f_patch_size + bsz = len(x) + assert len(size) == bsz + + if x_pos_offsets is not None: + # Omni: extract target image from unified sequence (cond_images + target) + result = [] + for i in range(bsz): + unified_x = x[i][x_pos_offsets[i][0] : x_pos_offsets[i][1]] + cu_len = 0 + x_item = None + for j in range(len(size[i])): + if size[i][j] is None: + ori_len = 0 + pad_len = SEQ_MULTI_OF + cu_len += pad_len + ori_len + else: + F, H, W = size[i][j] + ori_len = (F // pF) * (H // pH) * (W // pW) + pad_len = (-ori_len) % SEQ_MULTI_OF + x_item = ( + unified_x[cu_len : cu_len + ori_len] + .view(F // pF, H // pH, W // pW, pF, pH, pW, self.out_channels) + .permute(6, 0, 3, 1, 4, 2, 5) + .reshape(self.out_channels, F, H, W) + ) + cu_len += ori_len + pad_len + result.append(x_item) # Return only the last (target) image + return result + else: + # Original mode: simple unpatchify + for i in range(bsz): + F, H, W = size[i] + ori_len = (F // pF) * (H // pH) * (W // pW) + # "f h w pf ph pw c -> c (f pf) (h ph) (w pw)" + x[i] = ( + x[i][:ori_len] + .view(F // pF, H // pH, W // pW, pF, pH, pW, self.out_channels) + .permute(6, 0, 3, 1, 4, 2, 5) + .reshape(self.out_channels, F, H, W) + ) + return x + + @staticmethod + def create_coordinate_grid(size, start=None, device=None): + if start is None: + start = (0 for _ in size) + axes = [torch.arange(x0, x0 + span, dtype=torch.int32, device=device) for x0, span in zip(start, size)] + grids = torch.meshgrid(axes, indexing="ij") + return torch.stack(grids, dim=-1) + + def _patchify_image(self, image: torch.Tensor, patch_size: int, f_patch_size: int): + """Patchify a single image tensor: (C, F, H, W) -> (num_patches, patch_dim).""" + pH, pW, pF = patch_size, patch_size, f_patch_size + C, F, H, W = image.size() + F_tokens, H_tokens, W_tokens = F // pF, H // pH, W // pW + image = image.view(C, F_tokens, pF, H_tokens, pH, W_tokens, pW) + image = image.permute(1, 3, 5, 2, 4, 6, 0).reshape(F_tokens * H_tokens * W_tokens, pF * pH * pW * C) + return image, (F, H, W), (F_tokens, H_tokens, W_tokens) + + def _pad_with_ids( + self, + feat: torch.Tensor, + pos_grid_size: tuple, + pos_start: tuple, + device: torch.device, + noise_mask_val: int | None = None, + ): + """Pad feature to SEQ_MULTI_OF, create position IDs and pad mask.""" + ori_len = len(feat) + pad_len = (-ori_len) % SEQ_MULTI_OF + total_len = ori_len + pad_len + + # Pos IDs + ori_pos_ids = self.create_coordinate_grid(size=pos_grid_size, start=pos_start, device=device).flatten(0, 2) + if pad_len > 0: + pad_pos_ids = ( + self.create_coordinate_grid(size=(1, 1, 1), start=(0, 0, 0), device=device) + .flatten(0, 2) + .repeat(pad_len, 1) + ) + pos_ids = torch.cat([ori_pos_ids, pad_pos_ids], dim=0) + padded_feat = torch.cat([feat, feat[-1:].repeat(pad_len, 1)], dim=0) + pad_mask = torch.cat( + [ + torch.zeros(ori_len, dtype=torch.bool, device=device), + torch.ones(pad_len, dtype=torch.bool, device=device), + ] + ) + else: + pos_ids = ori_pos_ids + padded_feat = feat + pad_mask = torch.zeros(ori_len, dtype=torch.bool, device=device) + + noise_mask = [noise_mask_val] * total_len if noise_mask_val is not None else None # token level + return padded_feat, pos_ids, pad_mask, total_len, noise_mask + + def patchify_and_embed( + self, all_image: list[torch.Tensor], all_cap_feats: list[torch.Tensor], patch_size: int, f_patch_size: int + ): + """Patchify for basic mode: single image per batch item.""" + device = all_image[0].device + all_img_out, all_img_size, all_img_pos_ids, all_img_pad_mask = [], [], [], [] + all_cap_out, all_cap_pos_ids, all_cap_pad_mask = [], [], [] + + for image, cap_feat in zip(all_image, all_cap_feats): + # Caption + cap_out, cap_pos_ids, cap_pad_mask, cap_len, _ = self._pad_with_ids( + cap_feat, (len(cap_feat) + (-len(cap_feat)) % SEQ_MULTI_OF, 1, 1), (1, 0, 0), device + ) + all_cap_out.append(cap_out) + all_cap_pos_ids.append(cap_pos_ids) + all_cap_pad_mask.append(cap_pad_mask) + + # Image + img_patches, size, (F_t, H_t, W_t) = self._patchify_image(image, patch_size, f_patch_size) + img_out, img_pos_ids, img_pad_mask, _, _ = self._pad_with_ids( + img_patches, (F_t, H_t, W_t), (cap_len + 1, 0, 0), device + ) + all_img_out.append(img_out) + all_img_size.append(size) + all_img_pos_ids.append(img_pos_ids) + all_img_pad_mask.append(img_pad_mask) + + return ( + all_img_out, + all_cap_out, + all_img_size, + all_img_pos_ids, + all_cap_pos_ids, + all_img_pad_mask, + all_cap_pad_mask, + ) + + def patchify_and_embed_omni( + self, + all_x: list[list[torch.Tensor]], + all_cap_feats: list[list[torch.Tensor]], + all_siglip_feats: list[list[torch.Tensor]], + patch_size: int, + f_patch_size: int, + images_noise_mask: list[list[int]], + ): + """Patchify for omni mode: multiple images per batch item with noise masks.""" + bsz = len(all_x) + device = all_x[0][-1].device + dtype = all_x[0][-1].dtype + + all_x_out, all_x_size, all_x_pos_ids, all_x_pad_mask, all_x_len, all_x_noise_mask = [], [], [], [], [], [] + all_cap_out, all_cap_pos_ids, all_cap_pad_mask, all_cap_len, all_cap_noise_mask = [], [], [], [], [] + all_sig_out, all_sig_pos_ids, all_sig_pad_mask, all_sig_len, all_sig_noise_mask = [], [], [], [], [] + + for i in range(bsz): + num_images = len(all_x[i]) + cap_feats_list, cap_pos_list, cap_mask_list, cap_lens, cap_noise = [], [], [], [], [] + cap_end_pos = [] + cap_cu_len = 1 + + # Process captions + for j, cap_item in enumerate(all_cap_feats[i]): + noise_val = images_noise_mask[i][j] if j < len(images_noise_mask[i]) else 1 + cap_out, cap_pos, cap_mask, cap_len, cap_nm = self._pad_with_ids( + cap_item, + (len(cap_item) + (-len(cap_item)) % SEQ_MULTI_OF, 1, 1), + (cap_cu_len, 0, 0), + device, + noise_val, + ) + cap_feats_list.append(cap_out) + cap_pos_list.append(cap_pos) + cap_mask_list.append(cap_mask) + cap_lens.append(cap_len) + cap_noise.extend(cap_nm) + cap_cu_len += len(cap_item) + cap_end_pos.append(cap_cu_len) + cap_cu_len += 2 # for image vae and siglip tokens + + all_cap_out.append(torch.cat(cap_feats_list, dim=0)) + all_cap_pos_ids.append(torch.cat(cap_pos_list, dim=0)) + all_cap_pad_mask.append(torch.cat(cap_mask_list, dim=0)) + all_cap_len.append(cap_lens) + all_cap_noise_mask.append(cap_noise) + + # Process images + x_feats_list, x_pos_list, x_mask_list, x_lens, x_size, x_noise = [], [], [], [], [], [] + for j, x_item in enumerate(all_x[i]): + noise_val = images_noise_mask[i][j] + if x_item is not None: + x_patches, size, (F_t, H_t, W_t) = self._patchify_image(x_item, patch_size, f_patch_size) + x_out, x_pos, x_mask, x_len, x_nm = self._pad_with_ids( + x_patches, (F_t, H_t, W_t), (cap_end_pos[j], 0, 0), device, noise_val + ) + x_size.append(size) + else: + x_len = SEQ_MULTI_OF + x_out = torch.zeros((x_len, X_PAD_DIM), dtype=dtype, device=device) + x_pos = self.create_coordinate_grid((1, 1, 1), (0, 0, 0), device).flatten(0, 2).repeat(x_len, 1) + x_mask = torch.ones(x_len, dtype=torch.bool, device=device) + x_nm = [noise_val] * x_len + x_size.append(None) + x_feats_list.append(x_out) + x_pos_list.append(x_pos) + x_mask_list.append(x_mask) + x_lens.append(x_len) + x_noise.extend(x_nm) + + all_x_out.append(torch.cat(x_feats_list, dim=0)) + all_x_pos_ids.append(torch.cat(x_pos_list, dim=0)) + all_x_pad_mask.append(torch.cat(x_mask_list, dim=0)) + all_x_size.append(x_size) + all_x_len.append(x_lens) + all_x_noise_mask.append(x_noise) + + # Process siglip + if all_siglip_feats[i] is None: + all_sig_len.append([0] * num_images) + all_sig_out.append(None) + else: + sig_feats_list, sig_pos_list, sig_mask_list, sig_lens, sig_noise = [], [], [], [], [] + for j, sig_item in enumerate(all_siglip_feats[i]): + noise_val = images_noise_mask[i][j] + if sig_item is not None: + sig_H, sig_W, sig_C = sig_item.size() + sig_flat = sig_item.permute(2, 0, 1).reshape(sig_H * sig_W, sig_C) + sig_out, sig_pos, sig_mask, sig_len, sig_nm = self._pad_with_ids( + sig_flat, (1, sig_H, sig_W), (cap_end_pos[j] + 1, 0, 0), device, noise_val + ) + # Scale position IDs to match x resolution + if x_size[j] is not None: + sig_pos = sig_pos.float() + sig_pos[..., 1] = sig_pos[..., 1] / max(sig_H - 1, 1) * (x_size[j][1] - 1) + sig_pos[..., 2] = sig_pos[..., 2] / max(sig_W - 1, 1) * (x_size[j][2] - 1) + sig_pos = sig_pos.to(torch.int32) + else: + sig_len = SEQ_MULTI_OF + sig_out = torch.zeros((sig_len, self.config.siglip_feat_dim), dtype=dtype, device=device) + sig_pos = ( + self.create_coordinate_grid((1, 1, 1), (0, 0, 0), device).flatten(0, 2).repeat(sig_len, 1) + ) + sig_mask = torch.ones(sig_len, dtype=torch.bool, device=device) + sig_nm = [noise_val] * sig_len + sig_feats_list.append(sig_out) + sig_pos_list.append(sig_pos) + sig_mask_list.append(sig_mask) + sig_lens.append(sig_len) + sig_noise.extend(sig_nm) + + all_sig_out.append(torch.cat(sig_feats_list, dim=0)) + all_sig_pos_ids.append(torch.cat(sig_pos_list, dim=0)) + all_sig_pad_mask.append(torch.cat(sig_mask_list, dim=0)) + all_sig_len.append(sig_lens) + all_sig_noise_mask.append(sig_noise) + + # Compute x position offsets + all_x_pos_offsets = [(sum(all_cap_len[i]), sum(all_cap_len[i]) + sum(all_x_len[i])) for i in range(bsz)] + + return ( + all_x_out, + all_cap_out, + all_sig_out, + all_x_size, + all_x_pos_ids, + all_cap_pos_ids, + all_sig_pos_ids, + all_x_pad_mask, + all_cap_pad_mask, + all_sig_pad_mask, + all_x_pos_offsets, + all_x_noise_mask, + all_cap_noise_mask, + all_sig_noise_mask, + ) + + def _prepare_sequence( + self, + feats: list[torch.Tensor], + pos_ids: list[torch.Tensor], + inner_pad_mask: list[torch.Tensor], + pad_token: torch.nn.Parameter, + noise_mask: list[list[int]] | None = None, + device: torch.device = None, + ): + """Prepare sequence: apply pad token, RoPE embed, pad to batch, create attention mask.""" + item_seqlens = [len(f) for f in feats] + max_seqlen = max(item_seqlens) + bsz = len(feats) + + # Pad token + feats_cat = torch.cat(feats, dim=0) + feats_cat[torch.cat(inner_pad_mask)] = pad_token + feats = list(feats_cat.split(item_seqlens, dim=0)) + + # RoPE + freqs_cis = list(self.rope_embedder(torch.cat(pos_ids, dim=0)).split([len(p) for p in pos_ids], dim=0)) + + # Pad to batch + feats = pad_sequence(feats, batch_first=True, padding_value=0.0) + freqs_cis = pad_sequence(freqs_cis, batch_first=True, padding_value=0.0)[:, : feats.shape[1]] + + # Attention mask + attn_mask = torch.zeros((bsz, max_seqlen), dtype=torch.bool, device=device) + for i, seq_len in enumerate(item_seqlens): + attn_mask[i, :seq_len] = 1 + + # Noise mask + noise_mask_tensor = None + if noise_mask is not None: + noise_mask_tensor = pad_sequence( + [torch.tensor(m, dtype=torch.long, device=device) for m in noise_mask], + batch_first=True, + padding_value=0, + )[:, : feats.shape[1]] + + return feats, freqs_cis, attn_mask, item_seqlens, noise_mask_tensor + + def _build_unified_sequence( + self, + x: torch.Tensor, + x_freqs: torch.Tensor, + x_seqlens: list[int], + x_noise_mask: list[list[int]] | None, + cap: torch.Tensor, + cap_freqs: torch.Tensor, + cap_seqlens: list[int], + cap_noise_mask: list[list[int]] | None, + siglip: torch.Tensor | None, + siglip_freqs: torch.Tensor | None, + siglip_seqlens: list[int] | None, + siglip_noise_mask: list[list[int]] | None, + omni_mode: bool, + device: torch.device, + ): + """Build unified sequence: x, cap, and optionally siglip. + Basic mode order: [x, cap]; Omni mode order: [cap, x, siglip] + """ + bsz = len(x_seqlens) + unified = [] + unified_freqs = [] + unified_noise_mask = [] + + for i in range(bsz): + x_len, cap_len = x_seqlens[i], cap_seqlens[i] + + if omni_mode: + # Omni: [cap, x, siglip] + if siglip is not None and siglip_seqlens is not None: + sig_len = siglip_seqlens[i] + unified.append(torch.cat([cap[i][:cap_len], x[i][:x_len], siglip[i][:sig_len]])) + unified_freqs.append( + torch.cat([cap_freqs[i][:cap_len], x_freqs[i][:x_len], siglip_freqs[i][:sig_len]]) + ) + unified_noise_mask.append( + torch.tensor( + cap_noise_mask[i] + x_noise_mask[i] + siglip_noise_mask[i], dtype=torch.long, device=device + ) + ) + else: + unified.append(torch.cat([cap[i][:cap_len], x[i][:x_len]])) + unified_freqs.append(torch.cat([cap_freqs[i][:cap_len], x_freqs[i][:x_len]])) + unified_noise_mask.append( + torch.tensor(cap_noise_mask[i] + x_noise_mask[i], dtype=torch.long, device=device) + ) + else: + # Basic: [x, cap] + unified.append(torch.cat([x[i][:x_len], cap[i][:cap_len]])) + unified_freqs.append(torch.cat([x_freqs[i][:x_len], cap_freqs[i][:cap_len]])) + + # Compute unified seqlens + if omni_mode: + if siglip is not None and siglip_seqlens is not None: + unified_seqlens = [a + b + c for a, b, c in zip(cap_seqlens, x_seqlens, siglip_seqlens)] + else: + unified_seqlens = [a + b for a, b in zip(cap_seqlens, x_seqlens)] + else: + unified_seqlens = [a + b for a, b in zip(x_seqlens, cap_seqlens)] + + max_seqlen = max(unified_seqlens) + + # Pad to batch + unified = pad_sequence(unified, batch_first=True, padding_value=0.0) + unified_freqs = pad_sequence(unified_freqs, batch_first=True, padding_value=0.0) + + # Attention mask + attn_mask = torch.zeros((bsz, max_seqlen), dtype=torch.bool, device=device) + for i, seq_len in enumerate(unified_seqlens): + attn_mask[i, :seq_len] = 1 + + # Noise mask + noise_mask_tensor = None + if omni_mode: + noise_mask_tensor = pad_sequence(unified_noise_mask, batch_first=True, padding_value=0)[ + :, : unified.shape[1] + ] + + return unified, unified_freqs, attn_mask, noise_mask_tensor + + def forward( + self, + x: list[torch.Tensor, list[list[torch.Tensor]]], + t, + cap_feats: list[torch.Tensor, list[list[torch.Tensor]]], + return_dict: bool = True, + controlnet_block_samples: dict[int, torch.Tensor] | None = None, + siglip_feats: list[list[torch.Tensor]] | None = None, + image_noise_mask: list[list[int]] | None = None, + patch_size: int = 2, + f_patch_size: int = 1, + ): + """ + Flow: patchify -> t_embed -> x_embed -> x_refine -> cap_embed -> cap_refine + -> [siglip_embed -> siglip_refine] -> build_unified -> main_layers -> final_layer -> unpatchify + """ + assert patch_size in self.all_patch_size and f_patch_size in self.all_f_patch_size + omni_mode = isinstance(x[0], list) + device = x[0][-1].device if omni_mode else x[0].device + + if omni_mode: + # Dual embeddings: noisy (t) and clean (t=1) + t_noisy = self.t_embedder(t * self.t_scale).type_as(x[0][-1]) + t_clean = self.t_embedder(torch.ones_like(t) * self.t_scale).type_as(x[0][-1]) + adaln_input = None + else: + # Single embedding for all tokens + adaln_input = self.t_embedder(t * self.t_scale).type_as(x[0]) + t_noisy = t_clean = None + + # Patchify + if omni_mode: + ( + x, + cap_feats, + siglip_feats, + x_size, + x_pos_ids, + cap_pos_ids, + siglip_pos_ids, + x_pad_mask, + cap_pad_mask, + siglip_pad_mask, + x_pos_offsets, + x_noise_mask, + cap_noise_mask, + siglip_noise_mask, + ) = self.patchify_and_embed_omni(x, cap_feats, siglip_feats, patch_size, f_patch_size, image_noise_mask) + else: + ( + x, + cap_feats, + x_size, + x_pos_ids, + cap_pos_ids, + x_pad_mask, + cap_pad_mask, + ) = self.patchify_and_embed(x, cap_feats, patch_size, f_patch_size) + x_pos_offsets = x_noise_mask = cap_noise_mask = siglip_noise_mask = None + + # X embed & refine + x_seqlens = [len(xi) for xi in x] + x = self.all_x_embedder[f"{patch_size}-{f_patch_size}"](torch.cat(x, dim=0)) # embed + x, x_freqs, x_mask, _, x_noise_tensor = self._prepare_sequence( + list(x.split(x_seqlens, dim=0)), x_pos_ids, x_pad_mask, self.x_pad_token, x_noise_mask, device + ) + + for layer in self.noise_refiner: + x = ( + self._gradient_checkpointing_func( + layer, x, x_mask, x_freqs, adaln_input, x_noise_tensor, t_noisy, t_clean + ) + if torch.is_grad_enabled() and self.gradient_checkpointing + else layer(x, x_mask, x_freqs, adaln_input, x_noise_tensor, t_noisy, t_clean) + ) + + # Cap embed & refine + cap_seqlens = [len(ci) for ci in cap_feats] + cap_feats = self.cap_embedder(torch.cat(cap_feats, dim=0)) # embed + cap_feats, cap_freqs, cap_mask, _, _ = self._prepare_sequence( + list(cap_feats.split(cap_seqlens, dim=0)), cap_pos_ids, cap_pad_mask, self.cap_pad_token, None, device + ) + + for layer in self.context_refiner: + cap_feats = ( + self._gradient_checkpointing_func(layer, cap_feats, cap_mask, cap_freqs) + if torch.is_grad_enabled() and self.gradient_checkpointing + else layer(cap_feats, cap_mask, cap_freqs) + ) + + # Siglip embed & refine + siglip_seqlens = siglip_freqs = None + if omni_mode and siglip_feats[0] is not None and self.siglip_embedder is not None: + siglip_seqlens = [len(si) for si in siglip_feats] + siglip_feats = self.siglip_embedder(torch.cat(siglip_feats, dim=0)) # embed + siglip_feats, siglip_freqs, siglip_mask, _, _ = self._prepare_sequence( + list(siglip_feats.split(siglip_seqlens, dim=0)), + siglip_pos_ids, + siglip_pad_mask, + self.siglip_pad_token, + None, + device, + ) + + for layer in self.siglip_refiner: + siglip_feats = ( + self._gradient_checkpointing_func(layer, siglip_feats, siglip_mask, siglip_freqs) + if torch.is_grad_enabled() and self.gradient_checkpointing + else layer(siglip_feats, siglip_mask, siglip_freqs) + ) + + # Unified sequence + unified, unified_freqs, unified_mask, unified_noise_tensor = self._build_unified_sequence( + x, + x_freqs, + x_seqlens, + x_noise_mask, + cap_feats, + cap_freqs, + cap_seqlens, + cap_noise_mask, + siglip_feats, + siglip_freqs, + siglip_seqlens, + siglip_noise_mask, + omni_mode, + device, + ) + + # Main transformer layers + for layer_idx, layer in enumerate(self.layers): + unified = ( + self._gradient_checkpointing_func( + layer, unified, unified_mask, unified_freqs, adaln_input, unified_noise_tensor, t_noisy, t_clean + ) + if torch.is_grad_enabled() and self.gradient_checkpointing + else layer(unified, unified_mask, unified_freqs, adaln_input, unified_noise_tensor, t_noisy, t_clean) + ) + if controlnet_block_samples is not None and layer_idx in controlnet_block_samples: + unified = unified + controlnet_block_samples[layer_idx] + + unified = ( + self.all_final_layer[f"{patch_size}-{f_patch_size}"]( + unified, noise_mask=unified_noise_tensor, c_noisy=t_noisy, c_clean=t_clean + ) + if omni_mode + else self.all_final_layer[f"{patch_size}-{f_patch_size}"](unified, c=adaln_input) + ) + + # Unpatchify + x = self.unpatchify(list(unified.unbind(dim=0)), x_size, patch_size, f_patch_size, x_pos_offsets) + + return (x,) if not return_dict else Transformer2DModelOutput(sample=x) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_1d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_1d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb52f259a26a96d9143d64164288d1aff1c06e0f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_1d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2267e904737f01ad4f33a637d4f6ccf144ab344 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d_blocks.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d_blocks.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96c0fc8850b94249a09579585b79c2169863bf03 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d_blocks.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d_blocks_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d_blocks_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac88e4b8d70c9564226995a2929e3c77993ed0f3 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_2d_blocks_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_spatio_temporal_condition.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_spatio_temporal_condition.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b30a90fd7fb49a2e3cdc6a4811cdccbd33505f5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/unets/__pycache__/unet_spatio_temporal_condition.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/upsampling.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/upsampling.py new file mode 100644 index 0000000000000000000000000000000000000000..cd39862873030512c9e5e1eca0f2c9e3e2debdfd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/upsampling.py @@ -0,0 +1,515 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import deprecate +from ..utils.import_utils import is_torch_version +from .normalization import RMSNorm + + +class Upsample1D(nn.Module): + """A 1D upsampling layer with an optional convolution. + + Parameters: + channels (`int`): + number of channels in the inputs and outputs. + use_conv (`bool`, default `False`): + option to use a convolution. + use_conv_transpose (`bool`, default `False`): + option to use a convolution transpose. + out_channels (`int`, optional): + number of output channels. Defaults to `channels`. + name (`str`, default `conv`): + name of the upsampling 1D layer. + """ + + def __init__( + self, + channels: int, + use_conv: bool = False, + use_conv_transpose: bool = False, + out_channels: int | None = None, + name: str = "conv", + ): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + self.conv = None + if use_conv_transpose: + self.conv = nn.ConvTranspose1d(channels, self.out_channels, 4, 2, 1) + elif use_conv: + self.conv = nn.Conv1d(self.channels, self.out_channels, 3, padding=1) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + assert inputs.shape[1] == self.channels + if self.use_conv_transpose: + return self.conv(inputs) + + outputs = F.interpolate(inputs, scale_factor=2.0, mode="nearest") + + if self.use_conv: + outputs = self.conv(outputs) + + return outputs + + +class Upsample2D(nn.Module): + """A 2D upsampling layer with an optional convolution. + + Parameters: + channels (`int`): + number of channels in the inputs and outputs. + use_conv (`bool`, default `False`): + option to use a convolution. + use_conv_transpose (`bool`, default `False`): + option to use a convolution transpose. + out_channels (`int`, optional): + number of output channels. Defaults to `channels`. + name (`str`, default `conv`): + name of the upsampling 2D layer. + """ + + def __init__( + self, + channels: int, + use_conv: bool = False, + use_conv_transpose: bool = False, + out_channels: int | None = None, + name: str = "conv", + kernel_size: int | None = None, + padding=1, + norm_type=None, + eps=None, + elementwise_affine=None, + bias=True, + interpolate=True, + ): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + self.interpolate = interpolate + + if norm_type == "ln_norm": + self.norm = nn.LayerNorm(channels, eps, elementwise_affine) + elif norm_type == "rms_norm": + self.norm = RMSNorm(channels, eps, elementwise_affine) + elif norm_type is None: + self.norm = None + else: + raise ValueError(f"unknown norm_type: {norm_type}") + + conv = None + if use_conv_transpose: + if kernel_size is None: + kernel_size = 4 + conv = nn.ConvTranspose2d( + channels, self.out_channels, kernel_size=kernel_size, stride=2, padding=padding, bias=bias + ) + elif use_conv: + if kernel_size is None: + kernel_size = 3 + conv = nn.Conv2d(self.channels, self.out_channels, kernel_size=kernel_size, padding=padding, bias=bias) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.conv = conv + else: + self.Conv2d_0 = conv + + def forward(self, hidden_states: torch.Tensor, output_size: int | None = None, *args, **kwargs) -> torch.Tensor: + if len(args) > 0 or kwargs.get("scale", None) is not None: + deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." + deprecate("scale", "1.0.0", deprecation_message) + + assert hidden_states.shape[1] == self.channels + + if self.norm is not None: + hidden_states = self.norm(hidden_states.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) + + if self.use_conv_transpose: + return self.conv(hidden_states) + + # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 until PyTorch 2.1 + # https://github.com/pytorch/pytorch/issues/86679#issuecomment-1783978767 + dtype = hidden_states.dtype + if dtype == torch.bfloat16 and is_torch_version("<", "2.1"): + hidden_states = hidden_states.to(torch.float32) + + # upsample_nearest_nhwc fails with large batch sizes. see https://github.com/huggingface/diffusers/issues/984 + if hidden_states.shape[0] >= 64: + hidden_states = hidden_states.contiguous() + + # if `output_size` is passed we force the interpolation output + # size and do not make use of `scale_factor=2` + if self.interpolate: + # upsample_nearest_nhwc also fails when the number of output elements is large + # https://github.com/pytorch/pytorch/issues/141831 + scale_factor = ( + 2 if output_size is None else max([f / s for f, s in zip(output_size, hidden_states.shape[-2:])]) + ) + if hidden_states.numel() * scale_factor > pow(2, 31): + hidden_states = hidden_states.contiguous() + + if output_size is None: + hidden_states = F.interpolate(hidden_states, scale_factor=2.0, mode="nearest") + else: + hidden_states = F.interpolate(hidden_states, size=output_size, mode="nearest") + + # Cast back to original dtype + if dtype == torch.bfloat16 and is_torch_version("<", "2.1"): + hidden_states = hidden_states.to(dtype) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if self.use_conv: + if self.name == "conv": + hidden_states = self.conv(hidden_states) + else: + hidden_states = self.Conv2d_0(hidden_states) + + return hidden_states + + +class FirUpsample2D(nn.Module): + """A 2D FIR upsampling layer with an optional convolution. + + Parameters: + channels (`int`, optional): + number of channels in the inputs and outputs. + use_conv (`bool`, default `False`): + option to use a convolution. + out_channels (`int`, optional): + number of output channels. Defaults to `channels`. + fir_kernel (`tuple`, default `(1, 3, 3, 1)`): + kernel for the FIR filter. + """ + + def __init__( + self, + channels: int | None = None, + out_channels: int | None = None, + use_conv: bool = False, + fir_kernel: tuple[int, int, int, int] = (1, 3, 3, 1), + ): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2d(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.use_conv = use_conv + self.fir_kernel = fir_kernel + self.out_channels = out_channels + + def _upsample_2d( + self, + hidden_states: torch.Tensor, + weight: torch.Tensor | None = None, + kernel: torch.Tensor | None = None, + factor: int = 2, + gain: float = 1, + ) -> torch.Tensor: + """Fused `upsample_2d()` followed by `Conv2d()`. + + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of + arbitrary order. + + Args: + hidden_states (`torch.Tensor`): + Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + weight (`torch.Tensor`, *optional*): + Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. Grouped convolution can be + performed by `inChannels = x.shape[0] // numGroups`. + kernel (`torch.Tensor`, *optional*): + FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which + corresponds to nearest-neighbor upsampling. + factor (`int`, *optional*): Integer upsampling factor (default: 2). + gain (`float`, *optional*): Scaling factor for signal magnitude (default: 1.0). + + Returns: + output (`torch.Tensor`): + Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same + datatype as `hidden_states`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Setup filter kernel. + if kernel is None: + kernel = [1] * factor + + # setup kernel + kernel = torch.tensor(kernel, dtype=torch.float32) + if kernel.ndim == 1: + kernel = torch.outer(kernel, kernel) + kernel /= torch.sum(kernel) + + kernel = kernel * (gain * (factor**2)) + + if self.use_conv: + convH = weight.shape[2] + convW = weight.shape[3] + inC = weight.shape[1] + + pad_value = (kernel.shape[0] - factor) - (convW - 1) + + stride = (factor, factor) + # Determine data dimensions. + output_shape = ( + (hidden_states.shape[2] - 1) * factor + convH, + (hidden_states.shape[3] - 1) * factor + convW, + ) + output_padding = ( + output_shape[0] - (hidden_states.shape[2] - 1) * stride[0] - convH, + output_shape[1] - (hidden_states.shape[3] - 1) * stride[1] - convW, + ) + assert output_padding[0] >= 0 and output_padding[1] >= 0 + num_groups = hidden_states.shape[1] // inC + + # Transpose weights. + weight = torch.reshape(weight, (num_groups, -1, inC, convH, convW)) + weight = torch.flip(weight, dims=[3, 4]).permute(0, 2, 1, 3, 4) + weight = torch.reshape(weight, (num_groups * inC, -1, convH, convW)) + + inverse_conv = F.conv_transpose2d( + hidden_states, + weight, + stride=stride, + output_padding=output_padding, + padding=0, + ) + + output = upfirdn2d_native( + inverse_conv, + torch.tensor(kernel, device=inverse_conv.device), + pad=((pad_value + 1) // 2 + factor - 1, pad_value // 2 + 1), + ) + else: + pad_value = kernel.shape[0] - factor + output = upfirdn2d_native( + hidden_states, + torch.tensor(kernel, device=hidden_states.device), + up=factor, + pad=((pad_value + 1) // 2 + factor - 1, pad_value // 2), + ) + + return output + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.use_conv: + height = self._upsample_2d(hidden_states, self.Conv2d_0.weight, kernel=self.fir_kernel) + height = height + self.Conv2d_0.bias.reshape(1, -1, 1, 1) + else: + height = self._upsample_2d(hidden_states, kernel=self.fir_kernel, factor=2) + + return height + + +class KUpsample2D(nn.Module): + r"""A 2D K-upsampling layer. + + Parameters: + pad_mode (`str`, *optional*, default to `"reflect"`): the padding mode to use. + """ + + def __init__(self, pad_mode: str = "reflect"): + super().__init__() + self.pad_mode = pad_mode + kernel_1d = torch.tensor([[1 / 8, 3 / 8, 3 / 8, 1 / 8]]) * 2 + self.pad = kernel_1d.shape[1] // 2 - 1 + self.register_buffer("kernel", kernel_1d.T @ kernel_1d, persistent=False) + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + inputs = F.pad(inputs, ((self.pad + 1) // 2,) * 4, self.pad_mode) + weight = inputs.new_zeros( + [ + inputs.shape[1], + inputs.shape[1], + self.kernel.shape[0], + self.kernel.shape[1], + ] + ) + indices = torch.arange(inputs.shape[1], device=inputs.device) + kernel = self.kernel.to(weight)[None, :].expand(inputs.shape[1], -1, -1) + weight[indices, indices] = kernel + return F.conv_transpose2d(inputs, weight, stride=2, padding=self.pad * 2 + 1) + + +class CogVideoXUpsample3D(nn.Module): + r""" + A 3D Upsample layer using in CogVideoX by Tsinghua University & ZhipuAI # Todo: Wait for paper release. + + Args: + in_channels (`int`): + Number of channels in the input image. + out_channels (`int`): + Number of channels produced by the convolution. + kernel_size (`int`, defaults to `3`): + Size of the convolving kernel. + stride (`int`, defaults to `1`): + Stride of the convolution. + padding (`int`, defaults to `1`): + Padding added to all four sides of the input. + compress_time (`bool`, defaults to `False`): + Whether or not to compress the time dimension. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + padding: int = 1, + compress_time: bool = False, + ) -> None: + super().__init__() + + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + self.compress_time = compress_time + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + if self.compress_time: + if inputs.shape[2] > 1 and inputs.shape[2] % 2 == 1: + # split first frame + x_first, x_rest = inputs[:, :, 0], inputs[:, :, 1:] + + x_first = F.interpolate(x_first, scale_factor=2.0) + x_rest = F.interpolate(x_rest, scale_factor=2.0) + x_first = x_first[:, :, None, :, :] + inputs = torch.cat([x_first, x_rest], dim=2) + elif inputs.shape[2] > 1: + inputs = F.interpolate(inputs, scale_factor=2.0) + else: + inputs = inputs.squeeze(2) + inputs = F.interpolate(inputs, scale_factor=2.0) + inputs = inputs[:, :, None, :, :] + else: + # only interpolate 2D + b, c, t, h, w = inputs.shape + inputs = inputs.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + inputs = F.interpolate(inputs, scale_factor=2.0) + inputs = inputs.reshape(b, t, c, *inputs.shape[2:]).permute(0, 2, 1, 3, 4) + + b, c, t, h, w = inputs.shape + inputs = inputs.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + inputs = self.conv(inputs) + inputs = inputs.reshape(b, t, *inputs.shape[1:]).permute(0, 2, 1, 3, 4) + + return inputs + + +def upfirdn2d_native( + tensor: torch.Tensor, + kernel: torch.Tensor, + up: int = 1, + down: int = 1, + pad: tuple[int, int] = (0, 0), +) -> torch.Tensor: + up_x = up_y = up + down_x = down_y = down + pad_x0 = pad_y0 = pad[0] + pad_x1 = pad_y1 = pad[1] + + _, channel, in_h, in_w = tensor.shape + tensor = tensor.reshape(-1, in_h, in_w, 1) + + _, in_h, in_w, minor = tensor.shape + kernel_h, kernel_w = kernel.shape + + out = tensor.view(-1, in_h, 1, in_w, 1, minor) + out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.view(-1, in_h * up_y, in_w * up_x, minor) + + out = F.pad(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out.to(tensor.device) # Move back to mps if necessary + out = out[ + :, + max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0), + max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0), + :, + ] + + out = out.permute(0, 3, 1, 2) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w) + out = F.conv2d(out, w) + out = out.reshape( + -1, + minor, + in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, + in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1, + ) + out = out.permute(0, 2, 3, 1) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.view(-1, channel, out_h, out_w) + + +def upsample_2d( + hidden_states: torch.Tensor, + kernel: torch.Tensor | None = None, + factor: int = 2, + gain: float = 1, +) -> torch.Tensor: + r"""Upsample2D a batch of 2D images with the given filter. + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given + filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the specified + `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its shape is + a: multiple of the upsampling factor. + + Args: + hidden_states (`torch.Tensor`): + Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + kernel (`torch.Tensor`, *optional*): + FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which + corresponds to nearest-neighbor upsampling. + factor (`int`, *optional*, default to `2`): + Integer upsampling factor. + gain (`float`, *optional*, default to `1.0`): + Scaling factor for signal magnitude (default: 1.0). + + Returns: + output (`torch.Tensor`): + Tensor of the shape `[N, C, H * factor, W * factor]` + """ + assert isinstance(factor, int) and factor >= 1 + if kernel is None: + kernel = [1] * factor + + kernel = torch.tensor(kernel, dtype=torch.float32) + if kernel.ndim == 1: + kernel = torch.outer(kernel, kernel) + kernel /= torch.sum(kernel) + + kernel = kernel * (gain * (factor**2)) + pad_value = kernel.shape[0] - factor + output = upfirdn2d_native( + hidden_states, + kernel.to(device=hidden_states.device), + up=factor, + pad=((pad_value + 1) // 2 + factor - 1, pad_value // 2), + ) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/vae_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/vae_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..c7042840e4e027e29069acbe1a430c2006e128ac --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/vae_flax.py @@ -0,0 +1,927 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# JAX implementation of VQGAN from taming-transformers https://github.com/CompVis/taming-transformers + +import math +from functools import partial + +import flax +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict + +from ..configuration_utils import ConfigMixin, flax_register_to_config +from ..utils import BaseOutput, logging +from .modeling_flax_utils import FlaxModelMixin + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class FlaxDecoderOutput(BaseOutput): + """ + Output of decoding method. + + Args: + sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)`): + The decoded output sample from the last layer of the model. + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + The `dtype` of the parameters. + """ + + sample: jnp.ndarray + + +@flax.struct.dataclass +class FlaxAutoencoderKLOutput(BaseOutput): + """ + Output of AutoencoderKL encoding method. + + Args: + latent_dist (`FlaxDiagonalGaussianDistribution`): + Encoded outputs of `Encoder` represented as the mean and logvar of `FlaxDiagonalGaussianDistribution`. + `FlaxDiagonalGaussianDistribution` allows for sampling latents from the distribution. + """ + + latent_dist: "FlaxDiagonalGaussianDistribution" + + +class FlaxUpsample2D(nn.Module): + """ + Flax implementation of 2D Upsample layer + + Args: + in_channels (`int`): + Input channels + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.conv = nn.Conv( + self.in_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + def __call__(self, hidden_states): + batch, height, width, channels = hidden_states.shape + hidden_states = jax.image.resize( + hidden_states, + shape=(batch, height * 2, width * 2, channels), + method="nearest", + ) + hidden_states = self.conv(hidden_states) + return hidden_states + + +class FlaxDownsample2D(nn.Module): + """ + Flax implementation of 2D Downsample layer + + Args: + in_channels (`int`): + Input channels + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.conv = nn.Conv( + self.in_channels, + kernel_size=(3, 3), + strides=(2, 2), + padding="VALID", + dtype=self.dtype, + ) + + def __call__(self, hidden_states): + pad = ((0, 0), (0, 1), (0, 1), (0, 0)) # pad height and width dim + hidden_states = jnp.pad(hidden_states, pad_width=pad) + hidden_states = self.conv(hidden_states) + return hidden_states + + +class FlaxResnetBlock2D(nn.Module): + """ + Flax implementation of 2D Resnet Block. + + Args: + in_channels (`int`): + Input channels + out_channels (`int`): + Output channels + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + groups (:obj:`int`, *optional*, defaults to `32`): + The number of groups to use for group norm. + use_nin_shortcut (:obj:`bool`, *optional*, defaults to `None`): + Whether to use `nin_shortcut`. This activates a new layer inside ResNet block + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int + out_channels: int = None + dropout: float = 0.0 + groups: int = 32 + use_nin_shortcut: bool = None + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + out_channels = self.in_channels if self.out_channels is None else self.out_channels + + self.norm1 = nn.GroupNorm(num_groups=self.groups, epsilon=1e-6) + self.conv1 = nn.Conv( + out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + self.norm2 = nn.GroupNorm(num_groups=self.groups, epsilon=1e-6) + self.dropout_layer = nn.Dropout(self.dropout) + self.conv2 = nn.Conv( + out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + use_nin_shortcut = self.in_channels != out_channels if self.use_nin_shortcut is None else self.use_nin_shortcut + + self.conv_shortcut = None + if use_nin_shortcut: + self.conv_shortcut = nn.Conv( + out_channels, + kernel_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype=self.dtype, + ) + + def __call__(self, hidden_states, deterministic=True): + residual = hidden_states + hidden_states = self.norm1(hidden_states) + hidden_states = nn.swish(hidden_states) + hidden_states = self.conv1(hidden_states) + + hidden_states = self.norm2(hidden_states) + hidden_states = nn.swish(hidden_states) + hidden_states = self.dropout_layer(hidden_states, deterministic) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + residual = self.conv_shortcut(residual) + + return hidden_states + residual + + +class FlaxAttentionBlock(nn.Module): + r""" + Flax Convolutional based multi-head attention block for diffusion-based VAE. + + Parameters: + channels (:obj:`int`): + Input channels + num_head_channels (:obj:`int`, *optional*, defaults to `None`): + Number of attention heads + num_groups (:obj:`int`, *optional*, defaults to `32`): + The number of groups to use for group norm + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + + """ + + channels: int + num_head_channels: int = None + num_groups: int = 32 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.num_heads = self.channels // self.num_head_channels if self.num_head_channels is not None else 1 + + dense = partial(nn.Dense, self.channels, dtype=self.dtype) + + self.group_norm = nn.GroupNorm(num_groups=self.num_groups, epsilon=1e-6) + self.query, self.key, self.value = dense(), dense(), dense() + self.proj_attn = dense() + + def transpose_for_scores(self, projection): + new_projection_shape = projection.shape[:-1] + (self.num_heads, -1) + # move heads to 2nd position (B, T, H * D) -> (B, T, H, D) + new_projection = projection.reshape(new_projection_shape) + # (B, T, H, D) -> (B, H, T, D) + new_projection = jnp.transpose(new_projection, (0, 2, 1, 3)) + return new_projection + + def __call__(self, hidden_states): + residual = hidden_states + batch, height, width, channels = hidden_states.shape + + hidden_states = self.group_norm(hidden_states) + + hidden_states = hidden_states.reshape((batch, height * width, channels)) + + query = self.query(hidden_states) + key = self.key(hidden_states) + value = self.value(hidden_states) + + # transpose + query = self.transpose_for_scores(query) + key = self.transpose_for_scores(key) + value = self.transpose_for_scores(value) + + # compute attentions + scale = 1 / math.sqrt(math.sqrt(self.channels / self.num_heads)) + attn_weights = jnp.einsum("...qc,...kc->...qk", query * scale, key * scale) + attn_weights = nn.softmax(attn_weights, axis=-1) + + # attend to values + hidden_states = jnp.einsum("...kc,...qk->...qc", value, attn_weights) + + hidden_states = jnp.transpose(hidden_states, (0, 2, 1, 3)) + new_hidden_states_shape = hidden_states.shape[:-2] + (self.channels,) + hidden_states = hidden_states.reshape(new_hidden_states_shape) + + hidden_states = self.proj_attn(hidden_states) + hidden_states = hidden_states.reshape((batch, height, width, channels)) + hidden_states = hidden_states + residual + return hidden_states + + +class FlaxDownEncoderBlock2D(nn.Module): + r""" + Flax Resnet blocks-based Encoder block for diffusion-based VAE. + + Parameters: + in_channels (:obj:`int`): + Input channels + out_channels (:obj:`int`): + Output channels + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + num_layers (:obj:`int`, *optional*, defaults to 1): + Number of Resnet layer block + resnet_groups (:obj:`int`, *optional*, defaults to `32`): + The number of groups to use for the Resnet block group norm + add_downsample (:obj:`bool`, *optional*, defaults to `True`): + Whether to add downsample layer + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int + out_channels: int + dropout: float = 0.0 + num_layers: int = 1 + resnet_groups: int = 32 + add_downsample: bool = True + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + resnets = [] + for i in range(self.num_layers): + in_channels = self.in_channels if i == 0 else self.out_channels + + res_block = FlaxResnetBlock2D( + in_channels=in_channels, + out_channels=self.out_channels, + dropout=self.dropout, + groups=self.resnet_groups, + dtype=self.dtype, + ) + resnets.append(res_block) + self.resnets = resnets + + if self.add_downsample: + self.downsamplers_0 = FlaxDownsample2D(self.out_channels, dtype=self.dtype) + + def __call__(self, hidden_states, deterministic=True): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, deterministic=deterministic) + + if self.add_downsample: + hidden_states = self.downsamplers_0(hidden_states) + + return hidden_states + + +class FlaxUpDecoderBlock2D(nn.Module): + r""" + Flax Resnet blocks-based Decoder block for diffusion-based VAE. + + Parameters: + in_channels (:obj:`int`): + Input channels + out_channels (:obj:`int`): + Output channels + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + num_layers (:obj:`int`, *optional*, defaults to 1): + Number of Resnet layer block + resnet_groups (:obj:`int`, *optional*, defaults to `32`): + The number of groups to use for the Resnet block group norm + add_upsample (:obj:`bool`, *optional*, defaults to `True`): + Whether to add upsample layer + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int + out_channels: int + dropout: float = 0.0 + num_layers: int = 1 + resnet_groups: int = 32 + add_upsample: bool = True + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + resnets = [] + for i in range(self.num_layers): + in_channels = self.in_channels if i == 0 else self.out_channels + res_block = FlaxResnetBlock2D( + in_channels=in_channels, + out_channels=self.out_channels, + dropout=self.dropout, + groups=self.resnet_groups, + dtype=self.dtype, + ) + resnets.append(res_block) + + self.resnets = resnets + + if self.add_upsample: + self.upsamplers_0 = FlaxUpsample2D(self.out_channels, dtype=self.dtype) + + def __call__(self, hidden_states, deterministic=True): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, deterministic=deterministic) + + if self.add_upsample: + hidden_states = self.upsamplers_0(hidden_states) + + return hidden_states + + +class FlaxUNetMidBlock2D(nn.Module): + r""" + Flax Unet Mid-Block module. + + Parameters: + in_channels (:obj:`int`): + Input channels + dropout (:obj:`float`, *optional*, defaults to 0.0): + Dropout rate + num_layers (:obj:`int`, *optional*, defaults to 1): + Number of Resnet layer block + resnet_groups (:obj:`int`, *optional*, defaults to `32`): + The number of groups to use for the Resnet and Attention block group norm + num_attention_heads (:obj:`int`, *optional*, defaults to `1`): + Number of attention heads for each attention block + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int + dropout: float = 0.0 + num_layers: int = 1 + resnet_groups: int = 32 + num_attention_heads: int = 1 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + resnet_groups = self.resnet_groups if self.resnet_groups is not None else min(self.in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + FlaxResnetBlock2D( + in_channels=self.in_channels, + out_channels=self.in_channels, + dropout=self.dropout, + groups=resnet_groups, + dtype=self.dtype, + ) + ] + + attentions = [] + + for _ in range(self.num_layers): + attn_block = FlaxAttentionBlock( + channels=self.in_channels, + num_head_channels=self.num_attention_heads, + num_groups=resnet_groups, + dtype=self.dtype, + ) + attentions.append(attn_block) + + res_block = FlaxResnetBlock2D( + in_channels=self.in_channels, + out_channels=self.in_channels, + dropout=self.dropout, + groups=resnet_groups, + dtype=self.dtype, + ) + resnets.append(res_block) + + self.resnets = resnets + self.attentions = attentions + + def __call__(self, hidden_states, deterministic=True): + hidden_states = self.resnets[0](hidden_states, deterministic=deterministic) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + hidden_states = attn(hidden_states) + hidden_states = resnet(hidden_states, deterministic=deterministic) + + return hidden_states + + +class FlaxEncoder(nn.Module): + r""" + Flax Implementation of VAE Encoder. + + This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module) + subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to + general usage and behavior. + + Finally, this model supports inherent JAX features such as: + - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit) + - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation) + - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap) + - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap) + + Parameters: + in_channels (:obj:`int`, *optional*, defaults to 3): + Input channels + out_channels (:obj:`int`, *optional*, defaults to 3): + Output channels + down_block_types (:obj:`tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`): + DownEncoder block type + block_out_channels (:obj:`tuple[str]`, *optional*, defaults to `(64,)`): + tuple[ containing the number of output channels for each block + layers_per_block (:obj:`int`, *optional*, defaults to `2`): + Number of Resnet layer for each block + norm_num_groups (:obj:`int`, *optional*, defaults to `32`): + norm num group + act_fn (:obj:`str`, *optional*, defaults to `silu`): + Activation function + double_z (:obj:`bool`, *optional*, defaults to `False`): + Whether to double the last output channels + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + Parameters `dtype` + """ + + in_channels: int = 3 + out_channels: int = 3 + down_block_types: tuple[str, ...] = ("DownEncoderBlock2D",) + block_out_channels: tuple[int, ...] = (64,) + layers_per_block: int = 2 + norm_num_groups: int = 32 + act_fn: str = "silu" + double_z: bool = False + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + block_out_channels = self.block_out_channels + # in + self.conv_in = nn.Conv( + block_out_channels[0], + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + # downsampling + down_blocks = [] + output_channel = block_out_channels[0] + for i, _ in enumerate(self.down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = FlaxDownEncoderBlock2D( + in_channels=input_channel, + out_channels=output_channel, + num_layers=self.layers_per_block, + resnet_groups=self.norm_num_groups, + add_downsample=not is_final_block, + dtype=self.dtype, + ) + down_blocks.append(down_block) + self.down_blocks = down_blocks + + # middle + self.mid_block = FlaxUNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_groups=self.norm_num_groups, + num_attention_heads=None, + dtype=self.dtype, + ) + + # end + conv_out_channels = 2 * self.out_channels if self.double_z else self.out_channels + self.conv_norm_out = nn.GroupNorm(num_groups=self.norm_num_groups, epsilon=1e-6) + self.conv_out = nn.Conv( + conv_out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + def __call__(self, sample, deterministic: bool = True): + # in + sample = self.conv_in(sample) + + # downsampling + for block in self.down_blocks: + sample = block(sample, deterministic=deterministic) + + # middle + sample = self.mid_block(sample, deterministic=deterministic) + + # end + sample = self.conv_norm_out(sample) + sample = nn.swish(sample) + sample = self.conv_out(sample) + + return sample + + +class FlaxDecoder(nn.Module): + r""" + Flax Implementation of VAE Decoder. + + This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module) + subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to + general usage and behavior. + + Finally, this model supports inherent JAX features such as: + - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit) + - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation) + - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap) + - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap) + + Parameters: + in_channels (:obj:`int`, *optional*, defaults to 3): + Input channels + out_channels (:obj:`int`, *optional*, defaults to 3): + Output channels + up_block_types (:obj:`tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`): + UpDecoder block type + block_out_channels (:obj:`tuple[str]`, *optional*, defaults to `(64,)`): + tuple[ containing the number of output channels for each block + layers_per_block (:obj:`int`, *optional*, defaults to `2`): + Number of Resnet layer for each block + norm_num_groups (:obj:`int`, *optional*, defaults to `32`): + norm num group + act_fn (:obj:`str`, *optional*, defaults to `silu`): + Activation function + double_z (:obj:`bool`, *optional*, defaults to `False`): + Whether to double the last output channels + dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): + parameters `dtype` + """ + + in_channels: int = 3 + out_channels: int = 3 + up_block_types: tuple[str, ...] = ("UpDecoderBlock2D",) + block_out_channels: tuple[int, ...] = (64,) + layers_per_block: int = 2 + norm_num_groups: int = 32 + act_fn: str = "silu" + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + block_out_channels = self.block_out_channels + + # z to block_in + self.conv_in = nn.Conv( + block_out_channels[-1], + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + # middle + self.mid_block = FlaxUNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_groups=self.norm_num_groups, + num_attention_heads=None, + dtype=self.dtype, + ) + + # upsampling + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + up_blocks = [] + for i, _ in enumerate(self.up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = FlaxUpDecoderBlock2D( + in_channels=prev_output_channel, + out_channels=output_channel, + num_layers=self.layers_per_block + 1, + resnet_groups=self.norm_num_groups, + add_upsample=not is_final_block, + dtype=self.dtype, + ) + up_blocks.append(up_block) + prev_output_channel = output_channel + + self.up_blocks = up_blocks + + # end + self.conv_norm_out = nn.GroupNorm(num_groups=self.norm_num_groups, epsilon=1e-6) + self.conv_out = nn.Conv( + self.out_channels, + kernel_size=(3, 3), + strides=(1, 1), + padding=((1, 1), (1, 1)), + dtype=self.dtype, + ) + + def __call__(self, sample, deterministic: bool = True): + # z to block_in + sample = self.conv_in(sample) + + # middle + sample = self.mid_block(sample, deterministic=deterministic) + + # upsampling + for block in self.up_blocks: + sample = block(sample, deterministic=deterministic) + + sample = self.conv_norm_out(sample) + sample = nn.swish(sample) + sample = self.conv_out(sample) + + return sample + + +class FlaxDiagonalGaussianDistribution(object): + def __init__(self, parameters, deterministic=False): + # Last axis to account for channels-last + self.mean, self.logvar = jnp.split(parameters, 2, axis=-1) + self.logvar = jnp.clip(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = jnp.exp(0.5 * self.logvar) + self.var = jnp.exp(self.logvar) + if self.deterministic: + self.var = self.std = jnp.zeros_like(self.mean) + + def sample(self, key): + return self.mean + self.std * jax.random.normal(key, self.mean.shape) + + def kl(self, other=None): + if self.deterministic: + return jnp.array([0.0]) + + if other is None: + return 0.5 * jnp.sum(self.mean**2 + self.var - 1.0 - self.logvar, axis=[1, 2, 3]) + + return 0.5 * jnp.sum( + jnp.square(self.mean - other.mean) / other.var + self.var / other.var - 1.0 - self.logvar + other.logvar, + axis=[1, 2, 3], + ) + + def nll(self, sample, axis=[1, 2, 3]): + if self.deterministic: + return jnp.array([0.0]) + + logtwopi = jnp.log(2.0 * jnp.pi) + return 0.5 * jnp.sum(logtwopi + self.logvar + jnp.square(sample - self.mean) / self.var, axis=axis) + + def mode(self): + return self.mean + + +@flax_register_to_config +class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin): + r""" + Flax implementation of a VAE model with KL loss for decoding latent representations. + + This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it's generic methods + implemented for all models (such as downloading or saving). + + This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module) + subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matter related to its + general usage and behavior. + + Inherent JAX features such as the following are supported: + + - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit) + - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation) + - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap) + - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap) + + Parameters: + in_channels (`int`, *optional*, defaults to 3): + Number of channels in the input image. + out_channels (`int`, *optional*, defaults to 3): + Number of channels in the output. + down_block_types (`tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`): + tuple[ of downsample block types. + up_block_types (`tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`): + tuple[ of upsample block types. + block_out_channels (`tuple[str]`, *optional*, defaults to `(64,)`): + tuple[ of block output channels. + layers_per_block (`int`, *optional*, defaults to `2`): + Number of ResNet layer for each block. + act_fn (`str`, *optional*, defaults to `silu`): + The activation function to use. + latent_channels (`int`, *optional*, defaults to `4`): + Number of channels in the latent space. + norm_num_groups (`int`, *optional*, defaults to `32`): + The number of groups for normalization. + sample_size (`int`, *optional*, defaults to 32): + Sample input size. + scaling_factor (`float`, *optional*, defaults to 0.18215): + The component-wise standard deviation of the trained latent space computed using the first batch of the + training set. This is used to scale the latent space to have unit variance when training the diffusion + model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the + diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1 + / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image + Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper. + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + The `dtype` of the parameters. + """ + + in_channels: int = 3 + out_channels: int = 3 + down_block_types: tuple[str, ...] = ("DownEncoderBlock2D",) + up_block_types: tuple[str, ...] = ("UpDecoderBlock2D",) + block_out_channels: tuple[int, ...] = (64,) + layers_per_block: int = 1 + act_fn: str = "silu" + latent_channels: int = 4 + norm_num_groups: int = 32 + sample_size: int = 32 + scaling_factor: float = 0.18215 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + self.encoder = FlaxEncoder( + in_channels=self.config.in_channels, + out_channels=self.config.latent_channels, + down_block_types=self.config.down_block_types, + block_out_channels=self.config.block_out_channels, + layers_per_block=self.config.layers_per_block, + act_fn=self.config.act_fn, + norm_num_groups=self.config.norm_num_groups, + double_z=True, + dtype=self.dtype, + ) + self.decoder = FlaxDecoder( + in_channels=self.config.latent_channels, + out_channels=self.config.out_channels, + up_block_types=self.config.up_block_types, + block_out_channels=self.config.block_out_channels, + layers_per_block=self.config.layers_per_block, + norm_num_groups=self.config.norm_num_groups, + act_fn=self.config.act_fn, + dtype=self.dtype, + ) + self.quant_conv = nn.Conv( + 2 * self.config.latent_channels, + kernel_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype=self.dtype, + ) + self.post_quant_conv = nn.Conv( + self.config.latent_channels, + kernel_size=(1, 1), + strides=(1, 1), + padding="VALID", + dtype=self.dtype, + ) + + def init_weights(self, rng: jax.Array) -> FrozenDict: + # init input tensors + sample_shape = (1, self.in_channels, self.sample_size, self.sample_size) + sample = jnp.zeros(sample_shape, dtype=jnp.float32) + + params_rng, dropout_rng, gaussian_rng = jax.random.split(rng, 3) + rngs = {"params": params_rng, "dropout": dropout_rng, "gaussian": gaussian_rng} + + return self.init(rngs, sample)["params"] + + def encode(self, sample, deterministic: bool = True, return_dict: bool = True): + sample = jnp.transpose(sample, (0, 2, 3, 1)) + + hidden_states = self.encoder(sample, deterministic=deterministic) + moments = self.quant_conv(hidden_states) + posterior = FlaxDiagonalGaussianDistribution(moments) + + if not return_dict: + return (posterior,) + + return FlaxAutoencoderKLOutput(latent_dist=posterior) + + def decode(self, latents, deterministic: bool = True, return_dict: bool = True): + if latents.shape[-1] != self.config.latent_channels: + latents = jnp.transpose(latents, (0, 2, 3, 1)) + + hidden_states = self.post_quant_conv(latents) + hidden_states = self.decoder(hidden_states, deterministic=deterministic) + + hidden_states = jnp.transpose(hidden_states, (0, 3, 1, 2)) + + if not return_dict: + return (hidden_states,) + + return FlaxDecoderOutput(sample=hidden_states) + + def __call__(self, sample, sample_posterior=False, deterministic: bool = True, return_dict: bool = True): + posterior = self.encode(sample, deterministic=deterministic, return_dict=return_dict) + if sample_posterior: + rng = self.make_rng("gaussian") + hidden_states = posterior.latent_dist.sample(rng) + else: + hidden_states = posterior.latent_dist.mode() + + sample = self.decode(hidden_states, return_dict=return_dict).sample + + if not return_dict: + return (sample,) + + return FlaxDecoderOutput(sample=sample) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/vq_model.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/vq_model.py new file mode 100644 index 0000000000000000000000000000000000000000..63dee5eec5543d495cbe631600679088320a6f34 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/models/vq_model.py @@ -0,0 +1,29 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ..utils import deprecate +from .autoencoders.vq_model import VQEncoderOutput, VQModel + + +class VQEncoderOutput(VQEncoderOutput): + def __init__(self, *args, **kwargs): + deprecation_message = "Importing `VQEncoderOutput` from `diffusers.models.vq_model` is deprecated and this will be removed in a future version. Please use `from diffusers.models.autoencoders.vq_model import VQEncoderOutput`, instead." + deprecate("VQEncoderOutput", "0.31", deprecation_message) + super().__init__(*args, **kwargs) + + +class VQModel(VQModel): + def __init__(self, *args, **kwargs): + deprecation_message = "Importing `VQModel` from `diffusers.models.vq_model` is deprecated and this will be removed in a future version. Please use `from diffusers.models.autoencoders.vq_model import VQModel`, instead." + deprecate("VQModel", "0.31", deprecation_message) + super().__init__(*args, **kwargs) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9bebd8644f7c84cfda735eb1b6b0946c2c25d9f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/__init__.py @@ -0,0 +1,149 @@ +from typing import TYPE_CHECKING + +from ..utils import ( + DIFFUSERS_SLOW_IMPORT, + OptionalDependencyNotAvailable, + _LazyModule, + get_objects_from_module, + is_torch_available, + is_transformers_available, + logging, +) + + +logger = logging.get_logger(__name__) +logger.warning( + "Modular Diffusers is currently an experimental feature under active development. The API is subject to breaking changes in future releases." +) + +# These modules contain pipelines from multiple libraries/frameworks +_dummy_objects = {} +_import_structure = {} + +try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_pt_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_pt_objects)) +else: + _import_structure["modular_pipeline"] = [ + "ModularPipelineBlocks", + "ModularPipeline", + "AutoPipelineBlocks", + "SequentialPipelineBlocks", + "ConditionalPipelineBlocks", + "LoopSequentialPipelineBlocks", + "PipelineState", + "BlockState", + ] + _import_structure["modular_pipeline_utils"] = [ + "ComponentSpec", + "ConfigSpec", + "InputParam", + "OutputParam", + "InsertableDict", + ] + _import_structure["stable_diffusion_xl"] = ["StableDiffusionXLAutoBlocks", "StableDiffusionXLModularPipeline"] + _import_structure["wan"] = [ + "WanBlocks", + "Wan22Blocks", + "WanImage2VideoAutoBlocks", + "Wan22Image2VideoBlocks", + "WanModularPipeline", + "Wan22ModularPipeline", + "WanImage2VideoModularPipeline", + "Wan22Image2VideoModularPipeline", + ] + _import_structure["flux"] = [ + "FluxAutoBlocks", + "FluxModularPipeline", + "FluxKontextAutoBlocks", + "FluxKontextModularPipeline", + ] + _import_structure["flux2"] = [ + "Flux2AutoBlocks", + "Flux2KleinAutoBlocks", + "Flux2KleinBaseAutoBlocks", + "Flux2ModularPipeline", + "Flux2KleinModularPipeline", + "Flux2KleinBaseModularPipeline", + ] + _import_structure["qwenimage"] = [ + "QwenImageAutoBlocks", + "QwenImageModularPipeline", + "QwenImageEditModularPipeline", + "QwenImageEditAutoBlocks", + "QwenImageEditPlusModularPipeline", + "QwenImageEditPlusAutoBlocks", + "QwenImageLayeredModularPipeline", + "QwenImageLayeredAutoBlocks", + ] + _import_structure["z_image"] = [ + "ZImageAutoBlocks", + "ZImageModularPipeline", + ] + _import_structure["components_manager"] = ["ComponentsManager"] + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_pt_objects import * # noqa F403 + else: + from .components_manager import ComponentsManager + from .flux import FluxAutoBlocks, FluxKontextAutoBlocks, FluxKontextModularPipeline, FluxModularPipeline + from .flux2 import ( + Flux2AutoBlocks, + Flux2KleinAutoBlocks, + Flux2KleinBaseAutoBlocks, + Flux2KleinBaseModularPipeline, + Flux2KleinModularPipeline, + Flux2ModularPipeline, + ) + from .modular_pipeline import ( + AutoPipelineBlocks, + BlockState, + ConditionalPipelineBlocks, + LoopSequentialPipelineBlocks, + ModularPipeline, + ModularPipelineBlocks, + PipelineState, + SequentialPipelineBlocks, + ) + from .modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, InsertableDict, OutputParam + from .qwenimage import ( + QwenImageAutoBlocks, + QwenImageEditAutoBlocks, + QwenImageEditModularPipeline, + QwenImageEditPlusAutoBlocks, + QwenImageEditPlusModularPipeline, + QwenImageLayeredAutoBlocks, + QwenImageLayeredModularPipeline, + QwenImageModularPipeline, + ) + from .stable_diffusion_xl import StableDiffusionXLAutoBlocks, StableDiffusionXLModularPipeline + from .wan import ( + Wan22Blocks, + Wan22Image2VideoBlocks, + Wan22Image2VideoModularPipeline, + Wan22ModularPipeline, + WanBlocks, + WanImage2VideoAutoBlocks, + WanImage2VideoModularPipeline, + WanModularPipeline, + ) + from .z_image import ZImageAutoBlocks, ZImageModularPipeline +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + module_spec=__spec__, + ) + for name, value in _dummy_objects.items(): + setattr(sys.modules[__name__], name, value) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/components_manager.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/components_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..e018381ba8598100a069aeac7be06e5f48b6039e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/components_manager.py @@ -0,0 +1,1109 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import copy +import time +from collections import OrderedDict +from itertools import combinations +from typing import Any + +import torch + +from ..hooks import ModelHook +from ..utils import ( + is_accelerate_available, + logging, +) +from ..utils.torch_utils import get_device + + +if is_accelerate_available(): + from accelerate.hooks import add_hook_to_module, remove_hook_from_module + from accelerate.state import PartialState + from accelerate.utils import send_to_device + from accelerate.utils.memory import clear_device_cache + from accelerate.utils.modeling import convert_file_size_to_int + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class CustomOffloadHook(ModelHook): + """ + A hook that offloads a model on the CPU until its forward pass is called. It ensures the model and its inputs are + on the given device. Optionally offloads other models to the CPU before the forward pass is called. + + Args: + execution_device(`str`, `int` or `torch.device`, *optional*): + The device on which the model should be executed. Will default to the MPS device if it's available, then + GPU 0 if there is a GPU, and finally to the CPU. + """ + + no_grad = False + + def __init__( + self, + execution_device: str | int | torch.device | None = None, + other_hooks: list["UserCustomOffloadHook"] | None = None, + offload_strategy: "AutoOffloadStrategy" | None = None, + ): + self.execution_device = execution_device if execution_device is not None else PartialState().default_device + self.other_hooks = other_hooks + self.offload_strategy = offload_strategy + self.model_id = None + + def set_strategy(self, offload_strategy: "AutoOffloadStrategy"): + self.offload_strategy = offload_strategy + + def add_other_hook(self, hook: "UserCustomOffloadHook"): + """ + Add a hook to the list of hooks to consider for offloading. + """ + if self.other_hooks is None: + self.other_hooks = [] + self.other_hooks.append(hook) + + def init_hook(self, module): + return module.to("cpu") + + def pre_forward(self, module, *args, **kwargs): + if module.device != self.execution_device: + if self.other_hooks is not None: + hooks_to_offload = [hook for hook in self.other_hooks if hook.model.device == self.execution_device] + # offload all other hooks + start_time = time.perf_counter() + if self.offload_strategy is not None: + hooks_to_offload = self.offload_strategy( + hooks=hooks_to_offload, + model_id=self.model_id, + model=module, + execution_device=self.execution_device, + ) + end_time = time.perf_counter() + logger.info( + f" time taken to apply offload strategy for {self.model_id}: {(end_time - start_time):.2f} seconds" + ) + + for hook in hooks_to_offload: + logger.info( + f"moving {self.model_id} to {self.execution_device}, offloading {hook.model_id} to cpu" + ) + hook.offload() + + if hooks_to_offload: + clear_device_cache() + module.to(self.execution_device) + return send_to_device(args, self.execution_device), send_to_device(kwargs, self.execution_device) + + +class UserCustomOffloadHook: + """ + A simple hook grouping a model and a `CustomOffloadHook`, which provides easy APIs for to call the init method of + the hook or remove it entirely. + """ + + def __init__(self, model_id, model, hook): + self.model_id = model_id + self.model = model + self.hook = hook + + def offload(self): + self.hook.init_hook(self.model) + + def attach(self): + add_hook_to_module(self.model, self.hook) + self.hook.model_id = self.model_id + + def remove(self): + remove_hook_from_module(self.model) + self.hook.model_id = None + + def add_other_hook(self, hook: "UserCustomOffloadHook"): + self.hook.add_other_hook(hook) + + +def custom_offload_with_hook( + model_id: str, + model: torch.nn.Module, + execution_device: str | int | torch.device = None, + offload_strategy: "AutoOffloadStrategy" | None = None, +): + hook = CustomOffloadHook(execution_device=execution_device, offload_strategy=offload_strategy) + user_hook = UserCustomOffloadHook(model_id=model_id, model=model, hook=hook) + user_hook.attach() + return user_hook + + +# this is the class that user can customize to implement their own offload strategy +class AutoOffloadStrategy: + """ + Offload strategy that should be used with `CustomOffloadHook` to automatically offload models to the CPU based on + the available memory on the device. + """ + + # YiYi TODO: instead of memory_reserve_margin, we should let user set the maximum_total_models_size to keep on device + # the actual memory usage would be higher. But it's simpler this way, and can be tested + def __init__(self, memory_reserve_margin="3GB"): + self.memory_reserve_margin = convert_file_size_to_int(memory_reserve_margin) + + def __call__(self, hooks, model_id, model, execution_device): + if len(hooks) == 0: + return [] + + try: + current_module_size = model.get_memory_footprint() + except AttributeError: + raise AttributeError(f"Do not know how to compute memory footprint of `{model.__class__.__name__}.") + + device_type = execution_device.type + device_module = getattr(torch, device_type, torch.cuda) + try: + mem_on_device = device_module.mem_get_info(execution_device.index)[0] + except AttributeError: + raise AttributeError(f"Do not know how to obtain obtain memory info for {str(device_module)}.") + + mem_on_device = mem_on_device - self.memory_reserve_margin + if current_module_size < mem_on_device: + return [] + + min_memory_offload = current_module_size - mem_on_device + logger.info(f" search for models to offload in order to free up {min_memory_offload / 1024**3:.2f} GB memory") + + # exlucde models that's not currently loaded on the device + module_sizes = dict( + sorted( + {hook.model_id: hook.model.get_memory_footprint() for hook in hooks}.items(), + key=lambda x: x[1], + reverse=True, + ) + ) + + # YiYi/Dhruv TODO: sort smallest to largest, and offload in that order we would tend to keep the larger models on GPU more often + def search_best_candidate(module_sizes, min_memory_offload): + """ + search the optimal combination of models to offload to cpu, given a dictionary of module sizes and a + minimum memory offload size. the combination of models should add up to the smallest modulesize that is + larger than `min_memory_offload` + """ + model_ids = list(module_sizes.keys()) + best_candidate = None + best_size = float("inf") + for r in range(1, len(model_ids) + 1): + for candidate_model_ids in combinations(model_ids, r): + candidate_size = sum( + module_sizes[candidate_model_id] for candidate_model_id in candidate_model_ids + ) + if candidate_size < min_memory_offload: + continue + else: + if best_candidate is None or candidate_size < best_size: + best_candidate = candidate_model_ids + best_size = candidate_size + + return best_candidate + + best_offload_model_ids = search_best_candidate(module_sizes, min_memory_offload) + + if best_offload_model_ids is None: + # if no combination is found, meaning that we cannot meet the memory requirement, offload all models + logger.warning("no combination of models to offload to cpu is found, offloading all models") + hooks_to_offload = hooks + else: + hooks_to_offload = [hook for hook in hooks if hook.model_id in best_offload_model_ids] + + return hooks_to_offload + + +# utils for display component info in a readable format +# TODO: move to a different file +def summarize_dict_by_value_and_parts(d: dict[str, Any]) -> dict[str, Any]: + """Summarizes a dictionary by finding common prefixes that share the same value. + + For a dictionary with dot-separated keys like: { + 'down_blocks.1.attentions.1.transformer_blocks.0.attn2.processor': [0.6], + 'down_blocks.1.attentions.1.transformer_blocks.1.attn2.processor': [0.6], + 'up_blocks.1.attentions.0.transformer_blocks.0.attn2.processor': [0.3], + } + + Returns a dictionary where keys are the shortest common prefixes and values are their shared values: { + 'down_blocks': [0.6], 'up_blocks': [0.3] + } + """ + # First group by values - convert lists to tuples to make them hashable + value_to_keys = {} + for key, value in d.items(): + value_tuple = tuple(value) if isinstance(value, list) else value + if value_tuple not in value_to_keys: + value_to_keys[value_tuple] = [] + value_to_keys[value_tuple].append(key) + + def find_common_prefix(keys: list[str]) -> str: + """Find the shortest common prefix among a list of dot-separated keys.""" + if not keys: + return "" + if len(keys) == 1: + return keys[0] + + # Split all keys into parts + key_parts = [k.split(".") for k in keys] + + # Find how many initial parts are common + common_length = 0 + for parts in zip(*key_parts): + if len(set(parts)) == 1: # All parts at this position are the same + common_length += 1 + else: + break + + if common_length == 0: + return "" + + # Return the common prefix + return ".".join(key_parts[0][:common_length]) + + # Create summary by finding common prefixes for each value group + summary = {} + for value_tuple, keys in value_to_keys.items(): + prefix = find_common_prefix(keys) + if prefix: # Only add if we found a common prefix + # Convert tuple back to list if it was originally a list + value = list(value_tuple) if isinstance(d[keys[0]], list) else value_tuple + summary[prefix] = value + else: + summary[""] = value # Use empty string if no common prefix + + return summary + + +class ComponentsManager: + """ + A central registry and management system for model components across multiple pipelines. + + [`ComponentsManager`] provides a unified way to register, track, and reuse model components (like UNet, VAE, text + encoders, etc.) across different modular pipelines. It includes features for duplicate detection, memory + management, and component organization. + + > [!WARNING] > This is an experimental feature and is likely to change in the future. + + Example: + ```python + from diffusers import ComponentsManager + + # Create a components manager + cm = ComponentsManager() + + # Add components + cm.add("unet", unet_model, collection="sdxl") + cm.add("vae", vae_model, collection="sdxl") + + # Enable auto offloading + cm.enable_auto_cpu_offload() + + # Retrieve components + unet = cm.get_one(name="unet", collection="sdxl") + ``` + """ + + _available_info_fields = [ + "model_id", + "added_time", + "collection", + "class_name", + "size_gb", + "adapters", + "has_hook", + "execution_device", + "ip_adapter", + "quantization", + ] + + def __init__(self): + self.components = OrderedDict() + # YiYi TODO: can remove once confirm we don't need this in mellon + self.added_time = OrderedDict() # Store when components were added + self.collections = OrderedDict() # collection_name -> set of component_names + self.model_hooks = None + self._auto_offload_enabled = False + + def _lookup_ids( + self, + name: str | None = None, + collection: str | None = None, + load_id: str | None = None, + components: OrderedDict | None = None, + ): + """ + Lookup component_ids by name, collection, or load_id. Does not support pattern matching. Returns a set of + component_ids + """ + if components is None: + components = self.components + + if name: + ids_by_name = set() + for component_id, component in components.items(): + comp_name = self._id_to_name(component_id) + if comp_name == name: + ids_by_name.add(component_id) + else: + ids_by_name = set(components.keys()) + if collection and collection not in self.collections: + return set() + elif collection and collection in self.collections: + ids_by_collection = set() + for component_id, component in components.items(): + if component_id in self.collections[collection]: + ids_by_collection.add(component_id) + else: + ids_by_collection = set(components.keys()) + if load_id: + ids_by_load_id = set() + for name, component in components.items(): + if hasattr(component, "_diffusers_load_id") and component._diffusers_load_id == load_id: + ids_by_load_id.add(name) + else: + ids_by_load_id = set(components.keys()) + + ids = ids_by_name.intersection(ids_by_collection).intersection(ids_by_load_id) + return ids + + @staticmethod + def _id_to_name(component_id: str): + return "_".join(component_id.split("_")[:-1]) + + def add(self, name: str, component: Any, collection: str | None = None): + """ + Add a component to the ComponentsManager. + + Args: + name (str): The name of the component + component (Any): The component to add + collection (str | None): The collection to add the component to + + Returns: + str: The unique component ID, which is generated as "{name}_{id(component)}" where + id(component) is Python's built-in unique identifier for the object + """ + component_id = f"{name}_{id(component)}" + is_new_component = True + + # check for duplicated components + for comp_id, comp in self.components.items(): + if comp == component: + comp_name = self._id_to_name(comp_id) + if comp_name == name: + logger.warning(f"ComponentsManager: component '{name}' already exists as '{comp_id}'") + component_id = comp_id + is_new_component = False + break + else: + logger.warning( + f"ComponentsManager: adding component '{name}' as '{component_id}', but it is duplicate of '{comp_id}'" + f"To remove a duplicate, call `components_manager.remove('')`." + ) + + # check for duplicated load_id and warn (we do not delete for you) + if hasattr(component, "_diffusers_load_id") and component._diffusers_load_id != "null": + components_with_same_load_id = self._lookup_ids(load_id=component._diffusers_load_id) + components_with_same_load_id = [id for id in components_with_same_load_id if id != component_id] + + if components_with_same_load_id: + existing = ", ".join(components_with_same_load_id) + logger.warning( + f"ComponentsManager: adding component '{component_id}', but it has duplicate load_id '{component._diffusers_load_id}' with existing components: {existing}. " + f"To remove a duplicate, call `components_manager.remove('')`." + ) + + # add component to components manager + self.components[component_id] = component + if is_new_component: + self.added_time[component_id] = time.time() + + if collection: + if collection not in self.collections: + self.collections[collection] = set() + if component_id not in self.collections[collection]: + comp_ids_in_collection = self._lookup_ids(name=name, collection=collection) + for comp_id in comp_ids_in_collection: + logger.warning( + f"ComponentsManager: removing existing {name} from collection '{collection}': {comp_id}" + ) + # remove existing component from this collection (if it is not in any other collection, will be removed from ComponentsManager) + self.remove_from_collection(comp_id, collection) + + self.collections[collection].add(component_id) + logger.info( + f"ComponentsManager: added component '{name}' in collection '{collection}': {component_id}" + ) + else: + logger.info(f"ComponentsManager: added component '{name}' as '{component_id}'") + + if self._auto_offload_enabled and is_new_component: + self.enable_auto_cpu_offload(self._auto_offload_device) + + return component_id + + def remove_from_collection(self, component_id: str, collection: str): + """ + Remove a component from a collection. + """ + if collection not in self.collections: + logger.warning(f"Collection '{collection}' not found in ComponentsManager") + return + if component_id not in self.collections[collection]: + logger.warning(f"Component '{component_id}' not found in collection '{collection}'") + return + # remove from the collection + self.collections[collection].remove(component_id) + # check if this component is in any other collection + comp_colls = [coll for coll, comps in self.collections.items() if component_id in comps] + if not comp_colls: # only if no other collection contains this component, remove it + logger.warning(f"ComponentsManager: removing component '{component_id}' from ComponentsManager") + self.remove(component_id) + + def remove(self, component_id: str = None): + """ + Remove a component from the ComponentsManager. + + Args: + component_id (str): The ID of the component to remove + """ + if component_id not in self.components: + logger.warning(f"Component '{component_id}' not found in ComponentsManager") + return + + component = self.components.pop(component_id) + self.added_time.pop(component_id) + + for collection in self.collections: + if component_id in self.collections[collection]: + self.collections[collection].remove(component_id) + + if self._auto_offload_enabled: + self.enable_auto_cpu_offload(self._auto_offload_device) + else: + if isinstance(component, torch.nn.Module): + component.to("cpu") + del component + import gc + + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + if torch.xpu.is_available(): + torch.xpu.empty_cache() + + # YiYi TODO: rename to search_components for now, may remove this method + def search_components( + self, + names: str | None = None, + collection: str | None = None, + load_id: str | None = None, + return_dict_with_names: bool = True, + ): + """ + Search components by name with simple pattern matching. Optionally filter by collection or load_id. + + Args: + names: Component name(s) or pattern(s) + Patterns: + - "unet" : match any component with base name "unet" (e.g., unet_123abc) + - "!unet" : everything except components with base name "unet" + - "unet*" : anything with base name starting with "unet" + - "!unet*" : anything with base name NOT starting with "unet" + - "*unet*" : anything with base name containing "unet" + - "!*unet*" : anything with base name NOT containing "unet" + - "refiner|vae|unet" : anything with base name exactly matching "refiner", "vae", or "unet" + - "!refiner|vae|unet" : anything with base name NOT exactly matching "refiner", "vae", or "unet" + - "unet*|vae*" : anything with base name starting with "unet" OR starting with "vae" + collection: Optional collection to filter by + load_id: Optional load_id to filter by + return_dict_with_names: + If True, returns a dictionary with component names as keys, throw an error if + multiple components with the same name are found If False, returns a dictionary + with component IDs as keys + + Returns: + Dictionary mapping component names to components if return_dict_with_names=True, or a dictionary mapping + component IDs to components if return_dict_with_names=False + """ + + # select components based on collection and load_id filters + selected_ids = self._lookup_ids(collection=collection, load_id=load_id) + components = {k: self.components[k] for k in selected_ids} + + def get_return_dict(components, return_dict_with_names): + """ + Create a dictionary mapping component names to components if return_dict_with_names=True, or a dictionary + mapping component IDs to components if return_dict_with_names=False, throw an error if duplicate component + names are found when return_dict_with_names=True + """ + if return_dict_with_names: + dict_to_return = {} + for comp_id, comp in components.items(): + comp_name = self._id_to_name(comp_id) + if comp_name in dict_to_return: + raise ValueError( + f"Duplicate component names found in the search results: {comp_name}, please set `return_dict_with_names=False` to return a dictionary with component IDs as keys" + ) + dict_to_return[comp_name] = comp + return dict_to_return + else: + return components + + # if no names are provided, return the filtered components as it is + if names is None: + return get_return_dict(components, return_dict_with_names) + + # if names is not a string, raise an error + elif not isinstance(names, str): + raise ValueError(f"Invalid type for `names: {type(names)}, only support string") + + # Create mapping from component_id to base_name for components to be used for pattern matching + base_names = {comp_id: self._id_to_name(comp_id) for comp_id in components.keys()} + + # Helper function to check if a component matches a pattern based on its base name + def matches_pattern(component_id, pattern, exact_match=False): + """ + Helper function to check if a component matches a pattern based on its base name. + + Args: + component_id: The component ID to check + pattern: The pattern to match against + exact_match: If True, only exact matches to base_name are considered + """ + base_name = base_names[component_id] + + # Exact match with base name + if exact_match: + return pattern == base_name + + # Prefix match (ends with *) + elif pattern.endswith("*"): + prefix = pattern[:-1] + return base_name.startswith(prefix) + + # Contains match (starts with *) + elif pattern.startswith("*"): + search = pattern[1:-1] if pattern.endswith("*") else pattern[1:] + return search in base_name + + # Exact match (no wildcards) + else: + return pattern == base_name + + # Check if this is a "not" pattern + is_not_pattern = names.startswith("!") + if is_not_pattern: + names = names[1:] # Remove the ! prefix + + # Handle OR patterns (containing |) + if "|" in names: + terms = names.split("|") + matches = {} + + for comp_id, comp in components.items(): + # For OR patterns with exact names (no wildcards), we do exact matching on base names + exact_match = all(not (term.startswith("*") or term.endswith("*")) for term in terms) + + # Check if any of the terms match this component + should_include = any(matches_pattern(comp_id, term, exact_match) for term in terms) + + # Flip the decision if this is a NOT pattern + if is_not_pattern: + should_include = not should_include + + if should_include: + matches[comp_id] = comp + + log_msg = "NOT " if is_not_pattern else "" + match_type = "exactly matching" if exact_match else "matching any of patterns" + logger.info(f"Getting components {log_msg}{match_type} {terms}: {list(matches.keys())}") + + # Try exact match with a base name + elif any(names == base_name for base_name in base_names.values()): + # Find all components with this base name + matches = { + comp_id: comp + for comp_id, comp in components.items() + if (base_names[comp_id] == names) != is_not_pattern + } + + if is_not_pattern: + logger.info(f"Getting all components except those with base name '{names}': {list(matches.keys())}") + else: + logger.info(f"Getting components with base name '{names}': {list(matches.keys())}") + + # Prefix match (ends with *) + elif names.endswith("*"): + prefix = names[:-1] + matches = { + comp_id: comp + for comp_id, comp in components.items() + if base_names[comp_id].startswith(prefix) != is_not_pattern + } + if is_not_pattern: + logger.info(f"Getting components NOT starting with '{prefix}': {list(matches.keys())}") + else: + logger.info(f"Getting components starting with '{prefix}': {list(matches.keys())}") + + # Contains match (starts with *) + elif names.startswith("*"): + search = names[1:-1] if names.endswith("*") else names[1:] + matches = { + comp_id: comp + for comp_id, comp in components.items() + if (search in base_names[comp_id]) != is_not_pattern + } + if is_not_pattern: + logger.info(f"Getting components NOT containing '{search}': {list(matches.keys())}") + else: + logger.info(f"Getting components containing '{search}': {list(matches.keys())}") + + # Substring match (no wildcards, but not an exact component name) + elif any(names in base_name for base_name in base_names.values()): + matches = { + comp_id: comp + for comp_id, comp in components.items() + if (names in base_names[comp_id]) != is_not_pattern + } + if is_not_pattern: + logger.info(f"Getting components NOT containing '{names}': {list(matches.keys())}") + else: + logger.info(f"Getting components containing '{names}': {list(matches.keys())}") + + else: + raise ValueError(f"Component or pattern '{names}' not found in ComponentsManager") + + if not matches: + raise ValueError(f"No components found matching pattern '{names}'") + + return get_return_dict(matches, return_dict_with_names) + + def enable_auto_cpu_offload(self, device: str | int | torch.device = None, memory_reserve_margin="3GB"): + """ + Enable automatic CPU offloading for all components. + + The algorithm works as follows: + 1. All models start on CPU by default + 2. When a model's forward pass is called, it's moved to the execution device + 3. If there's insufficient memory, other models on the device are moved back to CPU + 4. The system tries to offload the smallest combination of models that frees enough memory + 5. Models stay on the execution device until another model needs memory and forces them off + + Args: + device (str | int | torch.device): The execution device where models are moved for forward passes + memory_reserve_margin (str): The memory reserve margin to use, default is 3GB. This is the amount of + memory to keep free on the device to avoid running out of memory during model + execution (e.g., for intermediate activations, gradients, etc.) + """ + if not is_accelerate_available(): + raise ImportError("Make sure to install accelerate to use auto_cpu_offload") + + if device is None: + device = get_device() + if not isinstance(device, torch.device): + device = torch.device(device) + + device_type = device.type + device_module = getattr(torch, device_type, torch.cuda) + if not hasattr(device_module, "mem_get_info"): + raise NotImplementedError( + f"`enable_auto_cpu_offload() relies on the `mem_get_info()` method. It's not implemented for {str(device.type)}." + ) + + if device.index is None: + device = torch.device(f"{device.type}:{0}") + + for name, component in self.components.items(): + if isinstance(component, torch.nn.Module) and hasattr(component, "_hf_hook"): + remove_hook_from_module(component, recurse=True) + + self.disable_auto_cpu_offload() + offload_strategy = AutoOffloadStrategy(memory_reserve_margin=memory_reserve_margin) + + all_hooks = [] + for name, component in self.components.items(): + if isinstance(component, torch.nn.Module): + hook = custom_offload_with_hook(name, component, device, offload_strategy=offload_strategy) + all_hooks.append(hook) + + for hook in all_hooks: + other_hooks = [h for h in all_hooks if h is not hook] + for other_hook in other_hooks: + if other_hook.hook.execution_device == hook.hook.execution_device: + hook.add_other_hook(other_hook) + + self.model_hooks = all_hooks + self._auto_offload_enabled = True + self._auto_offload_device = device + + def disable_auto_cpu_offload(self): + """ + Disable automatic CPU offloading for all components. + """ + if self.model_hooks is None: + self._auto_offload_enabled = False + return + + for hook in self.model_hooks: + hook.offload() + hook.remove() + if self.model_hooks: + clear_device_cache() + self.model_hooks = None + self._auto_offload_enabled = False + + def get_model_info( + self, + component_id: str, + fields: str | list[str] | None = None, + ) -> dict[str, Any] | None: + """Get comprehensive information about a component. + + Args: + component_id (str): Name of the component to get info for + fields (str | list[str] | None): + Field(s) to return. Can be a string for single field or list of fields. If None, uses the + available_info_fields setting. + + Returns: + Dictionary containing requested component metadata. If fields is specified, returns only those fields. + Otherwise, returns all fields. + """ + if component_id not in self.components: + raise ValueError(f"Component '{component_id}' not found in ComponentsManager") + + component = self.components[component_id] + + # Validate fields if specified + if fields is not None: + if isinstance(fields, str): + fields = [fields] + for field in fields: + if field not in self._available_info_fields: + raise ValueError(f"Field '{field}' not found in available_info_fields") + + # Build complete info dict first + info = { + "model_id": component_id, + "added_time": self.added_time[component_id], + "collection": ", ".join([coll for coll, comps in self.collections.items() if component_id in comps]) + or None, + } + + # Additional info for torch.nn.Module components + if isinstance(component, torch.nn.Module): + # Check for hook information + has_hook = hasattr(component, "_hf_hook") + execution_device = None + if has_hook and hasattr(component._hf_hook, "execution_device"): + execution_device = component._hf_hook.execution_device + + info.update( + { + "class_name": component.__class__.__name__, + "size_gb": component.get_memory_footprint() / (1024**3), + "adapters": None, # Default to None + "has_hook": has_hook, + "execution_device": execution_device, + } + ) + + # Get adapters if applicable + if hasattr(component, "peft_config"): + info["adapters"] = list(component.peft_config.keys()) + + # Check for IP-Adapter scales + if hasattr(component, "_load_ip_adapter_weights") and hasattr(component, "attn_processors"): + processors = copy.deepcopy(component.attn_processors) + # First check if any processor is an IP-Adapter + processor_types = [v.__class__.__name__ for v in processors.values()] + if any("IPAdapter" in ptype for ptype in processor_types): + # Then get scales only from IP-Adapter processors + scales = { + k: v.scale + for k, v in processors.items() + if hasattr(v, "scale") and "IPAdapter" in v.__class__.__name__ + } + if scales: + info["ip_adapter"] = summarize_dict_by_value_and_parts(scales) + + # Check for quantization + hf_quantizer = getattr(component, "hf_quantizer", None) + if hf_quantizer is not None: + quant_config = hf_quantizer.quantization_config + if hasattr(quant_config, "to_diff_dict"): + info["quantization"] = quant_config.to_diff_dict() + else: + info["quantization"] = quant_config.to_dict() + else: + info["quantization"] = None + + # If fields specified, filter info + if fields is not None: + return {k: v for k, v in info.items() if k in fields} + else: + return info + + # YiYi TODO: (1) add display fields, allow user to set which fields to display in the comnponents table + def __repr__(self): + # Handle empty components case + if not self.components: + return "Components:\n" + "=" * 50 + "\nNo components registered.\n" + "=" * 50 + + # Extract load_id if available + def get_load_id(component): + if hasattr(component, "_diffusers_load_id"): + return component._diffusers_load_id + return "N/A" + + # Format device info compactly + def format_device(component, info): + if not info["has_hook"]: + return str(getattr(component, "device", "N/A")) + else: + device = str(getattr(component, "device", "N/A")) + exec_device = str(info["execution_device"] or "N/A") + return f"{device}({exec_device})" + + # Get max length of load_ids for models + load_ids = [ + get_load_id(component) + for component in self.components.values() + if isinstance(component, torch.nn.Module) and hasattr(component, "_diffusers_load_id") + ] + max_load_id_len = max([15] + [len(str(lid)) for lid in load_ids]) if load_ids else 15 + + # Get all collections for each component + component_collections = {} + for name in self.components.keys(): + component_collections[name] = [] + for coll, comps in self.collections.items(): + if name in comps: + component_collections[name].append(coll) + if not component_collections[name]: + component_collections[name] = ["N/A"] + + # Find the maximum collection name length + all_collections = [coll for colls in component_collections.values() for coll in colls] + max_collection_len = max(10, max(len(str(c)) for c in all_collections)) if all_collections else 10 + + col_widths = { + "id": max(15, max(len(name) for name in self.components.keys())), + "class": max(25, max(len(component.__class__.__name__) for component in self.components.values())), + "device": 20, + "dtype": 15, + "size": 10, + "load_id": max_load_id_len, + "collection": max_collection_len, + } + + # Create the header lines + sep_line = "=" * (sum(col_widths.values()) + len(col_widths) * 3 - 1) + "\n" + dash_line = "-" * (sum(col_widths.values()) + len(col_widths) * 3 - 1) + "\n" + + output = "Components:\n" + sep_line + + # Separate components into models and others + models = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} + others = {k: v for k, v in self.components.items() if not isinstance(v, torch.nn.Module)} + + # Models section + if models: + output += "Models:\n" + dash_line + # Column headers + output += f"{'Name_ID':<{col_widths['id']}} | {'Class':<{col_widths['class']}} | " + output += f"{'Device: act(exec)':<{col_widths['device']}} | {'Dtype':<{col_widths['dtype']}} | " + output += f"{'Size (GB)':<{col_widths['size']}} | {'Load ID':<{col_widths['load_id']}} | Collection\n" + output += dash_line + + # Model entries + for name, component in models.items(): + info = self.get_model_info(name) + device_str = format_device(component, info) + dtype = str(component.dtype) if hasattr(component, "dtype") else "N/A" + load_id = get_load_id(component) + + # Print first collection on the main line + first_collection = component_collections[name][0] if component_collections[name] else "N/A" + + output += f"{name:<{col_widths['id']}} | {info['class_name']:<{col_widths['class']}} | " + output += f"{device_str:<{col_widths['device']}} | {dtype:<{col_widths['dtype']}} | " + output += f"{info['size_gb']:<{col_widths['size']}.2f} | {load_id:<{col_widths['load_id']}} | {first_collection}\n" + + # Print additional collections on separate lines if they exist + for i in range(1, len(component_collections[name])): + collection = component_collections[name][i] + output += f"{'':<{col_widths['id']}} | {'':<{col_widths['class']}} | " + output += f"{'':<{col_widths['device']}} | {'':<{col_widths['dtype']}} | " + output += f"{'':<{col_widths['size']}} | {'':<{col_widths['load_id']}} | {collection}\n" + + output += dash_line + + # Other components section + if others: + if models: # Add extra newline if we had models section + output += "\n" + output += "Other Components:\n" + dash_line + # Column headers for other components + output += f"{'ID':<{col_widths['id']}} | {'Class':<{col_widths['class']}} | Collection\n" + output += dash_line + + # Other component entries + for name, component in others.items(): + info = self.get_model_info(name) + + # Print first collection on the main line + first_collection = component_collections[name][0] if component_collections[name] else "N/A" + + output += f"{name:<{col_widths['id']}} | {component.__class__.__name__:<{col_widths['class']}} | {first_collection}\n" + + # Print additional collections on separate lines if they exist + for i in range(1, len(component_collections[name])): + collection = component_collections[name][i] + output += f"{'':<{col_widths['id']}} | {'':<{col_widths['class']}} | {collection}\n" + + output += dash_line + + # Add additional component info + output += "\nAdditional Component Info:\n" + "=" * 50 + "\n" + for name in self.components: + info = self.get_model_info(name) + if info is not None and ( + info.get("adapters") is not None or info.get("ip_adapter") or info.get("quantization") + ): + output += f"\n{name}:\n" + if info.get("adapters") is not None: + output += f" Adapters: {info['adapters']}\n" + if info.get("ip_adapter"): + output += " IP-Adapter: Enabled\n" + if info.get("quantization"): + output += f" Quantization: {info['quantization']}\n" + + return output + + def get_one( + self, + component_id: str | None = None, + name: str | None = None, + collection: str | None = None, + load_id: str | None = None, + ) -> Any: + """ + Get a single component by either: + - searching name (pattern matching), collection, or load_id. + - passing in a component_id + Raises an error if multiple components match or none are found. + + Args: + component_id (str | None): Optional component ID to get + name (str | None): Component name or pattern + collection (str | None): Optional collection to filter by + load_id (str | None): Optional load_id to filter by + + Returns: + A single component + + Raises: + ValueError: If no components match or multiple components match + """ + + if component_id is not None and (name is not None or collection is not None or load_id is not None): + raise ValueError("If searching by component_id, do not pass name, collection, or load_id") + + # search by component_id + if component_id is not None: + if component_id not in self.components: + raise ValueError(f"Component '{component_id}' not found in ComponentsManager") + return self.components[component_id] + # search with name/collection/load_id + results = self.search_components(name, collection, load_id) + + if not results: + raise ValueError(f"No components found matching '{name}'") + + if len(results) > 1: + raise ValueError(f"Multiple components found matching '{name}': {list(results.keys())}") + + return next(iter(results.values())) + + def get_ids(self, names: str | list[str] = None, collection: str | None = None): + """ + Get component IDs by a list of names, optionally filtered by collection. + + Args: + names (str | list[str]): list of component names + collection (str | None): Optional collection to filter by + + Returns: + list[str]: list of component IDs + """ + ids = set() + if not isinstance(names, list): + names = [names] + for name in names: + ids.update(self._lookup_ids(name=name, collection=collection)) + return list(ids) + + def get_components_by_ids(self, ids: list[str], return_dict_with_names: bool | None = True): + """ + Get components by a list of IDs. + + Args: + ids (list[str]): + list of component IDs + return_dict_with_names (bool | None): + Whether to return a dictionary with component names as keys: + + Returns: + dict[str, Any]: Dictionary of components. + - If return_dict_with_names=True, keys are component names. + - If return_dict_with_names=False, keys are component IDs. + + Raises: + ValueError: If duplicate component names are found in the search results when return_dict_with_names=True + """ + components = {id: self.components[id] for id in ids} + + if return_dict_with_names: + dict_to_return = {} + for comp_id, comp in components.items(): + comp_name = self._id_to_name(comp_id) + if comp_name in dict_to_return: + raise ValueError( + f"Duplicate component names found in the search results: {comp_name}, please set `return_dict_with_names=False` to return a dictionary with component IDs as keys" + ) + dict_to_return[comp_name] = comp + return dict_to_return + else: + return components + + def get_components_by_names(self, names: list[str], collection: str | None = None): + """ + Get components by a list of names, optionally filtered by collection. + + Args: + names (list[str]): list of component names + collection (str | None): Optional collection to filter by + + Returns: + dict[str, Any]: Dictionary of components with component names as keys + + Raises: + ValueError: If duplicate component names are found in the search results + """ + ids = self.get_ids(names, collection) + return self.get_components_by_ids(ids) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/mellon_node_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/mellon_node_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f65459dfc99023c72d250df35f3eee7554b81ecc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/mellon_node_utils.py @@ -0,0 +1,1101 @@ +import copy +import json +import logging +import os + +# Simple typed wrapper for parameter overrides +from dataclasses import asdict, dataclass +from typing import Any + +from huggingface_hub import create_repo, hf_hub_download, upload_file +from huggingface_hub.utils import ( + EntryNotFoundError, + HfHubHTTPError, + RepositoryNotFoundError, + RevisionNotFoundError, +) + +from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT +from .modular_pipeline_utils import InputParam, OutputParam + + +logger = logging.getLogger(__name__) + + +def _name_to_label(name: str) -> str: + """Convert snake_case name to Title Case label.""" + return name.replace("_", " ").title() + + +# Template definitions for standard diffuser pipeline parameters +MELLON_PARAM_TEMPLATES = { + # Image I/O + "image": {"label": "Image", "type": "image", "display": "input", "required_block_params": ["image"]}, + "images": {"label": "Images", "type": "image", "display": "output", "required_block_params": ["images"]}, + "control_image": { + "label": "Control Image", + "type": "image", + "display": "input", + "required_block_params": ["control_image"], + }, + # Latents + "latents": {"label": "Latents", "type": "latents", "display": "input", "required_block_params": ["latents"]}, + "image_latents": { + "label": "Image Latents", + "type": "latents", + "display": "input", + "required_block_params": ["image_latents"], + }, + "first_frame_latents": { + "label": "First Frame Latents", + "type": "latents", + "display": "input", + "required_block_params": ["first_frame_latents"], + }, + "latents_preview": {"label": "Latents Preview", "type": "latent", "display": "output"}, + # Image Latents with Strength + "image_latents_with_strength": { + "name": "image_latents", # name is not same as template key + "label": "Image Latents", + "type": "latents", + "display": "input", + "onChange": {"false": ["height", "width"], "true": ["strength"]}, + "required_block_params": ["image_latents", "strength"], + }, + # Embeddings + "embeddings": {"label": "Text Embeddings", "type": "embeddings", "display": "output"}, + "image_embeds": { + "label": "Image Embeddings", + "type": "image_embeds", + "display": "output", + "required_block_params": ["image_embeds"], + }, + # Text inputs + "prompt": { + "label": "Prompt", + "type": "string", + "display": "textarea", + "default": "", + "required_block_params": ["prompt"], + }, + "negative_prompt": { + "label": "Negative Prompt", + "type": "string", + "display": "textarea", + "default": "", + "required_block_params": ["negative_prompt"], + }, + # Numeric params + "guidance_scale": { + "label": "Guidance Scale", + "type": "float", + "display": "slider", + "default": 5.0, + "min": 1.0, + "max": 30.0, + "step": 0.1, + }, + "strength": { + "label": "Strength", + "type": "float", + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["strength"], + }, + "height": { + "label": "Height", + "type": "int", + "default": 1024, + "min": 64, + "step": 8, + "required_block_params": ["height"], + }, + "width": { + "label": "Width", + "type": "int", + "default": 1024, + "min": 64, + "step": 8, + "required_block_params": ["width"], + }, + "seed": { + "label": "Seed", + "type": "int", + "default": 0, + "min": 0, + "max": 4294967295, + "display": "random", + "required_block_params": ["generator"], + }, + "num_inference_steps": { + "label": "Steps", + "type": "int", + "default": 25, + "min": 1, + "max": 100, + "display": "slider", + "required_block_params": ["num_inference_steps"], + }, + "num_frames": { + "label": "Frames", + "type": "int", + "default": 81, + "min": 1, + "max": 480, + "display": "slider", + "required_block_params": ["num_frames"], + }, + "layers": { + "label": "Layers", + "type": "int", + "default": 4, + "min": 1, + "max": 10, + "display": "slider", + "required_block_params": ["layers"], + }, + "output_type": { + "label": "Output Type", + "type": "dropdown", + "default": "np", + "options": ["np", "pil", "pt"], + }, + # ControlNet + "controlnet_conditioning_scale": { + "label": "Controlnet Conditioning Scale", + "type": "float", + "default": 0.5, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["controlnet_conditioning_scale"], + }, + "control_guidance_start": { + "label": "Control Guidance Start", + "type": "float", + "default": 0.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["control_guidance_start"], + }, + "control_guidance_end": { + "label": "Control Guidance End", + "type": "float", + "default": 1.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "required_block_params": ["control_guidance_end"], + }, + # Video + "videos": {"label": "Videos", "type": "video", "display": "output", "required_block_params": ["videos"]}, + # Models + "vae": {"label": "VAE", "type": "diffusers_auto_model", "display": "input", "required_block_params": ["vae"]}, + "image_encoder": { + "label": "Image Encoder", + "type": "diffusers_auto_model", + "display": "input", + "required_block_params": ["image_encoder"], + }, + "unet": {"label": "Denoise Model", "type": "diffusers_auto_model", "display": "input"}, + "scheduler": {"label": "Scheduler", "type": "diffusers_auto_model", "display": "input"}, + "controlnet": { + "label": "ControlNet Model", + "type": "diffusers_auto_model", + "display": "input", + "required_block_params": ["controlnet"], + }, + "text_encoders": { + "label": "Text Encoders", + "type": "diffusers_auto_models", + "display": "input", + "required_block_params": ["text_encoder"], + }, + # Bundles/Custom + "controlnet_bundle": { + "label": "ControlNet", + "type": "custom_controlnet", + "display": "input", + "required_block_params": "controlnet_image", + }, + "ip_adapter": {"label": "IP Adapter", "type": "custom_ip_adapter", "display": "input"}, + "guider": { + "label": "Guider", + "type": "custom_guider", + "display": "input", + "onChange": {False: ["guidance_scale"], True: []}, + }, + "doc": {"label": "Doc", "type": "string", "display": "output"}, +} + + +class MellonParamMeta(type): + """Metaclass that enables MellonParam.template_name(**overrides) syntax.""" + + def __getattr__(cls, name: str): + if name in MELLON_PARAM_TEMPLATES: + + def factory(default=None, **overrides): + template = MELLON_PARAM_TEMPLATES[name] + # Use template's name if specified, otherwise use the key + params = {"name": template.get("name", name), **template, **overrides} + if default is not None: + params["default"] = default + return cls(**params) + + return factory + + raise AttributeError(f"type object 'MellonParam' has no attribute '{name}'") + + +@dataclass(frozen=True) +class MellonParam(metaclass=MellonParamMeta): + """ + Parameter definition for Mellon nodes. + + Usage: + ```python + # From template (standard diffuser params) + MellonParam.seed() + MellonParam.prompt(default="a cat") + MellonParam.latents(display="output") + + # Generic inputs (for custom blocks) + MellonParam.Input.slider("my_scale", default=1.0, min=0.0, max=2.0) + MellonParam.Input.dropdown("mode", options=["fast", "slow"]) + + # Generic outputs + MellonParam.Output.image("result_images") + + # Fully custom + MellonParam(name="custom", label="Custom", type="float", default=0.5) + ``` + """ + + name: str + label: str + type: str + display: str | None = None + default: Any = None + min: float | None = None + max: float | None = None + step: float | None = None + options: Any = None + value: Any = None + fieldOptions: dict[str, Any] | None = None + onChange: Any = None + onSignal: Any = None + required_block_params: str | list[str] | None = None + + def to_dict(self) -> dict[str, Any]: + """Convert to dict for Mellon schema, excluding None values and internal fields.""" + data = asdict(self) + return {k: v for k, v in data.items() if v is not None and k not in ("name", "required_block_params")} + + # ========================================================================= + # Input: Generic input parameter factories (for custom blocks) + # ========================================================================= + class Input: + """input UI elements for custom blocks.""" + + @classmethod + def image(cls, name: str) -> "MellonParam": + """image input.""" + return MellonParam(name=name, label=_name_to_label(name), type="image", display="input") + + @classmethod + def textbox(cls, name: str, default: str = "") -> "MellonParam": + """text input as textarea.""" + return MellonParam( + name=name, label=_name_to_label(name), type="string", display="textarea", default=default + ) + + @classmethod + def dropdown(cls, name: str, options: list[str] = None, default: str = None) -> "MellonParam": + """dropdown selection.""" + if options and not default: + default = options[0] + if not default: + default = "" + if not options: + options = [default] + return MellonParam(name=name, label=_name_to_label(name), type="string", options=options, value=default) + + @classmethod + def slider( + cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None + ) -> "MellonParam": + """slider input.""" + is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) + param_type = "float" if is_float else "int" + if min is None: + min = default + if max is None: + max = default + if step is None: + step = 0.01 if is_float else 1 + return MellonParam( + name=name, + label=_name_to_label(name), + type=param_type, + display="slider", + default=default, + min=min, + max=max, + step=step, + ) + + @classmethod + def number( + cls, name: str, default: float = 0, min: float = None, max: float = None, step: float = None + ) -> "MellonParam": + """number input (no slider).""" + is_float = isinstance(default, float) or (step is not None and isinstance(step, float)) + param_type = "float" if is_float else "int" + return MellonParam( + name=name, label=_name_to_label(name), type=param_type, default=default, min=min, max=max, step=step + ) + + @classmethod + def seed(cls, name: str = "seed", default: int = 0) -> "MellonParam": + """seed input with randomize button.""" + return MellonParam( + name=name, + label=_name_to_label(name), + type="int", + display="random", + default=default, + min=0, + max=4294967295, + ) + + @classmethod + def checkbox(cls, name: str, default: bool = False) -> "MellonParam": + """boolean checkbox.""" + return MellonParam(name=name, label=_name_to_label(name), type="boolean", value=default) + + @classmethod + def custom_type(cls, name: str, type: str) -> "MellonParam": + """custom type input for node connections.""" + return MellonParam(name=name, label=_name_to_label(name), type=type, display="input") + + @classmethod + def model(cls, name: str) -> "MellonParam": + """model input for diffusers components.""" + return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", display="input") + + # ========================================================================= + # Output: Generic output parameter factories (for custom blocks) + # ========================================================================= + class Output: + """output UI elements for custom blocks.""" + + @classmethod + def image(cls, name: str) -> "MellonParam": + """image output.""" + return MellonParam(name=name, label=_name_to_label(name), type="image", display="output") + + @classmethod + def video(cls, name: str) -> "MellonParam": + """video output.""" + return MellonParam(name=name, label=_name_to_label(name), type="video", display="output") + + @classmethod + def text(cls, name: str) -> "MellonParam": + """text output.""" + return MellonParam(name=name, label=_name_to_label(name), type="string", display="output") + + @classmethod + def custom_type(cls, name: str, type: str) -> "MellonParam": + """custom type output for node connections.""" + return MellonParam(name=name, label=_name_to_label(name), type=type, display="output") + + @classmethod + def model(cls, name: str) -> "MellonParam": + """model output for diffusers components.""" + return MellonParam(name=name, label=_name_to_label(name), type="diffusers_auto_model", display="output") + + +def input_param_to_mellon_param(input_param: "InputParam") -> MellonParam: + """ + Convert an InputParam to a MellonParam using metadata. + + Args: + input_param: An InputParam with optional metadata containing either: + - {"mellon": ""} for simple types (image, textbox, slider, etc.) + - {"mellon": MellonParam(...)} for full control over UI configuration + + Returns: + MellonParam instance + """ + name = input_param.name + metadata = input_param.metadata + mellon_value = metadata.get("mellon") if metadata else None + default = input_param.default + + # If it's already a MellonParam, return it directly + if isinstance(mellon_value, MellonParam): + return mellon_value + + mellon_type = mellon_value + + if mellon_type == "image": + return MellonParam.Input.image(name) + elif mellon_type == "textbox": + return MellonParam.Input.textbox(name, default=default or "") + elif mellon_type == "dropdown": + return MellonParam.Input.dropdown(name, default=default or "") + elif mellon_type == "slider": + return MellonParam.Input.slider(name, default=default or 0) + elif mellon_type == "number": + return MellonParam.Input.number(name, default=default or 0) + elif mellon_type == "seed": + return MellonParam.Input.seed(name, default=default or 0) + elif mellon_type == "checkbox": + return MellonParam.Input.checkbox(name, default=default or False) + elif mellon_type == "model": + return MellonParam.Input.model(name) + else: + # None or unknown -> custom + return MellonParam.Input.custom_type(name, type="custom") + + +def output_param_to_mellon_param(output_param: "OutputParam") -> MellonParam: + """ + Convert an OutputParam to a MellonParam using metadata. + + Args: + output_param: An OutputParam with optional metadata={"mellon": ""} where type is one of: + image, video, text, model. If metadata is None or unknown, maps to "custom". + + Returns: + MellonParam instance + """ + name = output_param.name + metadata = output_param.metadata + mellon_type = metadata.get("mellon") if metadata else None + + if mellon_type == "image": + return MellonParam.Output.image(name) + elif mellon_type == "video": + return MellonParam.Output.video(name) + elif mellon_type == "text": + return MellonParam.Output.text(name) + elif mellon_type == "model": + return MellonParam.Output.model(name) + else: + # None or unknown -> custom + return MellonParam.Output.custom_type(name, type="custom") + + +DEFAULT_NODE_SPECS = { + "controlnet": None, + "denoise": { + "inputs": [ + MellonParam.embeddings(display="input"), + MellonParam.width(), + MellonParam.height(), + MellonParam.seed(), + MellonParam.num_inference_steps(), + MellonParam.num_frames(), + MellonParam.guidance_scale(), + MellonParam.strength(), + MellonParam.image_latents_with_strength(), + MellonParam.image_latents(), + MellonParam.first_frame_latents(), + MellonParam.controlnet_bundle(display="input"), + ], + "model_inputs": [ + MellonParam.unet(), + MellonParam.guider(), + MellonParam.scheduler(), + ], + "outputs": [ + MellonParam.latents(display="output"), + MellonParam.latents_preview(), + MellonParam.doc(), + ], + "required_inputs": ["embeddings"], + "required_model_inputs": ["unet", "scheduler"], + "block_name": "denoise", + }, + "vae_encoder": { + "inputs": [ + MellonParam.image(), + ], + "model_inputs": [ + MellonParam.vae(), + ], + "outputs": [ + MellonParam.image_latents(display="output"), + MellonParam.doc(), + ], + "required_inputs": ["image"], + "required_model_inputs": ["vae"], + "block_name": "vae_encoder", + }, + "text_encoder": { + "inputs": [ + MellonParam.prompt(), + MellonParam.negative_prompt(), + ], + "model_inputs": [ + MellonParam.text_encoders(), + ], + "outputs": [ + MellonParam.embeddings(display="output"), + MellonParam.doc(), + ], + "required_inputs": ["prompt"], + "required_model_inputs": ["text_encoders"], + "block_name": "text_encoder", + }, + "decoder": { + "inputs": [ + MellonParam.latents(display="input"), + ], + "model_inputs": [ + MellonParam.vae(), + ], + "outputs": [ + MellonParam.images(), + MellonParam.videos(), + MellonParam.doc(), + ], + "required_inputs": ["latents"], + "required_model_inputs": ["vae"], + "block_name": "decode", + }, +} + + +def mark_required(label: str, marker: str = " *") -> str: + """Add required marker to label if not already present.""" + if label.endswith(marker): + return label + return f"{label}{marker}" + + +def node_spec_to_mellon_dict(node_spec: dict[str, Any], node_type: str) -> dict[str, Any]: + """ + Convert a node spec dict into Mellon format. + + A node spec is how we define a Mellon diffusers node in code. This function converts it into the `params` map + format that Mellon UI expects. + + The `params` map is a dict where keys are parameter names and values are UI configuration: + ```python + {"seed": {"label": "Seed", "type": "int", "default": 0}} + ``` + + For Modular Mellon nodes, we need to distinguish: + - `inputs`: Pipeline inputs (e.g., seed, prompt, image) + - `model_inputs`: Model components (e.g., unet, vae, scheduler) + - `outputs`: Node outputs (e.g., latents, images) + + The node spec also includes: + - `required_inputs` / `required_model_inputs`: Which params are required (marked with *) + - `block_name`: The modular pipeline block this node corresponds to on backend + + We provide factory methods for common parameters (e.g., `MellonParam.seed()`, `MellonParam.unet()`) so you don't + have to manually specify all the UI configuration. + + Args: + node_spec: Dict with `inputs`, `model_inputs`, `outputs` (lists of MellonParam), + plus `required_inputs`, `required_model_inputs`, `block_name`. + node_type: The node type string (e.g., "denoise", "controlnet") + + Returns: + Dict with: + - `params`: Flat dict of all params in Mellon UI format + - `input_names`: List of input parameter names + - `model_input_names`: List of model input parameter names + - `output_names`: List of output parameter names + - `block_name`: The backend block name + - `node_type`: The node type + + Example: + ```python + node_spec = { + "inputs": [MellonParam.seed(), MellonParam.prompt()], + "model_inputs": [MellonParam.unet()], + "outputs": [MellonParam.latents(display="output")], + "required_inputs": ["prompt"], + "required_model_inputs": ["unet"], + "block_name": "denoise", + } + + result = node_spec_to_mellon_dict(node_spec, "denoise") + # Returns: + # { + # "params": { + # "seed": {"label": "Seed", "type": "int", "default": 0}, + # "prompt": {"label": "Prompt *", "type": "string", "default": ""}, # * marks required + # "unet": {"label": "Denoise Model *", "type": "diffusers_auto_model", "display": "input"}, + # "latents": {"label": "Latents", "type": "latents", "display": "output"}, + # }, + # "input_names": ["seed", "prompt"], + # "model_input_names": ["unet"], + # "output_names": ["latents"], + # "block_name": "denoise", + # "node_type": "denoise", + # } + ``` + """ + params = {} + input_names = [] + model_input_names = [] + output_names = [] + + required_inputs = node_spec.get("required_inputs", []) + required_model_inputs = node_spec.get("required_model_inputs", []) + + # Process inputs + for p in node_spec.get("inputs", []): + param_dict = p.to_dict() + if p.name in required_inputs: + param_dict["label"] = mark_required(param_dict["label"]) + params[p.name] = param_dict + input_names.append(p.name) + + # Process model_inputs + for p in node_spec.get("model_inputs", []): + param_dict = p.to_dict() + if p.name in required_model_inputs: + param_dict["label"] = mark_required(param_dict["label"]) + params[p.name] = param_dict + model_input_names.append(p.name) + + # Process outputs: add a prefix to the output name if it already exists as an input + for p in node_spec.get("outputs", []): + if p.name in input_names: + # rename to out_ + output_name = f"out_{p.name}" + else: + output_name = p.name + params[output_name] = p.to_dict() + output_names.append(output_name) + + return { + "params": params, + "input_names": input_names, + "model_input_names": model_input_names, + "output_names": output_names, + "block_name": node_spec.get("block_name"), + "node_type": node_type, + } + + +class MellonPipelineConfig: + """ + Configuration for an entire Mellon pipeline containing multiple nodes. + + Accepts node specs as dicts with inputs/model_inputs/outputs lists of MellonParam, converts them to Mellon-ready + format, and handles save/load to Hub. + + Example: + ```python + config = MellonPipelineConfig( + node_specs={ + "denoise": { + "inputs": [MellonParam.seed(), MellonParam.prompt()], + "model_inputs": [MellonParam.unet()], + "outputs": [MellonParam.latents(display="output")], + "required_inputs": ["prompt"], + "required_model_inputs": ["unet"], + "block_name": "denoise", + }, + "decoder": { + "inputs": [MellonParam.latents(display="input")], + "outputs": [MellonParam.images()], + "block_name": "decoder", + }, + }, + label="My Pipeline", + default_repo="user/my-pipeline", + default_dtype="float16", + ) + + # Access Mellon format dict + denoise = config.node_params["denoise"] + input_names = denoise["input_names"] + params = denoise["params"] + + # Save to Hub + config.save("./my_config", push_to_hub=True, repo_id="user/my-pipeline") + + # Load from Hub + loaded = MellonPipelineConfig.load("user/my-pipeline") + ``` + """ + + config_name = "mellon_pipeline_config.json" + + def __init__( + self, + node_specs: dict[str, dict[str, Any] | None], + label: str = "", + default_repo: str = "", + default_dtype: str = "", + ): + """ + Args: + node_specs: Dict mapping node_type to node spec or None. + Node spec has: inputs, model_inputs, outputs, required_inputs, required_model_inputs, + block_name (all optional) + label: Human-readable label for the pipeline + default_repo: Default HuggingFace repo for this pipeline + default_dtype: Default dtype (e.g., "float16", "bfloat16") + """ + # Convert all node specs to Mellon format immediately + self.node_specs = node_specs + + self.label = label + self.default_repo = default_repo + self.default_dtype = default_dtype + + @property + def node_params(self) -> dict[str, Any]: + """Lazily compute node_params from node_specs.""" + if self.node_specs is None: + return self._node_params + + params = {} + for node_type, spec in self.node_specs.items(): + if spec is None: + params[node_type] = None + else: + params[node_type] = node_spec_to_mellon_dict(spec, node_type) + return params + + def __repr__(self) -> str: + lines = [ + f"MellonPipelineConfig(label={self.label!r}, default_repo={self.default_repo!r}, default_dtype={self.default_dtype!r})" + ] + for node_type, spec in self.node_specs.items(): + if spec is None: + lines.append(f" {node_type}: None") + else: + inputs = [p.name for p in spec.get("inputs", [])] + model_inputs = [p.name for p in spec.get("model_inputs", [])] + outputs = [p.name for p in spec.get("outputs", [])] + lines.append(f" {node_type}:") + lines.append(f" inputs: {inputs}") + lines.append(f" model_inputs: {model_inputs}") + lines.append(f" outputs: {outputs}") + return "\n".join(lines) + + def to_dict(self) -> dict[str, Any]: + """Convert to a JSON-serializable dictionary.""" + return { + "label": self.label, + "default_repo": self.default_repo, + "default_dtype": self.default_dtype, + "node_params": self.node_params, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "MellonPipelineConfig": + """ + Create from a dictionary (loaded from JSON). + + Note: The mellon_params are already in Mellon format when loading from JSON. + """ + instance = cls.__new__(cls) + instance.node_specs = None + instance._node_params = data.get("node_params", {}) + instance.label = data.get("label", "") + instance.default_repo = data.get("default_repo", "") + instance.default_dtype = data.get("default_dtype", "") + return instance + + def to_json_string(self) -> str: + """Serialize to JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=False) + "\n" + + def to_json_file(self, json_file_path: str | os.PathLike): + """Save to a JSON file.""" + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string()) + + @classmethod + def from_json_file(cls, json_file_path: str | os.PathLike) -> "MellonPipelineConfig": + """Load from a JSON file.""" + with open(json_file_path, "r", encoding="utf-8") as reader: + data = json.load(reader) + return cls.from_dict(data) + + def save(self, save_directory: str | os.PathLike, push_to_hub: bool = False, **kwargs): + """Save the mellon pipeline config to a directory.""" + if os.path.isfile(save_directory): + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") + + os.makedirs(save_directory, exist_ok=True) + output_path = os.path.join(save_directory, self.config_name) + self.to_json_file(output_path) + logger.info(f"Pipeline config saved to {output_path}") + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + private = kwargs.pop("private", None) + create_pr = kwargs.pop("create_pr", False) + token = kwargs.pop("token", None) + repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id + + upload_file( + path_or_fileobj=output_path, + path_in_repo=self.config_name, + repo_id=repo_id, + token=token, + commit_message=commit_message or "Upload MellonPipelineConfig", + create_pr=create_pr, + ) + logger.info(f"Pipeline config pushed to hub: {repo_id}") + + @classmethod + def load( + cls, + pretrained_model_name_or_path: str | os.PathLike, + **kwargs, + ) -> "MellonPipelineConfig": + """Load a pipeline config from a local path or Hugging Face Hub.""" + cache_dir = kwargs.pop("cache_dir", None) + local_dir = kwargs.pop("local_dir", None) + local_dir_use_symlinks = kwargs.pop("local_dir_use_symlinks", "auto") + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if os.path.isfile(pretrained_model_name_or_path): + config_file = pretrained_model_name_or_path + elif os.path.isdir(pretrained_model_name_or_path): + config_file = os.path.join(pretrained_model_name_or_path, cls.config_name) + if not os.path.isfile(config_file): + raise EnvironmentError(f"No file named {cls.config_name} found in {pretrained_model_name_or_path}") + else: + try: + config_file = hf_hub_download( + pretrained_model_name_or_path, + filename=cls.config_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder=subfolder, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + ) + except RepositoryNotFoundError: + raise EnvironmentError( + f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier" + " listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a" + " token having permission to this repo with `token` or log in with `hf auth login`." + ) + except RevisionNotFoundError: + raise EnvironmentError( + f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for" + " this model name. Check the model page at" + f" 'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions." + ) + except EntryNotFoundError: + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named {cls.config_name}." + ) + except HfHubHTTPError as err: + raise EnvironmentError( + "There was a specific connection error when trying to load" + f" {pretrained_model_name_or_path}:\n{err}" + ) + except ValueError: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to" + " run the library in offline mode at" + " 'https://huggingface.co/docs/diffusers/installation#offline-mode'." + ) + except EnvironmentError: + raise EnvironmentError( + f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a {cls.config_name} file" + ) + + try: + return cls.from_json_file(config_file) + except (json.JSONDecodeError, UnicodeDecodeError): + raise EnvironmentError(f"The config file at '{config_file}' is not a valid JSON file.") + + @classmethod + def from_blocks( + cls, + blocks, + template: dict[str, dict[str, Any]] | None = None, + label: str = "", + default_repo: str = "", + default_dtype: str = "bfloat16", + ) -> "MellonPipelineConfig": + """ + Create MellonPipelineConfig by matching template against actual pipeline blocks. + """ + if template is None: + template = DEFAULT_NODE_SPECS + + sub_block_map = dict(blocks.sub_blocks) + + def filter_spec_for_block(template_spec: dict[str, Any], block) -> dict[str, Any] | None: + """Filter template spec params based on what the block actually supports.""" + block_input_names = set(block.input_names) + block_output_names = set(block.intermediate_output_names) + block_component_names = set(block.component_names) + + filtered_inputs = [ + p + for p in template_spec.get("inputs", []) + if p.required_block_params is None + or all(name in block_input_names for name in p.required_block_params) + ] + filtered_model_inputs = [ + p + for p in template_spec.get("model_inputs", []) + if p.required_block_params is None + or all(name in block_component_names for name in p.required_block_params) + ] + filtered_outputs = [ + p + for p in template_spec.get("outputs", []) + if p.required_block_params is None + or all(name in block_output_names for name in p.required_block_params) + ] + + filtered_input_names = {p.name for p in filtered_inputs} + filtered_model_input_names = {p.name for p in filtered_model_inputs} + + filtered_required_inputs = [ + r for r in template_spec.get("required_inputs", []) if r in filtered_input_names + ] + filtered_required_model_inputs = [ + r for r in template_spec.get("required_model_inputs", []) if r in filtered_model_input_names + ] + + return { + "inputs": filtered_inputs, + "model_inputs": filtered_model_inputs, + "outputs": filtered_outputs, + "required_inputs": filtered_required_inputs, + "required_model_inputs": filtered_required_model_inputs, + "block_name": template_spec.get("block_name"), + } + + # Build node specs + node_specs = {} + for node_type, template_spec in template.items(): + if template_spec is None: + node_specs[node_type] = None + continue + + block_name = template_spec.get("block_name") + if block_name is None or block_name not in sub_block_map: + node_specs[node_type] = None + continue + + node_specs[node_type] = filter_spec_for_block(template_spec, sub_block_map[block_name]) + + return cls( + node_specs=node_specs, + label=label or getattr(blocks, "model_name", ""), + default_repo=default_repo, + default_dtype=default_dtype, + ) + + @classmethod + def from_custom_block( + cls, + block, + node_label: str = None, + input_types: dict[str, Any] | None = None, + output_types: dict[str, Any] | None = None, + ) -> "MellonPipelineConfig": + """ + Create a MellonPipelineConfig from a custom block. + + Args: + block: A block instance with `inputs`, `outputs`, and `expected_components`/`component_names` properties. + Each InputParam/OutputParam should have metadata={"mellon": ""} where type is one of: image, + video, text, checkbox, number, slider, dropdown, model. If metadata is None, maps to "custom". + node_label: The display label for the node. Defaults to block class name with spaces. + input_types: + Optional dict mapping input param names to mellon types. Overrides the block's metadata if provided. + Example: {"prompt": "textbox", "image": "image"} + output_types: + Optional dict mapping output param names to mellon types. Overrides the block's metadata if provided. + Example: {"prompt": "text", "images": "image"} + + Returns: + MellonPipelineConfig instance + """ + if node_label is None: + class_name = block.__class__.__name__ + node_label = "".join([" " + c if c.isupper() else c for c in class_name]).strip() + + if input_types is None: + input_types = {} + if output_types is None: + output_types = {} + + inputs = [] + model_inputs = [] + outputs = [] + + # Process block inputs + for input_param in block.inputs: + if input_param.name is None: + continue + if input_param.name in input_types: + input_param = copy.copy(input_param) + input_param.metadata = {"mellon": input_types[input_param.name]} + print(f" processing input: {input_param.name}, metadata: {input_param.metadata}") + inputs.append(input_param_to_mellon_param(input_param)) + + # Process block outputs + for output_param in block.outputs: + if output_param.name is None: + continue + if output_param.name in output_types: + output_param = copy.copy(output_param) + output_param.metadata = {"mellon": output_types[output_param.name]} + outputs.append(output_param_to_mellon_param(output_param)) + + # Process expected components (all map to model inputs) + component_names = block.component_names + for component_name in component_names: + model_inputs.append(MellonParam.Input.model(component_name)) + + # Always add doc output + outputs.append(MellonParam.doc()) + + node_spec = { + "inputs": inputs, + "model_inputs": model_inputs, + "outputs": outputs, + "required_inputs": [], + "required_model_inputs": [], + "block_name": "custom", + } + + return cls( + node_specs={"custom": node_spec}, + label=node_label, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/modular_pipeline.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/modular_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..a563d2aa99eb449dba8b5c71ed66ed9f3ba7a563 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/modular_pipeline.py @@ -0,0 +1,2829 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import inspect +import os +import sys +import traceback +import warnings +from collections import OrderedDict +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Any + +import torch +from huggingface_hub import create_repo +from huggingface_hub.utils import validate_hf_hub_args +from tqdm.auto import tqdm +from typing_extensions import Self + +from ..configuration_utils import ConfigMixin, FrozenDict +from ..pipelines.pipeline_loading_utils import ( + LOADABLE_CLASSES, + _fetch_class_library_tuple, + _unwrap_model, + simple_get_class_obj, +) +from ..utils import PushToHubMixin, is_accelerate_available, logging +from ..utils.dynamic_modules_utils import get_class_from_dynamic_module, resolve_trust_remote_code +from ..utils.hub_utils import load_or_create_model_card, populate_model_card +from ..utils.torch_utils import is_compiled_module +from .components_manager import ComponentsManager +from .modular_pipeline_utils import ( + MODULAR_MODEL_CARD_TEMPLATE, + ComponentSpec, + ConfigSpec, + InputParam, + InsertableDict, + OutputParam, + _validate_requirements, + combine_inputs, + combine_outputs, + format_components, + format_configs, + format_workflow, + generate_modular_model_card_content, + make_doc_string, +) + + +if is_accelerate_available(): + import accelerate + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +# map regular pipeline to modular pipeline class name + + +def _create_default_map_fn(pipeline_class_name: str): + """Create a mapping function that always returns the same pipeline class.""" + + def _map_fn(config_dict=None): + return pipeline_class_name + + return _map_fn + + +def _flux2_klein_map_fn(config_dict=None): + if config_dict is None: + return "Flux2KleinModularPipeline" + + if "is_distilled" in config_dict and config_dict["is_distilled"]: + return "Flux2KleinModularPipeline" + else: + return "Flux2KleinBaseModularPipeline" + + +def _wan_map_fn(config_dict=None): + if config_dict is None: + return "WanModularPipeline" + + if "boundary_ratio" in config_dict and config_dict["boundary_ratio"] is not None: + return "Wan22ModularPipeline" + else: + return "WanModularPipeline" + + +def _wan_i2v_map_fn(config_dict=None): + if config_dict is None: + return "WanImage2VideoModularPipeline" + + if "boundary_ratio" in config_dict and config_dict["boundary_ratio"] is not None: + return "Wan22Image2VideoModularPipeline" + else: + return "WanImage2VideoModularPipeline" + + +MODULAR_PIPELINE_MAPPING = OrderedDict( + [ + ("stable-diffusion-xl", _create_default_map_fn("StableDiffusionXLModularPipeline")), + ("wan", _wan_map_fn), + ("wan-i2v", _wan_i2v_map_fn), + ("flux", _create_default_map_fn("FluxModularPipeline")), + ("flux-kontext", _create_default_map_fn("FluxKontextModularPipeline")), + ("flux2", _create_default_map_fn("Flux2ModularPipeline")), + ("flux2-klein", _flux2_klein_map_fn), + ("qwenimage", _create_default_map_fn("QwenImageModularPipeline")), + ("qwenimage-edit", _create_default_map_fn("QwenImageEditModularPipeline")), + ("qwenimage-edit-plus", _create_default_map_fn("QwenImageEditPlusModularPipeline")), + ("qwenimage-layered", _create_default_map_fn("QwenImageLayeredModularPipeline")), + ("z-image", _create_default_map_fn("ZImageModularPipeline")), + ] +) + + +@dataclass +class PipelineState: + """ + [`PipelineState`] stores the state of a pipeline. It is used to pass data between pipeline blocks. + """ + + values: dict[str, Any] = field(default_factory=dict) + kwargs_mapping: dict[str, list[str]] = field(default_factory=dict) + + def set(self, key: str, value: Any, kwargs_type: str = None): + """ + Add a value to the pipeline state. + + Args: + key (str): The key for the value + value (Any): The value to store + kwargs_type (str): The kwargs_type with which the value is associated + """ + self.values[key] = value + + if kwargs_type is not None: + if kwargs_type not in self.kwargs_mapping: + self.kwargs_mapping[kwargs_type] = [key] + else: + self.kwargs_mapping[kwargs_type].append(key) + + def get(self, keys: str | list[str], default: Any = None) -> Any | dict[str, Any]: + """ + Get one or multiple values from the pipeline state. + + Args: + keys (str | list[str]): Key or list of keys for the values + default (Any): The default value to return if not found + + Returns: + Any | dict[str, Any]: Single value if keys is str, dictionary of values if keys is list + """ + if isinstance(keys, str): + return self.values.get(keys, default) + return {key: self.values.get(key, default) for key in keys} + + def get_by_kwargs(self, kwargs_type: str) -> dict[str, Any]: + """ + Get all values with matching kwargs_type. + + Args: + kwargs_type (str): The kwargs_type to filter by + + Returns: + dict[str, Any]: Dictionary of values with matching kwargs_type + """ + value_names = self.kwargs_mapping.get(kwargs_type, []) + return self.get(value_names) + + def to_dict(self) -> dict[str, Any]: + """ + Convert PipelineState to a dictionary. + """ + return {**self.__dict__} + + def __getattr__(self, name): + """ + Allow attribute access to intermediate values. If an attribute is not found in the object, look for it in the + intermediates dict. + """ + # Use object.__getattribute__ to avoid infinite recursion during deepcopy + try: + values = object.__getattribute__(self, "values") + except AttributeError: + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + + if name in values: + return values[name] + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + + def __repr__(self): + def format_value(v): + if hasattr(v, "shape") and hasattr(v, "dtype"): + return f"Tensor(dtype={v.dtype}, shape={v.shape})" + elif isinstance(v, list) and len(v) > 0 and hasattr(v[0], "shape") and hasattr(v[0], "dtype"): + return f"[Tensor(dtype={v[0].dtype}, shape={v[0].shape}), ...]" + else: + return repr(v) + + values_str = "\n".join(f" {k}: {format_value(v)}" for k, v in self.values.items()) + kwargs_mapping_str = "\n".join(f" {k}: {v}" for k, v in self.kwargs_mapping.items()) + + return f"PipelineState(\n values={{\n{values_str}\n }},\n kwargs_mapping={{\n{kwargs_mapping_str}\n }}\n)" + + +@dataclass +class BlockState: + """ + Container for block state data with attribute access and formatted representation. + """ + + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def __getitem__(self, key: str): + # allows block_state["foo"] + return getattr(self, key, None) + + def __setitem__(self, key: str, value: Any): + # allows block_state["foo"] = "bar" + setattr(self, key, value) + + def as_dict(self): + """ + Convert BlockState to a dictionary. + + Returns: + dict[str, Any]: Dictionary containing all attributes of the BlockState + """ + return dict(self.__dict__.items()) + + def __repr__(self): + def format_value(v): + # Handle tensors directly + if hasattr(v, "shape") and hasattr(v, "dtype"): + return f"Tensor(dtype={v.dtype}, shape={v.shape})" + + # Handle lists of tensors + elif isinstance(v, list): + if len(v) > 0 and hasattr(v[0], "shape") and hasattr(v[0], "dtype"): + shapes = [t.shape for t in v] + return f"list[{len(v)}] of Tensors with shapes {shapes}" + return repr(v) + + # Handle tuples of tensors + elif isinstance(v, tuple): + if len(v) > 0 and hasattr(v[0], "shape") and hasattr(v[0], "dtype"): + shapes = [t.shape for t in v] + return f"tuple[{len(v)}] of Tensors with shapes {shapes}" + return repr(v) + + # Handle dicts with tensor values + elif isinstance(v, dict): + formatted_dict = {} + for k, val in v.items(): + if hasattr(val, "shape") and hasattr(val, "dtype"): + formatted_dict[k] = f"Tensor(shape={val.shape}, dtype={val.dtype})" + elif ( + isinstance(val, list) + and len(val) > 0 + and hasattr(val[0], "shape") + and hasattr(val[0], "dtype") + ): + shapes = [t.shape for t in val] + formatted_dict[k] = f"list[{len(val)}] of Tensors with shapes {shapes}" + else: + formatted_dict[k] = repr(val) + return formatted_dict + + # Default case + return repr(v) + + attributes = "\n".join(f" {k}: {format_value(v)}" for k, v in self.__dict__.items()) + return f"BlockState(\n{attributes}\n)" + + +class ModularPipelineBlocks(ConfigMixin, PushToHubMixin): + """ + Base class for all Pipeline Blocks: ConditionalPipelineBlocks, AutoPipelineBlocks, SequentialPipelineBlocks, + LoopSequentialPipelineBlocks + + [`ModularPipelineBlocks`] provides method to load and save the definition of pipeline blocks. + + > [!WARNING] > This is an experimental feature and is likely to change in the future. + """ + + config_name = "modular_config.json" + model_name = None + _requirements: dict[str, str] | None = None + _workflow_map = None + + @classmethod + def _get_signature_keys(cls, obj): + parameters = inspect.signature(obj.__init__).parameters + required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} + optional_parameters = set({k for k, v in parameters.items() if v.default != inspect._empty}) + expected_modules = set(required_parameters.keys()) - {"self"} + + return expected_modules, optional_parameters + + def __init__(self): + self.sub_blocks = InsertableDict() + + @property + def description(self) -> str: + """Description of the block. Must be implemented by subclasses.""" + return "" + + @property + def expected_components(self) -> list[ComponentSpec]: + return [] + + @property + def expected_configs(self) -> list[ConfigSpec]: + return [] + + @property + def inputs(self) -> list[InputParam]: + """list of input parameters. Must be implemented by subclasses.""" + return [] + + def _get_required_inputs(self): + input_names = [] + for input_param in self.inputs: + if input_param.required: + input_names.append(input_param.name) + + return input_names + + @property + def required_inputs(self) -> list[InputParam]: + return self._get_required_inputs() + + @property + def intermediate_outputs(self) -> list[OutputParam]: + """list of intermediate output parameters. Must be implemented by subclasses.""" + return [] + + def _get_outputs(self): + return self.intermediate_outputs + + @property + def outputs(self) -> list[OutputParam]: + return self._get_outputs() + + # currentlyonly ConditionalPipelineBlocks and SequentialPipelineBlocks support `get_execution_blocks` + def get_execution_blocks(self, **kwargs): + """ + Get the block(s) that would execute given the inputs. Must be implemented by subclasses that support + conditional block selection. + + Args: + **kwargs: Input names and values. Only trigger inputs affect block selection. + """ + raise NotImplementedError(f"`get_execution_blocks` is not implemented for {self.__class__.__name__}") + + # currently only SequentialPipelineBlocks support workflows + @property + def available_workflows(self): + """ + Returns a list of available workflow names. Must be implemented by subclasses that define `_workflow_map`. + """ + raise NotImplementedError(f"`available_workflows` is not implemented for {self.__class__.__name__}") + + def get_workflow(self, workflow_name: str): + """ + Get the execution blocks for a specific workflow. Must be implemented by subclasses that define + `_workflow_map`. + + Args: + workflow_name: Name of the workflow to retrieve. + """ + raise NotImplementedError(f"`get_workflow` is not implemented for {self.__class__.__name__}") + + @classmethod + def from_pretrained( + cls, + pretrained_model_name_or_path: str, + trust_remote_code: bool = False, + **kwargs, + ): + hub_kwargs_names = [ + "cache_dir", + "force_download", + "local_files_only", + "local_dir", + "proxies", + "revision", + "subfolder", + "token", + ] + hub_kwargs = {name: kwargs.pop(name) for name in hub_kwargs_names if name in kwargs} + + config = cls.load_config(pretrained_model_name_or_path, **hub_kwargs) + has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"] + trust_remote_code = resolve_trust_remote_code( + trust_remote_code, pretrained_model_name_or_path, has_remote_code + ) + if not has_remote_code and trust_remote_code: + raise ValueError( + "Selected model repository does not happear to have any custom code or does not have a valid `config.json` file." + ) + + if "requirements" in config and config["requirements"] is not None: + _ = _validate_requirements(config["requirements"]) + + class_ref = config["auto_map"][cls.__name__] + module_file, class_name = class_ref.split(".") + module_file = module_file + ".py" + block_cls = get_class_from_dynamic_module( + pretrained_model_name_or_path, + module_file=module_file, + class_name=class_name, + **hub_kwargs, + ) + expected_kwargs, optional_kwargs = block_cls._get_signature_keys(block_cls) + block_kwargs = { + name: kwargs.get(name) for name in kwargs if name in expected_kwargs or name in optional_kwargs + } + + return block_cls(**block_kwargs) + + def save_pretrained(self, save_directory, push_to_hub=False, **kwargs): + # TODO: factor out this logic. + cls_name = self.__class__.__name__ + + full_mod = type(self).__module__ + module = full_mod.rsplit(".", 1)[-1].replace("__dynamic__", "") + parent_module = self.save_pretrained.__func__.__qualname__.split(".", 1)[0] + auto_map = {f"{parent_module}": f"{module}.{cls_name}"} + self.register_to_config(auto_map=auto_map) + + # resolve requirements + requirements = _validate_requirements(getattr(self, "_requirements", None)) + if requirements: + self.register_to_config(requirements=requirements) + + self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs) + config = dict(self.config) + self._internal_dict = FrozenDict(config) + + def init_pipeline( + self, + pretrained_model_name_or_path: str | os.PathLike | None = None, + components_manager: ComponentsManager | None = None, + collection: str | None = None, + ) -> "ModularPipeline": + """ + create a ModularPipeline, optionally accept pretrained_model_name_or_path to load from hub. + """ + map_fn = MODULAR_PIPELINE_MAPPING.get(self.model_name, _create_default_map_fn("ModularPipeline")) + pipeline_class_name = map_fn() + diffusers_module = importlib.import_module("diffusers") + pipeline_class = getattr(diffusers_module, pipeline_class_name) + + modular_pipeline = pipeline_class( + blocks=deepcopy(self), + pretrained_model_name_or_path=pretrained_model_name_or_path, + components_manager=components_manager, + collection=collection, + ) + return modular_pipeline + + def get_block_state(self, state: PipelineState) -> dict: + """Get all inputs and intermediates in one dictionary""" + data = {} + state_inputs = self.inputs + + # Check inputs + for input_param in state_inputs: + if input_param.name: + value = state.get(input_param.name) + if input_param.required and value is None: + raise ValueError(f"Required input '{input_param.name}' is missing") + elif value is not None or (value is None and input_param.name not in data): + data[input_param.name] = value + + elif input_param.kwargs_type: + # if kwargs_type is provided, get all inputs with matching kwargs_type + if input_param.kwargs_type not in data: + data[input_param.kwargs_type] = {} + inputs_kwargs = state.get_by_kwargs(input_param.kwargs_type) + if inputs_kwargs: + for k, v in inputs_kwargs.items(): + if v is not None: + data[k] = v + data[input_param.kwargs_type][k] = v + + return BlockState(**data) + + def set_block_state(self, state: PipelineState, block_state: BlockState): + for output_param in self.intermediate_outputs: + if not hasattr(block_state, output_param.name): + raise ValueError(f"Intermediate output '{output_param.name}' is missing in block state") + param = getattr(block_state, output_param.name) + state.set(output_param.name, param, output_param.kwargs_type) + + for input_param in self.inputs: + if input_param.name and hasattr(block_state, input_param.name): + param = getattr(block_state, input_param.name) + # Only add if the value is different from what's in the state + current_value = state.get(input_param.name) + if current_value is not param: # Using identity comparison to check if object was modified + state.set(input_param.name, param, input_param.kwargs_type) + + elif input_param.kwargs_type: + # if it is a kwargs type, e.g. "denoiser_input_fields", it is likely to be a list of parameters + # we need to first find out which inputs are and loop through them. + intermediate_kwargs = state.get_by_kwargs(input_param.kwargs_type) + for param_name, current_value in intermediate_kwargs.items(): + if param_name is None: + continue + + if not hasattr(block_state, param_name): + continue + + param = getattr(block_state, param_name) + if current_value is not param: # Using identity comparison to check if object was modified + state.set(param_name, param, input_param.kwargs_type) + + @property + def input_names(self) -> list[str]: + return [input_param.name for input_param in self.inputs if input_param.name is not None] + + @property + def intermediate_output_names(self) -> list[str]: + return [output_param.name for output_param in self.intermediate_outputs if output_param.name is not None] + + @property + def output_names(self) -> list[str]: + return [output_param.name for output_param in self.outputs if output_param.name is not None] + + @property + def component_names(self) -> list[str]: + return [component.name for component in self.expected_components] + + @property + def doc(self): + return make_doc_string( + self.inputs, + self.outputs, + self.description, + class_name=self.__class__.__name__, + expected_components=self.expected_components, + expected_configs=self.expected_configs, + ) + + +class ConditionalPipelineBlocks(ModularPipelineBlocks): + """ + A Pipeline Blocks that conditionally selects a block to run based on the inputs. Subclasses must implement the + `select_block` method to define the logic for selecting the block. Currently, we only support selection logic based + on the presence or absence of inputs (i.e., whether they are `None` or not) + + This class inherits from [`ModularPipelineBlocks`]. Check the superclass documentation for the generic methods the + library implements for all the pipeline blocks (such as loading or saving etc.) + + > [!WARNING] > This is an experimental feature and is likely to change in the future. + + Attributes: + block_classes: List of block classes to be used. Must have the same length as `block_names`. + block_names: List of names for each block. Must have the same length as `block_classes`. + block_trigger_inputs: List of input names that `select_block()` uses to determine which block to run. + For `ConditionalPipelineBlocks`, this does not need to correspond to `block_names` and `block_classes`. For + `AutoPipelineBlocks`, this must have the same length as `block_names` and `block_classes`, where each + element specifies the trigger input for the corresponding block. + default_block_name: Name of the default block to run when no trigger inputs match. + If None, this block can be skipped entirely when no trigger inputs are provided. + """ + + block_classes = [] + block_names = [] + block_trigger_inputs = [] + default_block_name = None + + def __init__(self): + sub_blocks = InsertableDict() + for block_name, block in zip(self.block_names, self.block_classes): + if inspect.isclass(block): + sub_blocks[block_name] = block() + else: + sub_blocks[block_name] = block + self.sub_blocks = sub_blocks + if not (len(self.block_classes) == len(self.block_names)): + raise ValueError( + f"In {self.__class__.__name__}, the number of block_classes and block_names must be the same." + ) + if self.default_block_name is not None and self.default_block_name not in self.block_names: + raise ValueError( + f"In {self.__class__.__name__}, default_block_name '{self.default_block_name}' must be one of block_names: {self.block_names}" + ) + + @property + def model_name(self): + return next(iter(self.sub_blocks.values())).model_name + + @property + def description(self): + return "" + + @property + def expected_components(self): + expected_components = [] + for block in self.sub_blocks.values(): + for component in block.expected_components: + if component not in expected_components: + expected_components.append(component) + return expected_components + + @property + def expected_configs(self): + expected_configs = [] + for block in self.sub_blocks.values(): + for config in block.expected_configs: + if config not in expected_configs: + expected_configs.append(config) + return expected_configs + + @property + def required_inputs(self) -> list[str]: + # no default block means this conditional block can be skipped entirely + if self.default_block_name is None: + return [] + + first_block = next(iter(self.sub_blocks.values())) + required_by_all = set(getattr(first_block, "required_inputs", set())) + + # Intersect with required inputs from all other blocks + for block in list(self.sub_blocks.values())[1:]: + block_required = set(getattr(block, "required_inputs", set())) + required_by_all.intersection_update(block_required) + + return list(required_by_all) + + @property + def inputs(self) -> list[tuple[str, Any]]: + named_inputs = [(name, block.inputs) for name, block in self.sub_blocks.items()] + combined_inputs = combine_inputs(*named_inputs) + # mark Required inputs only if that input is required by all the blocks + for input_param in combined_inputs: + if input_param.name in self.required_inputs: + input_param.required = True + else: + input_param.required = False + return combined_inputs + + @property + def intermediate_outputs(self) -> list[str]: + named_outputs = [(name, block.intermediate_outputs) for name, block in self.sub_blocks.items()] + combined_outputs = combine_outputs(*named_outputs) + return combined_outputs + + @property + def outputs(self) -> list[str]: + named_outputs = [(name, block.outputs) for name, block in self.sub_blocks.items()] + combined_outputs = combine_outputs(*named_outputs) + return combined_outputs + + @property + # Copied from diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks._requirements + def _requirements(self) -> dict[str, str]: + requirements = {} + for block_name, block in self.sub_blocks.items(): + if getattr(block, "_requirements", None): + requirements[block_name] = block._requirements + return requirements + + # used for `__repr__` + def _get_trigger_inputs(self) -> set: + """ + Returns a set of all unique trigger input values found in this block and nested blocks. + """ + + def fn_recursive_get_trigger(blocks): + trigger_values = set() + + if blocks is not None: + for name, block in blocks.items(): + # Check if current block has block_trigger_inputs + if hasattr(block, "block_trigger_inputs") and block.block_trigger_inputs is not None: + trigger_values.update(t for t in block.block_trigger_inputs if t is not None) + + # If block has sub_blocks, recursively check them + if block.sub_blocks: + nested_triggers = fn_recursive_get_trigger(block.sub_blocks) + trigger_values.update(nested_triggers) + + return trigger_values + + # Start with this block's block_trigger_inputs + all_triggers = {t for t in self.block_trigger_inputs if t is not None} + # Add nested triggers + all_triggers.update(fn_recursive_get_trigger(self.sub_blocks)) + + return all_triggers + + def select_block(self, **kwargs) -> str | None: + """ + Select the block to run based on the trigger inputs. Subclasses must implement this method to define the logic + for selecting the block. + + Note: When trigger inputs include intermediate outputs from earlier blocks, the selection logic should only + depend on the presence or absence of the input (i.e., whether it is None or not), not on its actual value. This + is because `get_execution_blocks()` resolves conditions statically by propagating intermediate output names + without their runtime values. + + Args: + **kwargs: Trigger input names and their values from the state. + + Returns: + str | None: The name of the block to run, or None to use default/skip. + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement the `select_block` method.") + + @torch.no_grad() + def __call__(self, pipeline, state: PipelineState) -> PipelineState: + trigger_kwargs = {name: state.get(name) for name in self.block_trigger_inputs if name is not None} + block_name = self.select_block(**trigger_kwargs) + + if block_name is None: + block_name = self.default_block_name + + if block_name is None: + logger.info(f"skipping conditional block: {self.__class__.__name__}") + return pipeline, state + + block = self.sub_blocks[block_name] + + try: + logger.info(f"Running block: {block.__class__.__name__}") + return block(pipeline, state) + except Exception as e: + error_msg = ( + f"\nError in block: {block.__class__.__name__}\n" + f"Error details: {str(e)}\n" + f"Traceback:\n{traceback.format_exc()}" + ) + logger.error(error_msg) + raise + + def get_execution_blocks(self, **kwargs) -> ModularPipelineBlocks | None: + """ + Get the block(s) that would execute given the inputs. + + Recursively resolves nested ConditionalPipelineBlocks until reaching either: + - A leaf block (no sub_blocks or LoopSequentialPipelineBlocks) → returns single `ModularPipelineBlocks` + - A `SequentialPipelineBlocks` → delegates to its `get_execution_blocks()` which returns + a `SequentialPipelineBlocks` containing the resolved execution blocks + + Args: + **kwargs: Input names and values. Only trigger inputs affect block selection. + + Returns: + - `ModularPipelineBlocks`: A leaf block or resolved `SequentialPipelineBlocks` + - `None`: If this block would be skipped (no trigger matched and no default) + """ + trigger_kwargs = {name: kwargs.get(name) for name in self.block_trigger_inputs if name is not None} + block_name = self.select_block(**trigger_kwargs) + + if block_name is None: + block_name = self.default_block_name + + if block_name is None: + return None + + block = self.sub_blocks[block_name] + + # Recursively resolve until we hit a leaf block + if block.sub_blocks and not isinstance(block, LoopSequentialPipelineBlocks): + return block.get_execution_blocks(**kwargs) + + return block + + def __repr__(self): + class_name = self.__class__.__name__ + base_class = self.__class__.__bases__[0].__name__ + header = ( + f"{class_name}(\n Class: {base_class}\n" if base_class and base_class != "object" else f"{class_name}(\n" + ) + + if self._get_trigger_inputs(): + header += "\n" + header += " " + "=" * 100 + "\n" + header += " This pipeline contains blocks that are selected at runtime based on inputs.\n" + header += f" Trigger Inputs: {sorted(self._get_trigger_inputs())}\n" + header += " " + "=" * 100 + "\n\n" + + # Format description with proper indentation + desc_lines = self.description.split("\n") + desc = [] + # First line with "Description:" label + desc.append(f" Description: {desc_lines[0]}") + # Subsequent lines with proper indentation + if len(desc_lines) > 1: + desc.extend(f" {line}" for line in desc_lines[1:]) + desc = "\n".join(desc) + "\n" + + # Components section - focus only on expected components + expected_components = getattr(self, "expected_components", []) + components_str = format_components(expected_components, indent_level=2, add_empty_lines=False) + + # Configs section - use format_configs with add_empty_lines=False + expected_configs = getattr(self, "expected_configs", []) + configs_str = format_configs(expected_configs, indent_level=2, add_empty_lines=False) + + # Blocks section + blocks_str = " Sub-Blocks:\n" + for i, (name, block) in enumerate(self.sub_blocks.items()): + if name == self.default_block_name: + addtional_str = " [default]" + else: + addtional_str = "" + blocks_str += f" • {name}{addtional_str} ({block.__class__.__name__})\n" + + # Add block description + block_desc_lines = block.description.split("\n") + indented_desc = block_desc_lines[0] + if len(block_desc_lines) > 1: + indented_desc += "\n" + "\n".join(" " + line for line in block_desc_lines[1:]) + blocks_str += f" Description: {indented_desc}\n\n" + + # Build the representation with conditional sections + result = f"{header}\n{desc}" + + # Only add components section if it has content + if components_str.strip(): + result += f"\n\n{components_str}" + + # Only add configs section if it has content + if configs_str.strip(): + result += f"\n\n{configs_str}" + + # Always add blocks section + result += f"\n\n{blocks_str})" + + return result + + @property + def doc(self): + return make_doc_string( + self.inputs, + self.outputs, + self.description, + class_name=self.__class__.__name__, + expected_components=self.expected_components, + expected_configs=self.expected_configs, + ) + + +class AutoPipelineBlocks(ConditionalPipelineBlocks): + """ + A Pipeline Blocks that automatically selects a block to run based on the presence of trigger inputs. + + This is a specialized version of `ConditionalPipelineBlocks` where: + - Each block has one corresponding trigger input (1:1 mapping) + - Block selection is automatic: the first block whose trigger input is present gets selected + - `block_trigger_inputs` must have the same length as `block_names` and `block_classes` + - Use `None` in `block_trigger_inputs` to specify the default block, i.e the block that will run if no trigger + inputs are present + + Attributes: + block_classes: + List of block classes to be used. Must have the same length as `block_names` and + `block_trigger_inputs`. + block_names: + List of names for each block. Must have the same length as `block_classes` and `block_trigger_inputs`. + block_trigger_inputs: + List of input names where each element specifies the trigger input for the corresponding block. Use + `None` to mark the default block. + + Example: + ```python + class MyAutoBlock(AutoPipelineBlocks): + block_classes = [InpaintEncoderBlock, ImageEncoderBlock, TextEncoderBlock] + block_names = ["inpaint", "img2img", "text2img"] + block_trigger_inputs = ["mask_image", "image", None] # text2img is the default + ``` + + With this definition: + - As long as `mask_image` is provided, "inpaint" block runs (regardless of `image` being provided or not) + - If `mask_image` is not provided but `image` is provided, "img2img" block runs + - Otherwise, "text2img" block runs (default, trigger is `None`) + """ + + def __init__(self): + super().__init__() + + if self.default_block_name is not None: + raise ValueError( + f"In {self.__class__.__name__}, do not set `default_block_name` for AutoPipelineBlocks. " + f"Use `None` in `block_trigger_inputs` to specify the default block." + ) + + if not (len(self.block_classes) == len(self.block_names) == len(self.block_trigger_inputs)): + raise ValueError( + f"In {self.__class__.__name__}, the number of block_classes, block_names, and block_trigger_inputs must be the same." + ) + + if None in self.block_trigger_inputs: + idx = self.block_trigger_inputs.index(None) + self.default_block_name = self.block_names[idx] + + def select_block(self, **kwargs) -> str | None: + """Select block based on which trigger input is present (not None).""" + for trigger_input, block_name in zip(self.block_trigger_inputs, self.block_names): + if trigger_input is not None and kwargs.get(trigger_input) is not None: + return block_name + return None + + +class SequentialPipelineBlocks(ModularPipelineBlocks): + """ + A Pipeline Blocks that combines multiple pipeline block classes into one. When called, it will call each block in + sequence. + + This class inherits from [`ModularPipelineBlocks`]. Check the superclass documentation for the generic methods the + library implements for all the pipeline blocks (such as loading or saving etc.) + + > [!WARNING] > This is an experimental feature and is likely to change in the future. + + Attributes: + block_classes: list of block classes to be used + block_names: list of prefixes for each block + """ + + block_classes = [] + block_names = [] + + @property + def description(self): + return "" + + @property + def model_name(self): + return next((block.model_name for block in self.sub_blocks.values() if block.model_name is not None), None) + + @property + def expected_components(self): + expected_components = [] + for block in self.sub_blocks.values(): + for component in block.expected_components: + if component not in expected_components: + expected_components.append(component) + return expected_components + + @property + def expected_configs(self): + expected_configs = [] + for block in self.sub_blocks.values(): + for config in block.expected_configs: + if config not in expected_configs: + expected_configs.append(config) + return expected_configs + + @property + def available_workflows(self): + if self._workflow_map is None: + raise NotImplementedError( + f"workflows is not supported because _workflow_map is not set for {self.__class__.__name__}" + ) + + return list(self._workflow_map.keys()) + + def get_workflow(self, workflow_name: str): + if self._workflow_map is None: + raise NotImplementedError( + f"workflows is not supported because _workflow_map is not set for {self.__class__.__name__}" + ) + + if workflow_name not in self._workflow_map: + raise ValueError(f"Workflow {workflow_name} not found in {self.__class__.__name__}") + + trigger_inputs = self._workflow_map[workflow_name] + workflow_blocks = self.get_execution_blocks(**trigger_inputs) + + return workflow_blocks + + @classmethod + def from_blocks_dict( + cls, blocks_dict: dict[str, Any], description: str | None = None + ) -> "SequentialPipelineBlocks": + """Creates a SequentialPipelineBlocks instance from a dictionary of blocks. + + Args: + blocks_dict: Dictionary mapping block names to block classes or instances + + Returns: + A new SequentialPipelineBlocks instance + """ + instance = cls() + + # Create instances if classes are provided + sub_blocks = InsertableDict() + for name, block in blocks_dict.items(): + if inspect.isclass(block): + sub_blocks[name] = block() + else: + sub_blocks[name] = block + + instance.block_classes = [block.__class__ for block in sub_blocks.values()] + instance.block_names = list(sub_blocks.keys()) + instance.sub_blocks = sub_blocks + + if description is not None: + instance.description = description + + return instance + + def __init__(self): + sub_blocks = InsertableDict() + for block_name, block in zip(self.block_names, self.block_classes): + if inspect.isclass(block): + sub_blocks[block_name] = block() + else: + sub_blocks[block_name] = block + self.sub_blocks = sub_blocks + if not len(self.block_names) == len(self.block_classes): + raise ValueError( + f"In {self.__class__.__name__}, the number of block_names and block_classes must be the same." + ) + + def _get_inputs(self): + inputs = [] + outputs = set() + + # Go through all blocks in order + for block in self.sub_blocks.values(): + # Add inputs that aren't in outputs yet + for inp in block.inputs: + if inp.name not in outputs and inp.name not in {input.name for input in inputs}: + inputs.append(inp) + + # Only add outputs if the block cannot be skipped + should_add_outputs = True + if isinstance(block, ConditionalPipelineBlocks) and block.default_block_name is None: + # ConditionalPipelineBlocks without default can be skipped + should_add_outputs = False + + if should_add_outputs: + # Add this block's outputs + block_intermediate_outputs = [out.name for out in block.intermediate_outputs] + outputs.update(block_intermediate_outputs) + + return inputs + + # YiYi TODO: add test for this + @property + def inputs(self) -> list[tuple[str, Any]]: + return self._get_inputs() + + @property + def required_inputs(self) -> list[str]: + # Get the first block from the dictionary + first_block = next(iter(self.sub_blocks.values())) + required_by_any = set(getattr(first_block, "required_inputs", set())) + + # Union with required inputs from all other blocks + for block in list(self.sub_blocks.values())[1:]: + block_required = set(getattr(block, "required_inputs", set())) + required_by_any.update(block_required) + + return list(required_by_any) + + @property + def intermediate_outputs(self) -> list[str]: + named_outputs = [] + for name, block in self.sub_blocks.items(): + inp_names = {inp.name for inp in block.inputs} + # so we only need to list new variables as intermediate_outputs, but if user wants to list these they modified it's still fine (a.k.a we don't enforce) + # filter out them here so they do not end up as intermediate_outputs + if name not in inp_names: + named_outputs.append((name, block.intermediate_outputs)) + combined_outputs = combine_outputs(*named_outputs) + return combined_outputs + + # YiYi TODO: I think we can remove the outputs property + @property + def outputs(self) -> list[str]: + # return next(reversed(self.sub_blocks.values())).intermediate_outputs + return self.intermediate_outputs + + @torch.no_grad() + def __call__(self, pipeline, state: PipelineState) -> PipelineState: + for block_name, block in self.sub_blocks.items(): + try: + pipeline, state = block(pipeline, state) + except Exception as e: + error_msg = ( + f"\nError in block: ({block_name}, {block.__class__.__name__})\n" + f"Error details: {str(e)}\n" + f"Traceback:\n{traceback.format_exc()}" + ) + logger.error(error_msg) + raise + return pipeline, state + + # used for `__repr__` + def _get_trigger_inputs(self): + """ + Returns a set of all unique trigger input values found in the blocks. + """ + + def fn_recursive_get_trigger(blocks): + trigger_values = set() + + if blocks is not None: + for name, block in blocks.items(): + # Check if current block has block_trigger_inputs (ConditionalPipelineBlocks) + if hasattr(block, "block_trigger_inputs") and block.block_trigger_inputs is not None: + trigger_values.update(t for t in block.block_trigger_inputs if t is not None) + + # If block has sub_blocks, recursively check them + if block.sub_blocks: + nested_triggers = fn_recursive_get_trigger(block.sub_blocks) + trigger_values.update(nested_triggers) + + return trigger_values + + return fn_recursive_get_trigger(self.sub_blocks) + + def get_execution_blocks(self, **kwargs) -> "SequentialPipelineBlocks": + """ + Get the blocks that would execute given the specified inputs. + + As the traversal walks through sequential blocks, intermediate outputs from resolved blocks are added to the + active inputs. This means conditional blocks that depend on intermediates (e.g., "run img2img if image_latents + is present") will resolve correctly, as long as the condition is based on presence/absence (None or not None), + not on the actual value. + + + Args: + **kwargs: Input names and values. Only trigger inputs affect block selection. + + Returns: + SequentialPipelineBlocks containing only the blocks that would execute + """ + # Copy kwargs so we can add outputs as we traverse + active_inputs = dict(kwargs) + + def fn_recursive_traverse(block, block_name, active_inputs): + result_blocks = OrderedDict() + + # ConditionalPipelineBlocks (includes AutoPipelineBlocks) + if isinstance(block, ConditionalPipelineBlocks): + block = block.get_execution_blocks(**active_inputs) + if block is None: + return result_blocks + + # Has sub_blocks (SequentialPipelineBlocks/ConditionalPipelineBlocks) + if block.sub_blocks and not isinstance(block, LoopSequentialPipelineBlocks): + for sub_block_name, sub_block in block.sub_blocks.items(): + nested_blocks = fn_recursive_traverse(sub_block, sub_block_name, active_inputs) + nested_blocks = {f"{block_name}.{k}": v for k, v in nested_blocks.items()} + result_blocks.update(nested_blocks) + else: + # Leaf block: single ModularPipelineBlocks or LoopSequentialPipelineBlocks + result_blocks[block_name] = block + # Add outputs to active_inputs so subsequent blocks can use them as triggers + if hasattr(block, "intermediate_outputs"): + for out in block.intermediate_outputs: + active_inputs[out.name] = True + + return result_blocks + + all_blocks = OrderedDict() + for block_name, block in self.sub_blocks.items(): + nested_blocks = fn_recursive_traverse(block, block_name, active_inputs) + all_blocks.update(nested_blocks) + + return SequentialPipelineBlocks.from_blocks_dict(all_blocks) + + def __repr__(self): + class_name = self.__class__.__name__ + base_class = self.__class__.__bases__[0].__name__ + header = ( + f"{class_name}(\n Class: {base_class}\n" if base_class and base_class != "object" else f"{class_name}(\n" + ) + + if self._workflow_map is None and self._get_trigger_inputs(): + header += "\n" + header += " " + "=" * 100 + "\n" + header += " This pipeline contains blocks that are selected at runtime based on inputs.\n" + header += f" Trigger Inputs: {[inp for inp in self._get_trigger_inputs() if inp is not None]}\n" + # Get first trigger input as example + example_input = next(t for t in self._get_trigger_inputs() if t is not None) + header += f" Use `get_execution_blocks()` to see selected blocks (e.g. `get_execution_blocks({example_input}=...)`).\n" + header += " " + "=" * 100 + "\n\n" + + description = self.description + if self._workflow_map is not None: + workflow_str = format_workflow(self._workflow_map) + description = f"{self.description}\n\n{workflow_str}" + + # Format description with proper indentation + desc_lines = description.split("\n") + desc = [] + # First line with "Description:" label + desc.append(f" Description: {desc_lines[0]}") + # Subsequent lines with proper indentation + if len(desc_lines) > 1: + desc.extend(f" {line}" for line in desc_lines[1:]) + desc = "\n".join(desc) + "\n" + + # Components section - focus only on expected components + expected_components = getattr(self, "expected_components", []) + components_str = format_components(expected_components, indent_level=2, add_empty_lines=False) + + # Configs section - use format_configs with add_empty_lines=False + expected_configs = getattr(self, "expected_configs", []) + configs_str = format_configs(expected_configs, indent_level=2, add_empty_lines=False) + + # Blocks section - moved to the end with simplified format + blocks_str = " Sub-Blocks:\n" + for i, (name, block) in enumerate(self.sub_blocks.items()): + # show execution order + blocks_str += f" [{i}] {name} ({block.__class__.__name__})\n" + + # Add block description + desc_lines = block.description.split("\n") + indented_desc = desc_lines[0] + if len(desc_lines) > 1: + indented_desc += "\n" + "\n".join(" " + line for line in desc_lines[1:]) + blocks_str += f" Description: {indented_desc}\n\n" + + # Build the representation with conditional sections + result = f"{header}\n{desc}" + + # Only add components section if it has content + if components_str.strip(): + result += f"\n\n{components_str}" + + # Only add configs section if it has content + if configs_str.strip(): + result += f"\n\n{configs_str}" + + # Always add blocks section + result += f"\n\n{blocks_str})" + + return result + + @property + def doc(self): + description = self.description + if self._workflow_map is not None: + workflow_str = format_workflow(self._workflow_map) + description = f"{self.description}\n\n{workflow_str}" + + return make_doc_string( + self.inputs, + self.outputs, + description=description, + class_name=self.__class__.__name__, + expected_components=self.expected_components, + expected_configs=self.expected_configs, + ) + + @property + def _requirements(self) -> dict[str, str]: + requirements = {} + for block_name, block in self.sub_blocks.items(): + if getattr(block, "_requirements", None): + requirements[block_name] = block._requirements + return requirements + + +class LoopSequentialPipelineBlocks(ModularPipelineBlocks): + """ + A Pipeline blocks that combines multiple pipeline block classes into a For Loop. When called, it will call each + block in sequence. + + This class inherits from [`ModularPipelineBlocks`]. Check the superclass documentation for the generic methods the + library implements for all the pipeline blocks (such as loading or saving etc.) + + > [!WARNING] > This is an experimental feature and is likely to change in the future. + + Attributes: + block_classes: list of block classes to be used + block_names: list of prefixes for each block + """ + + model_name = None + block_classes = [] + block_names = [] + + @property + def description(self) -> str: + """Description of the block. Must be implemented by subclasses.""" + raise NotImplementedError("description method must be implemented in subclasses") + + @property + def loop_expected_components(self) -> list[ComponentSpec]: + return [] + + @property + def loop_expected_configs(self) -> list[ConfigSpec]: + return [] + + @property + def loop_inputs(self) -> list[InputParam]: + """list of input parameters. Must be implemented by subclasses.""" + return [] + + @property + def loop_required_inputs(self) -> list[str]: + input_names = [] + for input_param in self.loop_inputs: + if input_param.required: + input_names.append(input_param.name) + return input_names + + @property + def loop_intermediate_outputs(self) -> list[OutputParam]: + """list of intermediate output parameters. Must be implemented by subclasses.""" + return [] + + # modified from SequentialPipelineBlocks to include loop_expected_components + @property + def expected_components(self): + expected_components = [] + for block in self.sub_blocks.values(): + for component in block.expected_components: + if component not in expected_components: + expected_components.append(component) + for component in self.loop_expected_components: + if component not in expected_components: + expected_components.append(component) + return expected_components + + # modified from SequentialPipelineBlocks to include loop_expected_configs + @property + def expected_configs(self): + expected_configs = [] + for block in self.sub_blocks.values(): + for config in block.expected_configs: + if config not in expected_configs: + expected_configs.append(config) + for config in self.loop_expected_configs: + if config not in expected_configs: + expected_configs.append(config) + return expected_configs + + def _get_inputs(self): + inputs = [] + inputs.extend(self.loop_inputs) + outputs = set() + + for name, block in self.sub_blocks.items(): + # Add inputs that aren't in outputs yet + for inp in block.inputs: + if inp.name not in outputs and inp not in inputs: + inputs.append(inp) + + # Add this block's outputs + block_intermediate_outputs = [out.name for out in block.intermediate_outputs] + outputs.update(block_intermediate_outputs) + + for input_param in inputs: + if input_param.name in self.required_inputs: + input_param.required = True + else: + input_param.required = False + + return inputs + + @property + # Copied from diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks.inputs + def inputs(self): + return self._get_inputs() + + # modified from SequentialPipelineBlocks, if any additionan input required by the loop is required by the block + @property + def required_inputs(self) -> list[str]: + # Get the first block from the dictionary + first_block = next(iter(self.sub_blocks.values())) + required_by_any = set(getattr(first_block, "required_inputs", set())) + + required_by_loop = set(getattr(self, "loop_required_inputs", set())) + required_by_any.update(required_by_loop) + + # Union with required inputs from all other blocks + for block in list(self.sub_blocks.values())[1:]: + block_required = set(getattr(block, "required_inputs", set())) + required_by_any.update(block_required) + + return list(required_by_any) + + # YiYi TODO: this need to be thought about more + # modified from SequentialPipelineBlocks to include loop_intermediate_outputs + @property + def intermediate_outputs(self) -> list[str]: + named_outputs = [(name, block.intermediate_outputs) for name, block in self.sub_blocks.items()] + combined_outputs = combine_outputs(*named_outputs) + for output in self.loop_intermediate_outputs: + if output.name not in {output.name for output in combined_outputs}: + combined_outputs.append(output) + return combined_outputs + + # YiYi TODO: this need to be thought about more + @property + def outputs(self) -> list[str]: + return next(reversed(self.sub_blocks.values())).intermediate_outputs + + @property + # Copied from diffusers.modular_pipelines.modular_pipeline.SequentialPipelineBlocks._requirements + def _requirements(self) -> dict[str, str]: + requirements = {} + for block_name, block in self.sub_blocks.items(): + if getattr(block, "_requirements", None): + requirements[block_name] = block._requirements + return requirements + + def __init__(self): + sub_blocks = InsertableDict() + for block_name, block in zip(self.block_names, self.block_classes): + if inspect.isclass(block): + sub_blocks[block_name] = block() + else: + sub_blocks[block_name] = block + self.sub_blocks = sub_blocks + + # Validate that sub_blocks are only leaf blocks + for block_name, block in self.sub_blocks.items(): + if block.sub_blocks: + raise ValueError( + f"In {self.__class__.__name__}, sub_blocks must be leaf blocks (no sub_blocks). " + f"Block '{block_name}' ({block.__class__.__name__}) has sub_blocks." + ) + + @classmethod + def from_blocks_dict(cls, blocks_dict: dict[str, Any]) -> "LoopSequentialPipelineBlocks": + """ + Creates a LoopSequentialPipelineBlocks instance from a dictionary of blocks. + + Args: + blocks_dict: Dictionary mapping block names to block instances + + Returns: + A new LoopSequentialPipelineBlocks instance + """ + instance = cls() + + # Create instances if classes are provided + sub_blocks = InsertableDict() + for name, block in blocks_dict.items(): + if inspect.isclass(block): + sub_blocks[name] = block() + else: + sub_blocks[name] = block + + instance.block_classes = [block.__class__ for block in blocks_dict.values()] + instance.block_names = list(blocks_dict.keys()) + instance.sub_blocks = blocks_dict + return instance + + def loop_step(self, components, state: PipelineState, **kwargs): + for block_name, block in self.sub_blocks.items(): + try: + components, state = block(components, state, **kwargs) + except Exception as e: + error_msg = ( + f"\nError in block: ({block_name}, {block.__class__.__name__})\n" + f"Error details: {str(e)}\n" + f"Traceback:\n{traceback.format_exc()}" + ) + logger.error(error_msg) + raise + return components, state + + def __call__(self, components, state: PipelineState) -> PipelineState: + raise NotImplementedError("`__call__` method needs to be implemented by the subclass") + + @property + def doc(self): + return make_doc_string( + self.inputs, + self.outputs, + self.description, + class_name=self.__class__.__name__, + expected_components=self.expected_components, + expected_configs=self.expected_configs, + ) + + # modified from SequentialPipelineBlocks, + # (does not need trigger_inputs related part so removed them, + # do not need to support auto block for loop blocks) + def __repr__(self): + class_name = self.__class__.__name__ + base_class = self.__class__.__bases__[0].__name__ + header = ( + f"{class_name}(\n Class: {base_class}\n" if base_class and base_class != "object" else f"{class_name}(\n" + ) + + # Format description with proper indentation + desc_lines = self.description.split("\n") + desc = [] + # First line with "Description:" label + desc.append(f" Description: {desc_lines[0]}") + # Subsequent lines with proper indentation + if len(desc_lines) > 1: + desc.extend(f" {line}" for line in desc_lines[1:]) + desc = "\n".join(desc) + "\n" + + # Components section - focus only on expected components + expected_components = getattr(self, "expected_components", []) + components_str = format_components(expected_components, indent_level=2, add_empty_lines=False) + + # Configs section - use format_configs with add_empty_lines=False + expected_configs = getattr(self, "expected_configs", []) + configs_str = format_configs(expected_configs, indent_level=2, add_empty_lines=False) + + # Blocks section - moved to the end with simplified format + blocks_str = " Sub-Blocks:\n" + for i, (name, block) in enumerate(self.sub_blocks.items()): + # For SequentialPipelineBlocks, show execution order + blocks_str += f" [{i}] {name} ({block.__class__.__name__})\n" + + # Add block description + desc_lines = block.description.split("\n") + indented_desc = desc_lines[0] + if len(desc_lines) > 1: + indented_desc += "\n" + "\n".join(" " + line for line in desc_lines[1:]) + blocks_str += f" Description: {indented_desc}\n\n" + + # Build the representation with conditional sections + result = f"{header}\n{desc}" + + # Only add components section if it has content + if components_str.strip(): + result += f"\n\n{components_str}" + + # Only add configs section if it has content + if configs_str.strip(): + result += f"\n\n{configs_str}" + + # Always add blocks section + result += f"\n\n{blocks_str})" + + return result + + @torch.compiler.disable + def progress_bar(self, iterable=None, total=None): + if not hasattr(self, "_progress_bar_config"): + self._progress_bar_config = {} + elif not isinstance(self._progress_bar_config, dict): + raise ValueError( + f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." + ) + + if iterable is not None: + return tqdm(iterable, **self._progress_bar_config) + elif total is not None: + return tqdm(total=total, **self._progress_bar_config) + else: + raise ValueError("Either `total` or `iterable` has to be defined.") + + def set_progress_bar_config(self, **kwargs): + self._progress_bar_config = kwargs + + +# YiYi TODO: +# 1. look into the serialization of modular_model_index.json, make sure the items are properly ordered like model_index.json (currently a mess) +# 2. do we need ConfigSpec? the are basically just key/val kwargs +# 3. imnprove docstring and potentially add validator for methods where we accept kwargs to be passed to from_pretrained/save_pretrained/load_components() +class ModularPipeline(ConfigMixin, PushToHubMixin): + """ + Base class for all Modular pipelines. + + > [!WARNING] > This is an experimental feature and is likely to change in the future. + + Args: + blocks: ModularPipelineBlocks, the blocks to be used in the pipeline + """ + + config_name = "modular_model_index.json" + hf_device_map = None + default_blocks_name = None + + # YiYi TODO: add warning for passing multiple ComponentSpec/ConfigSpec with the same name + def __init__( + self, + blocks: ModularPipelineBlocks | None = None, + pretrained_model_name_or_path: str | os.PathLike | None = None, + components_manager: ComponentsManager | None = None, + collection: str | None = None, + modular_config_dict: dict[str, Any] | None = None, + config_dict: dict[str, Any] | None = None, + **kwargs, + ): + """ + Initialize a ModularPipeline instance. + + This method sets up the pipeline by: + - creating default pipeline blocks if not provided + - gather component and config specifications based on the pipeline blocks's requirement (e.g. + expected_components, expected_configs) + - update the loading specs of from_pretrained components based on the modular_model_index.json file from + huggingface hub if `pretrained_model_name_or_path` is provided + - create defaultfrom_config components and register everything + + Args: + blocks: `ModularPipelineBlocks` instance. If None, will attempt to load + default blocks based on the pipeline class name. + pretrained_model_name_or_path: Path to a pretrained pipeline configuration. Can be None if the pipeline + does not require any additional loading config. If provided, will first try to load component specs + (only for from_pretrained components) and config values from `modular_model_index.json`, then + fallback to `model_index.json` for compatibility with standard non-modular repositories. + components_manager: + Optional ComponentsManager for managing multiple component cross different pipelines and apply + offloading strategies. + collection: Optional collection name for organizing components in the ComponentsManager. + **kwargs: Additional arguments passed to `load_config()` when loading pretrained configuration. + + Examples: + ```python + # Initialize with custom blocks + pipeline = ModularPipeline(blocks=my_custom_blocks) + + # Initialize from pretrained configuration + pipeline = ModularPipeline(blocks=my_blocks, pretrained_model_name_or_path="my-repo/modular-pipeline") + + # Initialize with components manager + pipeline = ModularPipeline( + blocks=my_blocks, components_manager=ComponentsManager(), collection="my_collection" + ) + ``` + + Notes: + - If blocks is None, the method will try to find default blocks based on the pipeline class name + - Components with default_creation_method="from_config" are created immediately, its specs are not included + in config dict and will not be saved in `modular_model_index.json` + - Components with default_creation_method="from_pretrained" are set to None and can be loaded later with + `load_components()` (with or without specific component names) + - The pipeline's config dict is populated with component specs (only for from_pretrained components) and + config values, which will be saved as `modular_model_index.json` during `save_pretrained` + - The pipeline's config dict is also used to store the pipeline blocks's class name, which will be saved as + `_blocks_class_name` in the config dict + """ + + if modular_config_dict is None and config_dict is None and pretrained_model_name_or_path is not None: + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + + load_config_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "token": token, + "local_files_only": local_files_only, + "revision": revision, + } + + modular_config_dict, config_dict = self._load_pipeline_config( + pretrained_model_name_or_path, **load_config_kwargs + ) + + if blocks is None: + if modular_config_dict is not None: + blocks_class_name = modular_config_dict.get("_blocks_class_name") + else: + blocks_class_name = self.default_blocks_name + if blocks_class_name is not None: + diffusers_module = importlib.import_module("diffusers") + blocks_class = getattr(diffusers_module, blocks_class_name, None) + # If the blocks_class is not found or is a base class (e.g. SequentialPipelineBlocks saved by from_blocks_dict) with empty block_classes + # fall back to default_blocks_name + if blocks_class is None or not blocks_class.block_classes: + blocks_class_name = self.default_blocks_name + blocks_class = getattr(diffusers_module, blocks_class_name) + + if blocks_class is not None: + blocks = blocks_class() + else: + logger.warning(f"`blocks` is `None`, no default blocks class found for {self.__class__.__name__}") + + self._blocks = blocks + self._components_manager = components_manager + self._collection = collection + self._component_specs = {spec.name: deepcopy(spec) for spec in self._blocks.expected_components} + self._config_specs = {spec.name: deepcopy(spec) for spec in self._blocks.expected_configs} + + # update component_specs and config_specs based on modular_model_index.json + if modular_config_dict is not None: + for name, value in modular_config_dict.items(): + # all the components in modular_model_index.json are from_pretrained components + if name in self._component_specs and isinstance(value, (tuple, list)) and len(value) == 3: + library, class_name, component_spec_dict = value + component_spec = self._dict_to_component_spec(name, component_spec_dict) + component_spec.default_creation_method = "from_pretrained" + self._component_specs[name] = component_spec + + elif name in self._config_specs: + self._config_specs[name].default = value + + # if `modular_config_dict` is None (i.e. `modular_model_index.json` is not found), update based on `config_dict` (i.e. `model_index.json`) + elif config_dict is not None: + for name, value in config_dict.items(): + if name in self._component_specs and isinstance(value, (tuple, list)) and len(value) == 2: + library, class_name = value + component_spec_dict = { + "repo": pretrained_model_name_or_path, + "subfolder": name, + "type_hint": (library, class_name), + } + component_spec = self._dict_to_component_spec(name, component_spec_dict) + component_spec.default_creation_method = "from_pretrained" + self._component_specs[name] = component_spec + elif name in self._config_specs: + self._config_specs[name].default = value + + if len(kwargs) > 0: + logger.warning(f"Unexpected input '{kwargs.keys()}' provided. This input will be ignored.") + + register_components_dict = {} + for name, component_spec in self._component_specs.items(): + if component_spec.default_creation_method == "from_config": + component = component_spec.create() + else: + component = None + register_components_dict[name] = component + self.register_components(**register_components_dict) + + default_configs = {} + for name, config_spec in self._config_specs.items(): + default_configs[name] = config_spec.default + self.register_to_config(**default_configs) + self.register_to_config( + _blocks_class_name=self._blocks.__class__.__name__ if self._blocks is not None else None + ) + + self._pretrained_model_name_or_path = pretrained_model_name_or_path + + @property + def default_call_parameters(self) -> dict[str, Any]: + """ + Returns: + - Dictionary mapping input names to their default values + """ + params = {} + for input_param in self._blocks.inputs: + params[input_param.name] = input_param.default + return params + + @classmethod + def _load_pipeline_config( + cls, + pretrained_model_name_or_path: str | os.PathLike | None, + **load_config_kwargs, + ): + try: + # try to load modular_model_index.json + modular_config_dict = cls.load_config(pretrained_model_name_or_path, **load_config_kwargs) + return modular_config_dict, None + + except EnvironmentError as e: + logger.debug(f" modular_model_index.json not found in the repo: {e}") + + try: + logger.debug(" try to load model_index.json") + from diffusers import DiffusionPipeline + + config_dict = DiffusionPipeline.load_config(pretrained_model_name_or_path, **load_config_kwargs) + return None, config_dict + + except EnvironmentError as e: + raise EnvironmentError( + f"Failed to load config from '{pretrained_model_name_or_path}'. " + f"Could not find or load 'modular_model_index.json' or 'model_index.json'." + ) from e + + return None, None + + @classmethod + @validate_hf_hub_args + def from_pretrained( + cls, + pretrained_model_name_or_path: str | os.PathLike | None, + trust_remote_code: bool | None = None, + components_manager: ComponentsManager | None = None, + collection: str | None = None, + **kwargs, + ): + """ + Load a ModularPipeline from a huggingface hub repo. + + Args: + pretrained_model_name_or_path (`str` or `os.PathLike`, optional): + Path to a pretrained pipeline configuration. It will first try to load config from + `modular_model_index.json`, then fallback to `model_index.json` for compatibility with standard + non-modular repositories. If the pretrained_model_name_or_path does not contain any pipeline config, it + will be set to None during initialization. + trust_remote_code (`bool`, optional): + Whether to trust remote code when loading the pipeline, need to be set to True if you want to create + pipeline blocks based on the custom code in `pretrained_model_name_or_path` + components_manager (`ComponentsManager`, optional): + ComponentsManager instance for managing multiple component cross different pipelines and apply + offloading strategies. + collection (`str`, optional):` + Collection name for organizing components in the ComponentsManager. + """ + from ..pipelines.pipeline_loading_utils import _get_pipeline_class + + try: + blocks = ModularPipelineBlocks.from_pretrained( + pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs + ) + except EnvironmentError as e: + logger.debug(f"EnvironmentError: {e}") + blocks = None + + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + + load_config_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "token": token, + "local_files_only": local_files_only, + "revision": revision, + } + + modular_config_dict, config_dict = cls._load_pipeline_config( + pretrained_model_name_or_path, **load_config_kwargs + ) + + if modular_config_dict is not None: + pipeline_class = _get_pipeline_class(cls, config=modular_config_dict) + elif config_dict is not None: + from diffusers.pipelines.auto_pipeline import _get_model + + logger.debug(" try to determine the modular pipeline class from model_index.json") + standard_pipeline_class = _get_pipeline_class(cls, config=config_dict) + model_name = _get_model(standard_pipeline_class.__name__) + map_fn = MODULAR_PIPELINE_MAPPING.get(model_name, _create_default_map_fn("ModularPipeline")) + pipeline_class_name = map_fn(config_dict) + diffusers_module = importlib.import_module("diffusers") + pipeline_class = getattr(diffusers_module, pipeline_class_name) + else: + # there is no config for modular pipeline, assuming that the pipeline block does not need any from_pretrained components + pipeline_class = cls + pretrained_model_name_or_path = None + + pipeline = pipeline_class( + blocks=blocks, + pretrained_model_name_or_path=pretrained_model_name_or_path, + components_manager=components_manager, + collection=collection, + modular_config_dict=modular_config_dict, + config_dict=config_dict, + **kwargs, + ) + return pipeline + + def save_pretrained( + self, + save_directory: str | os.PathLike, + safe_serialization: bool = True, + variant: str | None = None, + max_shard_size: int | str | None = None, + push_to_hub: bool = False, + **kwargs, + ): + """ + Save the pipeline and all its components to a directory, so that it can be re-loaded using the + [`~ModularPipeline.from_pretrained`] class method. + + Args: + save_directory (`str` or `os.PathLike`): + Directory to save the pipeline to. Will be created if it doesn't exist. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + variant (`str`, *optional*): + If specified, weights are saved in the format `pytorch_model..bin`. + max_shard_size (`int` or `str`, defaults to `None`): + The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size + lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5GB"`). + If expressed as an integer, the unit is bytes. + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether to push the pipeline to the Hugging Face model hub after saving it. + **kwargs: Additional keyword arguments: + - `overwrite_modular_index` (`bool`, *optional*, defaults to `False`): + When saving a Modular Pipeline, its components in `modular_model_index.json` may reference repos + different from the destination repo. Setting this to `True` updates all component references in + `modular_model_index.json` so they point to the repo specified by `repo_id`. + - `repo_id` (`str`, *optional*): + The repository ID to push the pipeline to. Defaults to the last component of `save_directory`. + - `commit_message` (`str`, *optional*): + Commit message for the push to hub operation. + - `private` (`bool`, *optional*): + Whether the repository should be private. + - `create_pr` (`bool`, *optional*, defaults to `False`): + Whether to create a pull request instead of pushing directly. + - `token` (`str`, *optional*): + The Hugging Face token to use for authentication. + """ + overwrite_modular_index = kwargs.pop("overwrite_modular_index", False) + repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + private = kwargs.pop("private", None) + create_pr = kwargs.pop("create_pr", False) + token = kwargs.pop("token", None) + update_model_card = kwargs.pop("update_model_card", False) + repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id + + for component_name, component_spec in self._component_specs.items(): + if component_spec.default_creation_method != "from_pretrained": + continue + + component = getattr(self, component_name, None) + if component is None: + continue + + model_cls = component.__class__ + if is_compiled_module(component): + component = _unwrap_model(component) + model_cls = component.__class__ + + save_method_name = None + for library_name, library_classes in LOADABLE_CLASSES.items(): + if library_name in sys.modules: + library = importlib.import_module(library_name) + else: + logger.info( + f"{library_name} is not installed. Cannot save {component_name} as {library_classes} from {library_name}" + ) + continue + + for base_class, save_load_methods in library_classes.items(): + class_candidate = getattr(library, base_class, None) + if class_candidate is not None and issubclass(model_cls, class_candidate): + save_method_name = save_load_methods[0] + break + if save_method_name is not None: + break + + if save_method_name is None: + logger.warning(f"self.{component_name}={component} of type {type(component)} cannot be saved.") + continue + + save_method = getattr(component, save_method_name) + save_method_signature = inspect.signature(save_method) + save_method_accept_safe = "safe_serialization" in save_method_signature.parameters + save_method_accept_variant = "variant" in save_method_signature.parameters + save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters + + save_kwargs = {} + if save_method_accept_safe: + save_kwargs["safe_serialization"] = safe_serialization + if save_method_accept_variant: + save_kwargs["variant"] = variant + if save_method_accept_max_shard_size and max_shard_size is not None: + save_kwargs["max_shard_size"] = max_shard_size + + component_save_path = os.path.join(save_directory, component_name) + save_method(component_save_path, **save_kwargs) + + if component_name not in self.config: + continue + + has_no_load_id = not hasattr(component, "_diffusers_load_id") or component._diffusers_load_id == "null" + if overwrite_modular_index or has_no_load_id: + library, class_name, component_spec_dict = self.config[component_name] + component_spec_dict["pretrained_model_name_or_path"] = repo_id if push_to_hub else save_directory + component_spec_dict["subfolder"] = component_name + self.register_to_config(**{component_name: (library, class_name, component_spec_dict)}) + + self.save_config(save_directory=save_directory) + + if push_to_hub: + card_content = generate_modular_model_card_content(self.blocks) + model_card = load_or_create_model_card( + repo_id, + token=token, + is_pipeline=True, + model_description=MODULAR_MODEL_CARD_TEMPLATE.format(**card_content), + is_modular=True, + update_model_card=update_model_card, + ) + model_card = populate_model_card(model_card, tags=card_content["tags"]) + model_card.save(os.path.join(save_directory, "README.md")) + + self._upload_folder( + save_directory, + repo_id, + token=token, + commit_message=commit_message, + create_pr=create_pr, + ) + + @property + def doc(self): + """ + Returns: + - The docstring of the pipeline blocks + """ + return self._blocks.doc + + @property + def blocks(self) -> ModularPipelineBlocks: + """ + Returns: + - A copy of the pipeline blocks + """ + return deepcopy(self._blocks) + + def register_components(self, **kwargs): + """ + Register components with their corresponding specifications. + + This method is responsible for: + 1. Sets component objects as attributes on the loader (e.g., self.unet = unet) + 2. Updates the config dict, which will be saved as `modular_model_index.json` during `save_pretrained` (only + for from_pretrained components) + 3. Adds components to the component manager if one is attached (only for from_pretrained components) + + This method is called when: + - Components are first initialized in __init__: + - from_pretrained components not loaded during __init__ so they are registered as None; + - non from_pretrained components are created during __init__ and registered as the object itself + - Components are updated with the `update_components()` method: e.g. loader.update_components(unet=unet) or + loader.update_components(guider=guider_spec) + - (from_pretrained) Components are loaded with the `load_components()` method: e.g. + loader.load_components(names=["unet"]) or loader.load_components() to load all default components + + Args: + **kwargs: Keyword arguments where keys are component names and values are component objects. + E.g., register_components(unet=unet_model, text_encoder=encoder_model) + + Notes: + - When registering None for a component, it sets attribute to None but still syncs specs with the config + dict, which will be saved as `modular_model_index.json` during `save_pretrained` + - component_specs are updated to match the new component outside of this method, e.g. in + `update_components()` method + """ + for name, module in kwargs.items(): + # current component spec + component_spec = self._component_specs.get(name) + if component_spec is None: + logger.warning(f"ModularPipeline.register_components: skipping unknown component '{name}'") + continue + + # check if it is the first time registration, i.e. calling from __init__ + is_registered = hasattr(self, name) + is_from_pretrained = component_spec.default_creation_method == "from_pretrained" + + if module is not None: + # actual library and class name of the module + library, class_name = _fetch_class_library_tuple(module) # e.g. ("diffusers", "UNet2DConditionModel") + else: + # if module is None, e.g. self.register_components(unet=None) during __init__ + # we do not update the spec, + # but we still need to update the modular_model_index.json config based on component spec + library, class_name = None, None + + # extract the loading spec from the updated component spec that'll be used as part of modular_model_index.json config + # e.g. {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-1", + # "type_hint": ("diffusers", "UNet2DConditionModel"), + # "subfolder": "unet", + # "variant": None, + # "revision": None} + component_spec_dict = self._component_spec_to_dict(component_spec) + + register_dict = {name: (library, class_name, component_spec_dict)} + + # set the component as attribute + # if it is not set yet, just set it and skip the process to check and warn below + if not is_registered: + if is_from_pretrained: + self.register_to_config(**register_dict) + setattr(self, name, module) + if module is not None and is_from_pretrained and self._components_manager is not None: + self._components_manager.add(name, module, self._collection) + continue + + current_module = getattr(self, name, None) + # skip if the component is already registered with the same object + if current_module is module: + logger.info( + f"ModularPipeline.register_components: {name} is already registered with same object, skipping" + ) + continue + + # warn if unregister + if current_module is not None and module is None: + logger.info( + f"ModularPipeline.register_components: setting '{name}' to None " + f"(was {current_module.__class__.__name__})" + ) + # same type, new instance → replace but send debug log + elif ( + current_module is not None + and module is not None + and isinstance(module, current_module.__class__) + and current_module != module + ): + logger.debug( + f"ModularPipeline.register_components: replacing existing '{name}' " + f"(same type {type(current_module).__name__}, new instance)" + ) + + # update modular_model_index.json config + if is_from_pretrained: + self.register_to_config(**register_dict) + # finally set models + setattr(self, name, module) + # add to component manager if one is attached + if module is not None and is_from_pretrained and self._components_manager is not None: + self._components_manager.add(name, module, self._collection) + + @property + def device(self) -> torch.device: + r""" + Returns: + `torch.device`: The torch device on which the pipeline is located. + """ + modules = self.components.values() + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + for module in modules: + return module.device + + return torch.device("cpu") + + @property + # Modified from diffusers.pipelines.pipeline_utils.DiffusionPipeline._execution_device + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from + Accelerate's module hooks. + """ + for name, model in self.components.items(): + if not isinstance(model, torch.nn.Module): + continue + + if not hasattr(model, "_hf_hook"): + return self.device + for module in model.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + + @property + def dtype(self) -> torch.dtype: + r""" + Returns: + `torch.dtype`: The torch dtype on which the pipeline is located. + """ + modules = self.components.values() + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + for module in modules: + return module.dtype + + return torch.float32 + + @property + def null_component_names(self) -> list[str]: + """ + Returns: + - list of names for components that needs to be loaded + """ + return [name for name in self._component_specs.keys() if hasattr(self, name) and getattr(self, name) is None] + + @property + def component_names(self) -> list[str]: + """ + Returns: + - list of names for all components + """ + return list(self.components.keys()) + + @property + def pretrained_component_names(self) -> list[str]: + """ + Returns: + - list of names for from_pretrained components + """ + return [ + name + for name in self._component_specs.keys() + if self._component_specs[name].default_creation_method == "from_pretrained" + ] + + @property + def config_component_names(self) -> list[str]: + """ + Returns: + - list of names for from_config components + """ + return [ + name + for name in self._component_specs.keys() + if self._component_specs[name].default_creation_method == "from_config" + ] + + @property + def components(self) -> dict[str, Any]: + """ + Returns: + - Dictionary mapping component names to their objects (include both from_pretrained and from_config + components) + """ + # return only components we've actually set as attributes on self + return {name: getattr(self, name) for name in self._component_specs.keys() if hasattr(self, name)} + + def get_component_spec(self, name: str) -> ComponentSpec: + """ + Returns: + - a copy of the ComponentSpec object for the given component name + """ + return deepcopy(self._component_specs[name]) + + def update_components(self, **kwargs): + """ + Update components and configuration values and specs after the pipeline has been instantiated. + + This method allows you to: + 1. Replace existing components with new ones (e.g., updating `self.unet` or `self.text_encoder`) + 2. Update configuration values (e.g., changing `self.requires_safety_checker` flag) + + In addition to updating the components and configuration values as pipeline attributes, the method also + updates: + - the corresponding specs in `_component_specs` and `_config_specs` + - the `config` dict, which will be saved as `modular_model_index.json` during `save_pretrained` + + Args: + **kwargs: Component objects or configuration values to update: + - Component objects: Models loaded with `AutoModel.from_pretrained()` or `ComponentSpec.load()` + are automatically tagged with loading information. ConfigMixin objects without weights (e.g., + schedulers, guiders) can be passed directly. + - Configuration values: Simple values to update configuration settings + (e.g., `requires_safety_checker=False`) + + Examples: + ```python + # Update pre-trained model + pipeline.update_components(unet=new_unet_model, text_encoder=new_text_encoder) + + # Update configuration values + pipeline.update_components(requires_safety_checker=False) + ``` + + Notes: + - Components loaded with `AutoModel.from_pretrained()` or `ComponentSpec.load()` will have + loading specs preserved for serialization. Custom or locally loaded components without Hub references will + have their `modular_model_index.json` entries updated automatically during `save_pretrained()`. + - ConfigMixin objects without weights (e.g., schedulers, guiders) can be passed directly. + """ + + passed_components = {k: kwargs.pop(k) for k in self._component_specs if k in kwargs} + passed_config_values = {k: kwargs.pop(k) for k in self._config_specs if k in kwargs} + + for name, component in passed_components.items(): + current_component_spec = self._component_specs[name] + + # log if type changed + if current_component_spec.type_hint is not None and not isinstance( + component, current_component_spec.type_hint + ): + logger.info( + f"ModularPipeline.update_components: adding {name} with new type: {component.__class__.__name__}, previous type: {current_component_spec.type_hint.__name__}" + ) + # update _component_specs based on the new component + if component is None: + new_component_spec = current_component_spec + if hasattr(self, name) and getattr(self, name) is not None: + logger.warning(f"ModularPipeline.update_components: setting {name} to None (spec unchanged)") + elif ( + current_component_spec.default_creation_method == "from_pretrained" + and getattr(component, "_diffusers_load_id", None) is None + ): + new_component_spec = ComponentSpec(name=name, type_hint=type(component)) + else: + new_component_spec = ComponentSpec.from_component(name, component) + + if new_component_spec.default_creation_method != current_component_spec.default_creation_method: + logger.info( + f"ModularPipeline.update_components: changing the default_creation_method of {name} from {current_component_spec.default_creation_method} to {new_component_spec.default_creation_method}." + ) + + self._component_specs[name] = new_component_spec + + if len(kwargs) > 0: + logger.warning(f"Unexpected keyword arguments, will be ignored: {kwargs.keys()}") + + self.register_components(**passed_components) + + config_to_register = {} + for name, new_value in passed_config_values.items(): + self._config_specs[name].default = new_value + config_to_register[name] = new_value + self.register_to_config(**config_to_register) + + def load_components(self, names: list[str] | str | None = None, **kwargs): + """ + Load selected components from specs. + + Args: + names: list of component names to load. If None, will load all components with + default_creation_method == "from_pretrained". If provided as a list or string, will load only the + specified components. + **kwargs: additional kwargs to be passed to `from_pretrained()`.Can be: + - a single value to be applied to all components to be loaded, e.g. torch_dtype=torch.bfloat16 + - a dict, e.g. torch_dtype={"unet": torch.bfloat16, "default": torch.float32} + - if potentially override ComponentSpec if passed a different loading field in kwargs, e.g. + `pretrained_model_name_or_path`, `variant`, `revision`, etc. + - if potentially override ComponentSpec if passed a different loading field in kwargs, e.g. + `pretrained_model_name_or_path`, `variant`, `revision`, etc. + """ + + if names is None: + names = [ + name + for name in self._component_specs.keys() + if self._component_specs[name].default_creation_method == "from_pretrained" + and self._component_specs[name].pretrained_model_name_or_path is not None + and getattr(self, name, None) is None + ] + elif isinstance(names, str): + names = [names] + elif not isinstance(names, list): + raise ValueError(f"Invalid type for names: {type(names)}") + + components_to_load = {name for name in names if name in self._component_specs} + unknown_names = {name for name in names if name not in self._component_specs} + if len(unknown_names) > 0: + logger.warning(f"Unknown components will be ignored: {unknown_names}") + + components_to_register = {} + for name in components_to_load: + spec = self._component_specs[name] + component_load_kwargs = {} + for key, value in kwargs.items(): + if not isinstance(value, dict): + # if the value is a single value, apply it to all components + component_load_kwargs[key] = value + else: + if name in value: + # if it is a dict, check if the component name is in the dict + component_load_kwargs[key] = value[name] + elif "default" in value: + # check if the default is specified + component_load_kwargs[key] = value["default"] + # Only pass trust_remote_code to components from the same repo as the pipeline. + # When a user passes trust_remote_code=True, they intend to trust code from the + # pipeline's repo, not from external repos referenced in modular_model_index.json. + trust_remote_code_stripped = False + if ( + "trust_remote_code" in component_load_kwargs + and self._pretrained_model_name_or_path is not None + and spec.pretrained_model_name_or_path != self._pretrained_model_name_or_path + ): + component_load_kwargs.pop("trust_remote_code") + trust_remote_code_stripped = True + + if not spec.pretrained_model_name_or_path: + logger.info(f"Skipping component `{name}`: no pretrained model path specified.") + continue + + try: + components_to_register[name] = spec.load(**component_load_kwargs) + except Exception: + tb = traceback.format_exc() + if trust_remote_code_stripped and "trust_remote_code" in tb: + warning_msg = ( + f"Failed to load component `{name}` from external repository " + f"`{spec.pretrained_model_name_or_path}`.\n\n" + f"`trust_remote_code=True` was not forwarded to `{name}` because it comes from " + f"a different repository than the pipeline (`{self._pretrained_model_name_or_path}`). " + f"For safety, `trust_remote_code` is only forwarded to components from the same " + f"repository as the pipeline.\n\n" + f"You need to load this component manually with `trust_remote_code=True` and pass it " + f"to the pipeline via `pipe.update_components()`. For example, if it is a custom model:\n\n" + f' {name} = AutoModel.from_pretrained("{spec.pretrained_model_name_or_path}", trust_remote_code=True)\n' + f" pipe.update_components({name}={name})\n" + ) + else: + warning_msg = ( + f"Failed to create component {name}:\n" + f"- Component spec: {spec}\n" + f"- load() called with kwargs: {component_load_kwargs}\n" + "If this component is not required for your workflow you can safely ignore this message.\n\n" + "Traceback:\n" + f"{tb}" + ) + logger.warning(warning_msg) + + # Register all components at once + self.register_components(**components_to_register) + + # Copied from diffusers.pipelines.pipeline_utils.DiffusionPipeline._maybe_raise_error_if_group_offload_active + def _maybe_raise_error_if_group_offload_active( + self, raise_error: bool = False, module: torch.nn.Module | None = None + ) -> bool: + from ..hooks.group_offloading import _is_group_offload_enabled + + components = self.components.values() if module is None else [module] + components = [component for component in components if isinstance(component, torch.nn.Module)] + for component in components: + if _is_group_offload_enabled(component): + if raise_error: + raise ValueError( + "You are trying to apply model/sequential CPU offloading to a pipeline that contains components " + "with group offloading enabled. This is not supported. Please disable group offloading for " + "components of the pipeline to use other offloading methods." + ) + return True + return False + + # Modified from diffusers.pipelines.pipeline_utils.DiffusionPipeline.to + def to(self, *args, **kwargs) -> Self: + r""" + Performs Pipeline dtype and/or device conversion. A torch.dtype and torch.device are inferred from the + arguments of `self.to(*args, **kwargs).` + + > [!TIP] > If the pipeline already has the correct torch.dtype and torch.device, then it is returned as is. + Otherwise, > the returned pipeline is a copy of self with the desired torch.dtype and torch.device. + + + Here are the ways to call `to`: + + - `to(dtype, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) + - `to(device, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified + [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) + - `to(device=None, dtype=None, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the + specified [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) and + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) + + Arguments: + dtype (`torch.dtype`, *optional*): + Returns a pipeline with the specified + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) + device (`torch.Device`, *optional*): + Returns a pipeline with the specified + [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) + silence_dtype_warnings (`str`, *optional*, defaults to `False`): + Whether to omit warnings if the target `dtype` is not compatible with the target `device`. + + Returns: + [`DiffusionPipeline`]: The pipeline converted to specified `dtype` and/or `dtype`. + """ + from ..pipelines.pipeline_utils import _check_bnb_status + from ..utils import is_accelerate_available, is_accelerate_version, is_hpu_available, is_transformers_version + + dtype = kwargs.pop("dtype", None) + device = kwargs.pop("device", None) + silence_dtype_warnings = kwargs.pop("silence_dtype_warnings", False) + + dtype_arg = None + device_arg = None + if len(args) == 1: + if isinstance(args[0], torch.dtype): + dtype_arg = args[0] + else: + device_arg = torch.device(args[0]) if args[0] is not None else None + elif len(args) == 2: + if isinstance(args[0], torch.dtype): + raise ValueError( + "When passing two arguments, make sure the first corresponds to `device` and the second to `dtype`." + ) + device_arg = torch.device(args[0]) if args[0] is not None else None + dtype_arg = args[1] + elif len(args) > 2: + raise ValueError("Please make sure to pass at most two arguments (`device` and `dtype`) `.to(...)`") + + if dtype is not None and dtype_arg is not None: + raise ValueError( + "You have passed `dtype` both as an argument and as a keyword argument. Please only pass one of the two." + ) + + dtype = dtype or dtype_arg + + if device is not None and device_arg is not None: + raise ValueError( + "You have passed `device` both as an argument and as a keyword argument. Please only pass one of the two." + ) + + device = device or device_arg + device_type = torch.device(device).type if device is not None else None + pipeline_has_bnb = any(any((_check_bnb_status(module))) for _, module in self.components.items()) + + # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU. + def module_is_sequentially_offloaded(module): + if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"): + return False + + _, _, is_loaded_in_8bit_bnb = _check_bnb_status(module) + + if is_loaded_in_8bit_bnb: + return False + + return hasattr(module, "_hf_hook") and ( + isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook) + or hasattr(module._hf_hook, "hooks") + and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook) + ) + + def module_is_offloaded(module): + if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"): + return False + + return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload) + + # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer + pipeline_is_sequentially_offloaded = any( + module_is_sequentially_offloaded(module) for _, module in self.components.items() + ) + + is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1 + if is_pipeline_device_mapped: + raise ValueError( + "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline." + ) + + if device_type in ["cuda", "xpu"]: + if pipeline_is_sequentially_offloaded and not pipeline_has_bnb: + raise ValueError( + "It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading." + ) + # PR: https://github.com/huggingface/accelerate/pull/3223/ + elif pipeline_has_bnb and is_accelerate_version("<", "1.1.0.dev0"): + raise ValueError( + "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation." + ) + + # Display a warning in this case (the operation succeeds but the benefits are lost) + pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items()) + if pipeline_is_offloaded and device_type in ["cuda", "xpu"]: + logger.warning( + f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading." + ) + + # Enable generic support for Intel Gaudi accelerator using GPU/HPU migration + if device_type == "hpu" and kwargs.pop("hpu_migration", True) and is_hpu_available(): + os.environ["PT_HPU_GPU_MIGRATION"] = "1" + logger.debug("Environment variable set: PT_HPU_GPU_MIGRATION=1") + + import habana_frameworks.torch # noqa: F401 + + # HPU hardware check + if not (hasattr(torch, "hpu") and torch.hpu.is_available()): + raise ValueError("You are trying to call `.to('hpu')` but HPU device is unavailable.") + + os.environ["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "1" + logger.debug("Environment variable set: PT_HPU_MAX_COMPOUND_OP_SIZE=1") + + modules = self.components.values() + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + is_offloaded = pipeline_is_offloaded or pipeline_is_sequentially_offloaded + for module in modules: + _, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb = _check_bnb_status(module) + is_group_offloaded = self._maybe_raise_error_if_group_offload_active(module=module) + + if (is_loaded_in_4bit_bnb or is_loaded_in_8bit_bnb) and dtype is not None: + logger.warning( + f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` {'4bit' if is_loaded_in_4bit_bnb else '8bit'} and conversion to {dtype} is not supported. Module is still in {'4bit' if is_loaded_in_4bit_bnb else '8bit'} precision." + ) + + if is_loaded_in_8bit_bnb and device is not None: + logger.warning( + f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` 8bit and moving it to {device} via `.to()` is not supported. Module is still on {module.device}." + ) + + # Note: we also handle this at the ModelMixin level. The reason for doing it here too is that modeling + # components can be from outside diffusers too, but still have group offloading enabled. + if ( + self._maybe_raise_error_if_group_offload_active(raise_error=False, module=module) + and device is not None + ): + logger.warning( + f"The module '{module.__class__.__name__}' is group offloaded and moving it to {device} via `.to()` is not supported." + ) + + # This can happen for `transformer` models. CPU placement was added in + # https://github.com/huggingface/transformers/pull/33122. So, we guard this accordingly. + if is_loaded_in_4bit_bnb and device is not None and is_transformers_version(">", "4.44.0"): + module.to(device=device) + elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb and not is_group_offloaded: + module.to(device, dtype) + + if ( + module.dtype == torch.float16 + and str(device) in ["cpu"] + and not silence_dtype_warnings + and not is_offloaded + ): + logger.warning( + "Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It" + " is not recommended to move them to `cpu` as running them will fail. Please make" + " sure to use an accelerator to run the pipeline in inference, due to the lack of" + " support for`float16` operations on this device in PyTorch. Please, remove the" + " `torch_dtype=torch.float16` argument, or use another device for inference." + ) + return self + + @staticmethod + def _component_spec_to_dict(component_spec: ComponentSpec) -> Any: + """ + Convert a ComponentSpec into a JSON‐serializable dict for saving as an entry in `modular_model_index.json`. If + the `default_creation_method` is not `from_pretrained`, return None. + + This dict contains: + - "type_hint": tuple[str, str] + Library name and class name of the component. (e.g. ("diffusers", "UNet2DConditionModel")) + - All loading fields defined by `component_spec.loading_fields()`, typically: + - "pretrained_model_name_or_path": str | None + The model pretrained_model_name_or_pathsitory (e.g., "stabilityai/stable-diffusion-xl"). + - "subfolder": str | None + A subfolder within the pretrained_model_name_or_path where this component lives. + - "variant": str | None + An optional variant identifier for the model. + - "revision": str | None + A specific git revision (commit hash, tag, or branch). + - ... any other loading fields defined on the spec. + + Args: + component_spec (ComponentSpec): + The spec object describing one pipeline component. + + Returns: + dict[str, Any]: A mapping suitable for JSON serialization. + + Example: + >>> from diffusers.pipelines.modular_pipeline_utils import ComponentSpec >>> from diffusers import + UNet2DConditionModel >>> spec = ComponentSpec( + ... name="unet", ... type_hint=UNet2DConditionModel, ... config=None, ... + pretrained_model_name_or_path="path/to/pretrained_model_name_or_path", ... subfolder="subfolder", ... + variant=None, ... revision=None, ... default_creation_method="from_pretrained", + ... ) >>> ModularPipeline._component_spec_to_dict(spec) { + "type_hint": ("diffusers", "UNet2DConditionModel"), "pretrained_model_name_or_path": "path/to/repo", + "subfolder": "subfolder", "variant": None, "revision": None, "type_hint": ("diffusers", + "UNet2DConditionModel"), "pretrained_model_name_or_path": "path/to/repo", "subfolder": "subfolder", + "variant": None, "revision": None, + } + """ + if component_spec.default_creation_method != "from_pretrained": + return None + + if component_spec.type_hint is not None: + lib_name, cls_name = _fetch_class_library_tuple(component_spec.type_hint) + else: + lib_name = None + cls_name = None + load_spec_dict = {k: getattr(component_spec, k) for k in component_spec.loading_fields()} + return { + "type_hint": (lib_name, cls_name), + **load_spec_dict, + } + + @staticmethod + def _dict_to_component_spec(name: str, spec_dict: dict[str, Any]) -> ComponentSpec: + """ + Reconstruct a ComponentSpec from a loading specdict. + + This method converts a dictionary representation back into a ComponentSpec object. The dict should contain: + - "type_hint": tuple[str, str] + Library name and class name of the component. (e.g. ("diffusers", "UNet2DConditionModel")) + - All loading fields defined by `component_spec.loading_fields()`, typically: + - "pretrained_model_name_or_path": str | None + The model repository (e.g., "stabilityai/stable-diffusion-xl"). + - "subfolder": str | None + A subfolder within the pretrained_model_name_or_path where this component lives. + - "variant": str | None + An optional variant identifier for the model. + - "revision": str | None + A specific git revision (commit hash, tag, or branch). + - ... any other loading fields defined on the spec. + + Args: + name (str): + The name of the component. + specdict (dict[str, Any]): + A dictionary containing the component specification data. + + Returns: + ComponentSpec: A reconstructed ComponentSpec object. + + Example: + >>> spec_dict = { ... "type_hint": ("diffusers", "UNet2DConditionModel"), ... + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl", ... "subfolder": "unet", ... "variant": + None, ... "revision": None, ... } >>> ModularPipeline._dict_to_component_spec("unet", spec_dict) + ComponentSpec( + name="unet", type_hint=UNet2DConditionModel, config=None, + pretrained_model_name_or_path="stabilityai/stable-diffusion-xl", subfolder="unet", variant=None, + revision=None, default_creation_method="from_pretrained" + >>> spec_dict = { ... "type_hint": ("diffusers", "UNet2DConditionModel"), ... + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl", ... "subfolder": "unet", ... "variant": + None, ... "revision": None, ... } >>> ModularPipeline._dict_to_component_spec("unet", spec_dict) + ComponentSpec( + name="unet", type_hint=UNet2DConditionModel, config=None, + pretrained_model_name_or_path="stabilityai/stable-diffusion-xl", subfolder="unet", variant=None, + revision=None, default_creation_method="from_pretrained" + ) + """ + # make a shallow copy so we can pop() safely + spec_dict = spec_dict.copy() + # pull out and resolve the stored type_hint + lib_name, cls_name = spec_dict.pop("type_hint") + if lib_name is not None and cls_name is not None: + type_hint = simple_get_class_obj(lib_name, cls_name) + else: + type_hint = None + + # re‐assemble the ComponentSpec + return ComponentSpec( + name=name, + type_hint=type_hint, + **spec_dict, + ) + + def set_progress_bar_config(self, **kwargs): + for sub_block_name, sub_block in self._blocks.sub_blocks.items(): + if hasattr(sub_block, "set_progress_bar_config"): + sub_block.set_progress_bar_config(**kwargs) + + def __call__(self, state: PipelineState = None, output: str | list[str] = None, **kwargs): + """ + Execute the pipeline by running the pipeline blocks with the given inputs. + + Args: + state (`PipelineState`, optional): + PipelineState instance contains inputs and intermediate values. If None, a new `PipelineState` will be + created based on the user inputs and the pipeline blocks's requirement. + output (`str` or `list[str]`, optional): + Optional specification of what to return: + - None: Returns the complete `PipelineState` with all inputs and intermediates (default) + - str: Returns a specific intermediate value from the state (e.g. `output="image"`) + - list[str]: Returns a dictionary of specific intermediate values (e.g. `output=["image", + "latents"]`) + + + Examples: + ```python + # Get complete pipeline state + state = pipeline(prompt="A beautiful sunset", num_inference_steps=20) + print(state.intermediates) # All intermediate outputs + + # Get specific output + image = pipeline(prompt="A beautiful sunset", output="image") + + # Get multiple specific outputs + results = pipeline(prompt="A beautiful sunset", output=["image", "latents"]) + image, latents = results["image"], results["latents"] + + # Continue from previous state + state = pipeline(prompt="A beautiful sunset") + new_state = pipeline(state=state, output="image") # Continue processing + ``` + + Returns: + - If `output` is None: Complete `PipelineState` containing all inputs and intermediates + - If `output` is str: The specific intermediate value from the state (e.g. `output="image"`) + - If `output` is list[str]: Dictionary mapping output names to their values from the state (e.g. + `output=["image", "latents"]`) + """ + if state is None: + state = PipelineState() + else: + state = deepcopy(state) + + # Make a copy of the input kwargs + passed_kwargs = kwargs.copy() + + # Add inputs to state, using defaults if not provided in the kwargs or the state + # if same input already in the state, will override it if provided in the kwargs + for expected_input_param in self._blocks.inputs: + name = expected_input_param.name + default = expected_input_param.default + kwargs_type = expected_input_param.kwargs_type + if name in passed_kwargs: + state.set(name, passed_kwargs.pop(name), kwargs_type) + elif kwargs_type is not None and kwargs_type in passed_kwargs: + kwargs_dict = passed_kwargs.pop(kwargs_type) + for k, v in kwargs_dict.items(): + state.set(k, v, kwargs_type) + elif name is not None and name not in state.values: + state.set(name, default, kwargs_type) + + # Warn about unexpected inputs + if len(passed_kwargs) > 0: + warnings.warn(f"Unexpected input '{passed_kwargs.keys()}' provided. This input will be ignored.") + # Run the pipeline + with torch.no_grad(): + try: + _, state = self._blocks(self, state) + except Exception: + error_msg = f"Error in block: ({self._blocks.__class__.__name__}):\n" + logger.error(error_msg) + raise + + if output is None: + return state + + if isinstance(output, str): + return state.get(output) + + elif isinstance(output, (list, tuple)): + return state.get(output) + else: + raise ValueError(f"Output '{output}' is not a valid output type") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/modular_pipeline_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/modular_pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fa82f17a9108e66036a64275336c1afbbac81a8f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/modular_pipelines/modular_pipeline_utils.py @@ -0,0 +1,1359 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import re +import warnings +from collections import OrderedDict +from dataclasses import dataclass, field +from types import UnionType +from typing import Any, Literal, Type, Union, get_args, get_origin + +import PIL.Image +import torch +from packaging.specifiers import InvalidSpecifier, SpecifierSet + +from ..configuration_utils import ConfigMixin, FrozenDict +from ..loaders.single_file_utils import _is_single_file_path_or_url +from ..utils import DIFFUSERS_LOAD_ID_FIELDS, is_torch_available, logging +from ..utils.import_utils import _is_package_available + + +if is_torch_available(): + pass + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +# Template for modular pipeline model card description with placeholders +MODULAR_MODEL_CARD_TEMPLATE = """{model_description} + +## Example Usage + +[TODO] + +## Pipeline Architecture + +This modular pipeline is composed of the following blocks: + +{blocks_description} {trigger_inputs_section} + +## Model Components + +{components_description} {configs_section} + +{io_specification_section} +""" + + +class InsertableDict(OrderedDict): + def insert(self, key, value, index): + items = list(self.items()) + + # Remove key if it already exists to avoid duplicates + items = [(k, v) for k, v in items if k != key] + + # Insert at the specified index + items.insert(index, (key, value)) + + # Clear and update self + self.clear() + self.update(items) + + # Return self for method chaining + return self + + def __repr__(self): + if not self: + return "InsertableDict()" + + items = [] + for i, (key, value) in enumerate(self.items()): + if isinstance(value, type): + # For classes, show class name and + obj_repr = f"" + else: + # For objects (instances) and other types, show class name and module + obj_repr = f"" + items.append(f"{i}: ({repr(key)}, {obj_repr})") + + return "InsertableDict([\n " + ",\n ".join(items) + "\n])" + + +# YiYi TODO: +# 1. validate the dataclass fields +# 2. improve the docstring and potentially add a validator for load methods, make sure they are valid inputs to pass to from_pretrained() +@dataclass +class ComponentSpec: + """Specification for a pipeline component. + + A component can be created in two ways: + 1. From scratch using __init__ with a config dict + 2. using `from_pretrained` + + Attributes: + name: Name of the component + type_hint: Type of the component (e.g. UNet2DConditionModel) + description: Optional description of the component + config: Optional config dict for __init__ creation + pretrained_model_name_or_path: Optional pretrained_model_name_or_path path for from_pretrained creation + subfolder: Optional subfolder in pretrained_model_name_or_path + variant: Optional variant in pretrained_model_name_or_path + revision: Optional revision in pretrained_model_name_or_path + default_creation_method: Preferred creation method - "from_config" or "from_pretrained" + """ + + name: str | None = None + type_hint: Type | None = None + description: str | None = None + config: FrozenDict | None = None + pretrained_model_name_or_path: str | list[str] | None = field(default=None, metadata={"loading": True}) + subfolder: str | None = field(default="", metadata={"loading": True}) + variant: str | None = field(default=None, metadata={"loading": True}) + revision: str | None = field(default=None, metadata={"loading": True}) + default_creation_method: Literal["from_config", "from_pretrained"] = "from_pretrained" + + # Deprecated + repo: str | list[str] | None = field(default=None, metadata={"loading": False}) + + def __post_init__(self): + repo_value = self.repo + if repo_value is not None and self.pretrained_model_name_or_path is None: + object.__setattr__(self, "pretrained_model_name_or_path", repo_value) + + def __hash__(self): + """Make ComponentSpec hashable, using load_id as the hash value.""" + return hash((self.name, self.load_id, self.default_creation_method)) + + def __eq__(self, other): + """Compare ComponentSpec objects based on name and load_id.""" + if not isinstance(other, ComponentSpec): + return False + return ( + self.name == other.name + and self.load_id == other.load_id + and self.default_creation_method == other.default_creation_method + ) + + @classmethod + def from_component(cls, name: str, component: Any) -> Any: + """Create a ComponentSpec from a Component. + + Currently supports: + - Components created with `ComponentSpec.load()` method + - Components that are ConfigMixin subclasses but not nn.Modules (e.g. schedulers, guiders) + + Args: + name: Name of the component + component: Component object to create spec from + + Returns: + ComponentSpec object + + Raises: + ValueError: If component is not supported (e.g. nn.Module without load_id, non-ConfigMixin) + """ + + # Check if component was created with ComponentSpec.load() + if hasattr(component, "_diffusers_load_id") and component._diffusers_load_id != "null": + # component has a usable load_id -> from_pretrained, no warning needed + default_creation_method = "from_pretrained" + else: + # Component doesn't have a usable load_id, check if it's a nn.Module + if isinstance(component, torch.nn.Module): + raise ValueError( + "Cannot create ComponentSpec from a nn.Module that was not created with `ComponentSpec.load()` method." + ) + # ConfigMixin objects without weights (e.g. scheduler & guider) can be recreated with from_config + elif isinstance(component, ConfigMixin): + # warn if component was not created with `ComponentSpec` + if not hasattr(component, "_diffusers_load_id"): + logger.warning( + "Component was not created using `ComponentSpec`, defaulting to `from_config` creation method" + ) + default_creation_method = "from_config" + else: + # Not a ConfigMixin and not created with `ComponentSpec.load()` method -> throw error + raise ValueError( + f"Cannot create ComponentSpec from {name}({component.__class__.__name__}). Currently ComponentSpec.from_component() only supports: " + f" - components created with `ComponentSpec.load()` method" + f" - components that are a subclass of ConfigMixin but not a nn.Module (e.g. guider, scheduler)." + ) + + type_hint = component.__class__ + + if isinstance(component, ConfigMixin) and default_creation_method == "from_config": + config = component.config + else: + config = None + if hasattr(component, "_diffusers_load_id") and component._diffusers_load_id != "null": + load_spec = cls.decode_load_id(component._diffusers_load_id) + else: + load_spec = {} + + return cls( + name=name, type_hint=type_hint, config=config, default_creation_method=default_creation_method, **load_spec + ) + + @classmethod + def loading_fields(cls) -> list[str]: + """ + Return the names of all loading‐related fields (i.e. those whose field.metadata["loading"] is True). + """ + return DIFFUSERS_LOAD_ID_FIELDS.copy() + + @property + def load_id(self) -> str: + """ + Unique identifier for this spec's pretrained load, composed of + pretrained_model_name_or_path|subfolder|variant|revision (no empty segments). + """ + if self.default_creation_method == "from_config": + return "null" + parts = [getattr(self, k) for k in self.loading_fields()] + parts = ["null" if p is None else p for p in parts] + return "|".join(parts) + + @classmethod + def decode_load_id(cls, load_id: str) -> dict[str, str | None]: + """ + Decode a load_id string back into a dictionary of loading fields and values. + + Args: + load_id: The load_id string to decode, format: "pretrained_model_name_or_path|subfolder|variant|revision" + where None values are represented as "null" + + Returns: + Dict mapping loading field names to their values. e.g. { + "pretrained_model_name_or_path": "path/to/repo", "subfolder": "subfolder", "variant": "variant", + "revision": "revision" + } If a segment value is "null", it's replaced with None. Returns None if load_id is "null" (indicating + component not created with `load` method). + """ + + # Get all loading fields in order + loading_fields = cls.loading_fields() + result = dict.fromkeys(loading_fields) + + if load_id == "null": + return result + + # Split the load_id + parts = load_id.split("|") + + # Map parts to loading fields by position + for i, part in enumerate(parts): + if i < len(loading_fields): + # Convert "null" string back to None + result[loading_fields[i]] = None if part == "null" else part + + return result + + # YiYi TODO: I think we should only support ConfigMixin for this method (after we make guider and image_processors config mixin) + # otherwise we cannot do spec -> spec.create() -> component -> ComponentSpec.from_component(component) + # the config info is lost in the process + # remove error check in from_component spec and ModularPipeline.update_components() if we remove support for non configmixin in `create()` method + def create(self, config: FrozenDict | dict[str, Any] | None = None, **kwargs) -> Any: + """Create component using from_config with config.""" + + if self.type_hint is None or not isinstance(self.type_hint, type): + raise ValueError("`type_hint` is required when using from_config creation method.") + + config = config or self.config or {} + + if issubclass(self.type_hint, ConfigMixin): + component = self.type_hint.from_config(config, **kwargs) + else: + signature_params = inspect.signature(self.type_hint.__init__).parameters + init_kwargs = {} + for k, v in config.items(): + if k in signature_params: + init_kwargs[k] = v + for k, v in kwargs.items(): + if k in signature_params: + init_kwargs[k] = v + component = self.type_hint(**init_kwargs) + + component._diffusers_load_id = "null" + if hasattr(component, "config"): + self.config = component.config + + return component + + # YiYi TODO: add guard for type of model, if it is supported by from_pretrained + def load(self, **kwargs) -> Any: + """Load component using from_pretrained.""" + # select loading fields from kwargs passed from user: e.g. pretrained_model_name_or_path, subfolder, variant, revision, note the list could change + passed_loading_kwargs = {key: kwargs.pop(key) for key in self.loading_fields() if key in kwargs} + # merge loading field value in the spec with user passed values to create load_kwargs + load_kwargs = {key: passed_loading_kwargs.get(key, getattr(self, key)) for key in self.loading_fields()} + + pretrained_model_name_or_path = load_kwargs.pop("pretrained_model_name_or_path", None) + if pretrained_model_name_or_path is None: + raise ValueError( + "`pretrained_model_name_or_path` info is required when using `load` method (you can directly set it in `pretrained_model_name_or_path` field of the ComponentSpec or pass it as an argument)" + ) + is_single_file = _is_single_file_path_or_url(pretrained_model_name_or_path) + if is_single_file and self.type_hint is None: + raise ValueError( + f"`type_hint` is required when loading a single file model but is missing for component: {self.name}" + ) + + # `torch_dtype` is not an accepted parameter for tokenizers and processors. + # As a result, it gets stored in `init_kwargs`, which are written to the config + # during save. This causes JSON serialization to fail when saving the component. + if self.type_hint is not None and not issubclass(self.type_hint, torch.nn.Module): + kwargs.pop("torch_dtype", None) + + if self.type_hint is None: + try: + from diffusers import AutoModel + + component = AutoModel.from_pretrained(pretrained_model_name_or_path, **load_kwargs, **kwargs) + except Exception as e: + raise ValueError(f"Unable to load {self.name} without `type_hint`: {e}") + # update type_hint if AutoModel load successfully + self.type_hint = component.__class__ + else: + # determine load method + load_method = ( + getattr(self.type_hint, "from_single_file") + if is_single_file + else getattr(self.type_hint, "from_pretrained") + ) + + # `torch_dtype` is not an accepted parameter for tokenizers and processors. + # As a result, it gets stored in `init_kwargs`, which are written to the config + # during save. This causes JSON serialization to fail when saving the component. + if not issubclass(self.type_hint, torch.nn.Module): + kwargs.pop("torch_dtype", None) + + try: + component = load_method(pretrained_model_name_or_path, **load_kwargs, **kwargs) + except Exception as e: + raise ValueError(f"Unable to load {self.name} using load method: {e}") + + self.pretrained_model_name_or_path = pretrained_model_name_or_path + for k, v in load_kwargs.items(): + setattr(self, k, v) + component._diffusers_load_id = self.load_id + + return component + + +@dataclass +class ConfigSpec: + """Specification for a pipeline configuration parameter.""" + + name: str + default: Any + description: str | None = None + + +# ====================================================== +# InputParam and OutputParam templates +# ====================================================== + +INPUT_PARAM_TEMPLATES = { + "prompt": { + "type_hint": str, + "required": True, + "description": "The prompt or prompts to guide image generation.", + }, + "negative_prompt": { + "type_hint": str, + "description": "The prompt or prompts not to guide the image generation.", + }, + "max_sequence_length": { + "type_hint": int, + "default": 512, + "description": "Maximum sequence length for prompt encoding.", + }, + "height": { + "type_hint": int, + "description": "The height in pixels of the generated image.", + }, + "width": { + "type_hint": int, + "description": "The width in pixels of the generated image.", + }, + "num_inference_steps": { + "type_hint": int, + "default": 50, + "description": "The number of denoising steps.", + }, + "num_images_per_prompt": { + "type_hint": int, + "default": 1, + "description": "The number of images to generate per prompt.", + }, + "generator": { + "type_hint": torch.Generator, + "description": "Torch generator for deterministic generation.", + }, + "sigmas": { + "type_hint": list[float], + "description": "Custom sigmas for the denoising process.", + }, + "strength": { + "type_hint": float, + "default": 0.9, + "description": "Strength for img2img/inpainting.", + }, + "image": { + "type_hint": PIL.Image.Image | list[PIL.Image.Image], + "required": True, + "description": "Reference image(s) for denoising. Can be a single image or list of images.", + }, + "latents": { + "type_hint": torch.Tensor, + "description": "Pre-generated noisy latents for image generation.", + }, + "timesteps": { + "type_hint": torch.Tensor, + "description": "Timesteps for the denoising process.", + }, + "output_type": { + "type_hint": str, + "default": "pil", + "description": "Output format: 'pil', 'np', 'pt'.", + }, + "attention_kwargs": { + "type_hint": dict[str, Any], + "description": "Additional kwargs for attention processors.", + }, + "denoiser_input_fields": { + "name": None, + "kwargs_type": "denoiser_input_fields", + "description": "conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.", + }, + # inpainting + "mask_image": { + "type_hint": PIL.Image.Image, + "required": True, + "description": "Mask image for inpainting.", + }, + "padding_mask_crop": { + "type_hint": int, + "description": "Padding for mask cropping in inpainting.", + }, + # controlnet + "control_image": { + "type_hint": PIL.Image.Image, + "required": True, + "description": "Control image for ControlNet conditioning.", + }, + "control_guidance_start": { + "type_hint": float, + "default": 0.0, + "description": "When to start applying ControlNet.", + }, + "control_guidance_end": { + "type_hint": float, + "default": 1.0, + "description": "When to stop applying ControlNet.", + }, + "controlnet_conditioning_scale": { + "type_hint": float, + "default": 1.0, + "description": "Scale for ControlNet conditioning.", + }, + "layers": { + "type_hint": int, + "default": 4, + "description": "Number of layers to extract from the image", + }, + # common intermediate inputs + "prompt_embeds": { + "type_hint": torch.Tensor, + "required": True, + "description": "text embeddings used to guide the image generation. Can be generated from text_encoder step.", + }, + "prompt_embeds_mask": { + "type_hint": torch.Tensor, + "required": True, + "description": "mask for the text embeddings. Can be generated from text_encoder step.", + }, + "negative_prompt_embeds": { + "type_hint": torch.Tensor, + "description": "negative text embeddings used to guide the image generation. Can be generated from text_encoder step.", + }, + "negative_prompt_embeds_mask": { + "type_hint": torch.Tensor, + "description": "mask for the negative text embeddings. Can be generated from text_encoder step.", + }, + "image_latents": { + "type_hint": torch.Tensor, + "required": True, + "description": "image latents used to guide the image generation. Can be generated from vae_encoder step.", + }, + "batch_size": { + "type_hint": int, + "default": 1, + "description": "Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be generated in input step.", + }, + "dtype": { + "type_hint": torch.dtype, + "default": torch.float32, + "description": "The dtype of the model inputs, can be generated in input step.", + }, +} + +OUTPUT_PARAM_TEMPLATES = { + "images": { + "type_hint": list[PIL.Image.Image], + "description": "Generated images.", + }, + "videos": { + "type_hint": list[PIL.Image.Image], + "description": "The generated videos.", + }, + "latents": { + "type_hint": torch.Tensor, + "description": "Denoised latents.", + }, + # intermediate outputs + "prompt_embeds": { + "type_hint": torch.Tensor, + "kwargs_type": "denoiser_input_fields", + "description": "The prompt embeddings.", + }, + "prompt_embeds_mask": { + "type_hint": torch.Tensor, + "kwargs_type": "denoiser_input_fields", + "description": "The encoder attention mask.", + }, + "negative_prompt_embeds": { + "type_hint": torch.Tensor, + "kwargs_type": "denoiser_input_fields", + "description": "The negative prompt embeddings.", + }, + "negative_prompt_embeds_mask": { + "type_hint": torch.Tensor, + "kwargs_type": "denoiser_input_fields", + "description": "The negative prompt embeddings mask.", + }, + "image_latents": { + "type_hint": torch.Tensor, + "description": "The latent representation of the input image.", + }, +} + + +@dataclass +class InputParam: + """Specification for an input parameter.""" + + name: str = None + type_hint: Any = None + default: Any = None + required: bool = False + description: str = "" + kwargs_type: str = None + metadata: dict[str, Any] = None + + def __repr__(self): + return f"<{self.name}: {'required' if self.required else 'optional'}, default={self.default}>" + + @classmethod + def template(cls, template_name: str, note: str = None, **overrides) -> "InputParam": + """Get template for name if exists, otherwise raise ValueError.""" + if template_name not in INPUT_PARAM_TEMPLATES: + raise ValueError(f"InputParam template for {template_name} not found") + + template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy() + + # Determine the actual param name: + # 1. From overrides if provided + # 2. From template if present + # 3. Fall back to template_name + name = overrides.pop("name", template_kwargs.pop("name", template_name)) + + if note and "description" in template_kwargs: + template_kwargs["description"] = f"{template_kwargs['description']} ({note})" + + template_kwargs.update(overrides) + return cls(name=name, **template_kwargs) + + +@dataclass +class OutputParam: + """Specification for an output parameter.""" + + name: str + type_hint: Any = None + description: str = "" + kwargs_type: str = None + metadata: dict[str, Any] = None + + def __repr__(self): + return ( + f"<{self.name}: {self.type_hint.__name__ if hasattr(self.type_hint, '__name__') else str(self.type_hint)}>" + ) + + @classmethod + def template(cls, template_name: str, note: str = None, **overrides) -> "OutputParam": + """Get template for name if exists, otherwise raise ValueError.""" + if template_name not in OUTPUT_PARAM_TEMPLATES: + raise ValueError(f"OutputParam template for {template_name} not found") + + template_kwargs = OUTPUT_PARAM_TEMPLATES[template_name].copy() + + # Determine the actual param name: + # 1. From overrides if provided + # 2. From template if present + # 3. Fall back to template_name + name = overrides.pop("name", template_kwargs.pop("name", template_name)) + + if note and "description" in template_kwargs: + template_kwargs["description"] = f"{template_kwargs['description']} ({note})" + + template_kwargs.update(overrides) + return cls(name=name, **template_kwargs) + + +def format_inputs_short(inputs): + """ + Format input parameters into a string representation, with required params first followed by optional ones. + + Args: + inputs: list of input parameters with 'required' and 'name' attributes, and 'default' for optional params + + Returns: + str: Formatted string of input parameters + + Example: + >>> inputs = [ ... InputParam(name="prompt", required=True), ... InputParam(name="image", required=True), ... + InputParam(name="guidance_scale", required=False, default=7.5), ... InputParam(name="num_inference_steps", + required=False, default=50) ... ] >>> format_inputs_short(inputs) 'prompt, image, guidance_scale=7.5, + num_inference_steps=50' + """ + required_inputs = [param for param in inputs if param.required] + optional_inputs = [param for param in inputs if not param.required] + + required_str = ", ".join(param.name for param in required_inputs) + optional_str = ", ".join(f"{param.name}={param.default}" for param in optional_inputs) + + inputs_str = required_str + if optional_str: + inputs_str = f"{inputs_str}, {optional_str}" if required_str else optional_str + + return inputs_str + + +def format_intermediates_short(intermediate_inputs, required_intermediate_inputs, intermediate_outputs): + """ + Formats intermediate inputs and outputs of a block into a string representation. + + Args: + intermediate_inputs: list of intermediate input parameters + required_intermediate_inputs: list of required intermediate input names + intermediate_outputs: list of intermediate output parameters + + Returns: + str: Formatted string like: + Intermediates: + - inputs: Required(latents), dtype + - modified: latents # variables that appear in both inputs and outputs + - outputs: images # new outputs only + """ + # Handle inputs + input_parts = [] + for inp in intermediate_inputs: + if inp.name in required_intermediate_inputs: + input_parts.append(f"Required({inp.name})") + else: + if inp.name is None and inp.kwargs_type is not None: + inp_name = "*_" + inp.kwargs_type + else: + inp_name = inp.name + input_parts.append(inp_name) + + # Handle modified variables (appear in both inputs and outputs) + inputs_set = {inp.name for inp in intermediate_inputs} + modified_parts = [] + new_output_parts = [] + + for out in intermediate_outputs: + if out.name in inputs_set: + modified_parts.append(out.name) + else: + new_output_parts.append(out.name) + + result = [] + if input_parts: + result.append(f" - inputs: {', '.join(input_parts)}") + if modified_parts: + result.append(f" - modified: {', '.join(modified_parts)}") + if new_output_parts: + result.append(f" - outputs: {', '.join(new_output_parts)}") + + return "\n".join(result) if result else " (none)" + + +def format_params(params, header="Args", indent_level=4, max_line_length=115): + """Format a list of InputParam or OutputParam objects into a readable string representation. + + Args: + params: list of InputParam or OutputParam objects to format + header: Header text to use (e.g. "Args" or "Returns") + indent_level: Number of spaces to indent each parameter line (default: 4) + max_line_length: Maximum length for each line before wrapping (default: 115) + + Returns: + A formatted string representing all parameters + """ + if not params: + return "" + + base_indent = " " * indent_level + param_indent = " " * (indent_level + 4) + desc_indent = " " * (indent_level + 8) + formatted_params = [] + + def get_type_str(type_hint): + if isinstance(type_hint, UnionType) or get_origin(type_hint) is Union: + type_strs = [t.__name__ if hasattr(t, "__name__") else str(t) for t in get_args(type_hint)] + return " | ".join(type_strs) + return type_hint.__name__ if hasattr(type_hint, "__name__") else str(type_hint) + + def wrap_text(text, indent, max_length): + """Wrap text while preserving markdown links and maintaining indentation.""" + words = text.split() + lines = [] + current_line = [] + current_length = 0 + + for word in words: + word_length = len(word) + (1 if current_line else 0) + + if current_line and current_length + word_length > max_length: + lines.append(" ".join(current_line)) + current_line = [word] + current_length = len(word) + else: + current_line.append(word) + current_length += word_length + + if current_line: + lines.append(" ".join(current_line)) + + return f"\n{indent}".join(lines) + + # Add the header + formatted_params.append(f"{base_indent}{header}:") + + for param in params: + # Format parameter name and type + type_str = get_type_str(param.type_hint) if param.type_hint != Any else "" + # YiYi Notes: remove this line if we remove kwargs_type + name = f"**{param.kwargs_type}" if param.name is None and param.kwargs_type is not None else param.name + param_str = f"{param_indent}{name} (`{type_str}`" + + # Add optional tag and default value if parameter is an InputParam and optional + if hasattr(param, "required"): + if not param.required: + param_str += ", *optional*" + if param.default is not None: + param_str += f", defaults to {param.default}" + param_str += "):" + + # Add description on a new line with additional indentation and wrapping + if param.description: + desc = re.sub(r"\[(.*?)\]\((https?://[^\s\)]+)\)", r"[\1](\2)", param.description) + wrapped_desc = wrap_text(desc, desc_indent, max_line_length) + param_str += f"\n{desc_indent}{wrapped_desc}" + else: + param_str += f"\n{desc_indent}TODO: Add description." + + formatted_params.append(param_str) + + return "\n".join(formatted_params) + + +def format_input_params(input_params, indent_level=4, max_line_length=115): + """Format a list of InputParam objects into a readable string representation. + + Args: + input_params: list of InputParam objects to format + indent_level: Number of spaces to indent each parameter line (default: 4) + max_line_length: Maximum length for each line before wrapping (default: 115) + + Returns: + A formatted string representing all input parameters + """ + return format_params(input_params, "Inputs", indent_level, max_line_length) + + +def format_output_params(output_params, indent_level=4, max_line_length=115): + """Format a list of OutputParam objects into a readable string representation. + + Args: + output_params: list of OutputParam objects to format + indent_level: Number of spaces to indent each parameter line (default: 4) + max_line_length: Maximum length for each line before wrapping (default: 115) + + Returns: + A formatted string representing all output parameters + """ + return format_params(output_params, "Outputs", indent_level, max_line_length) + + +def format_params_markdown(params, header="Inputs"): + """Format a list of InputParam or OutputParam objects as a markdown bullet-point list. + + Suitable for model cards rendered on Hugging Face Hub. + + Args: + params: list of InputParam or OutputParam objects to format + header: Header text (e.g. "Inputs" or "Outputs") + + Returns: + A formatted markdown string, or empty string if params is empty. + """ + if not params: + return "" + + def get_type_str(type_hint): + if isinstance(type_hint, UnionType) or get_origin(type_hint) is Union: + type_strs = [t.__name__ if hasattr(t, "__name__") else str(t) for t in get_args(type_hint)] + return " | ".join(type_strs) + return type_hint.__name__ if hasattr(type_hint, "__name__") else str(type_hint) + + lines = [f"**{header}:**\n"] if header else [] + for param in params: + type_str = get_type_str(param.type_hint) if param.type_hint != Any else "" + name = f"**{param.kwargs_type}" if param.name is None and param.kwargs_type is not None else param.name + param_str = f"- `{name}` (`{type_str}`" + + if hasattr(param, "required") and not param.required: + param_str += ", *optional*" + if param.default is not None: + param_str += f", defaults to `{param.default}`" + param_str += ")" + + desc = param.description if param.description else "No description provided" + param_str += f": {desc}" + lines.append(param_str) + + return "\n".join(lines) + + +def format_components(components, indent_level=4, max_line_length=115, add_empty_lines=True): + """Format a list of ComponentSpec objects into a readable string representation. + + Args: + components: list of ComponentSpec objects to format + indent_level: Number of spaces to indent each component line (default: 4) + max_line_length: Maximum length for each line before wrapping (default: 115) + add_empty_lines: Whether to add empty lines between components (default: True) + + Returns: + A formatted string representing all components + """ + if not components: + return "" + + base_indent = " " * indent_level + component_indent = " " * (indent_level + 4) + formatted_components = [] + + # Add the header + formatted_components.append(f"{base_indent}Components:") + if add_empty_lines: + formatted_components.append("") + + # Add each component with optional empty lines between them + for i, component in enumerate(components): + # Get type name, handling special cases + type_name = ( + component.type_hint.__name__ if hasattr(component.type_hint, "__name__") else str(component.type_hint) + ) + + component_desc = f"{component_indent}{component.name} (`{type_name}`)" + if component.description: + component_desc += f": {component.description}" + + # Get the loading fields dynamically + loading_field_values = [] + for field_name in component.loading_fields(): + field_value = getattr(component, field_name) + if field_value: + loading_field_values.append(f"{field_name}={field_value}") + + # Add loading field information if available + if loading_field_values: + component_desc += f" [{', '.join(loading_field_values)}]" + + formatted_components.append(component_desc) + + # Add an empty line after each component except the last one + if add_empty_lines and i < len(components) - 1: + formatted_components.append("") + + return "\n".join(formatted_components) + + +def format_configs(configs, indent_level=4, max_line_length=115, add_empty_lines=True): + """Format a list of ConfigSpec objects into a readable string representation. + + Args: + configs: list of ConfigSpec objects to format + indent_level: Number of spaces to indent each config line (default: 4) + max_line_length: Maximum length for each line before wrapping (default: 115) + add_empty_lines: Whether to add empty lines between configs (default: True) + + Returns: + A formatted string representing all configs + """ + if not configs: + return "" + + base_indent = " " * indent_level + config_indent = " " * (indent_level + 4) + formatted_configs = [] + + # Add the header + formatted_configs.append(f"{base_indent}Configs:") + if add_empty_lines: + formatted_configs.append("") + + # Add each config with optional empty lines between them + for i, config in enumerate(configs): + config_desc = f"{config_indent}{config.name} (default: {config.default})" + if config.description: + config_desc += f": {config.description}" + formatted_configs.append(config_desc) + + # Add an empty line after each config except the last one + if add_empty_lines and i < len(configs) - 1: + formatted_configs.append("") + + return "\n".join(formatted_configs) + + +def format_workflow(workflow_map): + """Format a workflow map into a readable string representation. + + Args: + workflow_map: Dictionary mapping workflow names to trigger inputs + + Returns: + A formatted string representing all workflows + """ + if workflow_map is None: + return "" + + lines = ["Supported workflows:"] + for workflow_name, trigger_inputs in workflow_map.items(): + required_inputs = [k for k, v in trigger_inputs.items() if v] + if required_inputs: + inputs_str = ", ".join(f"`{t}`" for t in required_inputs) + lines.append(f" - `{workflow_name}`: requires {inputs_str}") + else: + lines.append(f" - `{workflow_name}`: default (no additional inputs required)") + + return "\n".join(lines) + + +def make_doc_string( + inputs, + outputs, + description="", + class_name=None, + expected_components=None, + expected_configs=None, +): + """ + Generates a formatted documentation string describing the pipeline block's parameters and structure. + + Args: + inputs: list of input parameters + intermediate_inputs: list of intermediate input parameters + outputs: list of output parameters + description (str, *optional*): Description of the block + class_name (str, *optional*): Name of the class to include in the documentation + expected_components (list[ComponentSpec], *optional*): list of expected components + expected_configs (list[ConfigSpec], *optional*): list of expected configurations + + Returns: + str: A formatted string containing information about components, configs, call parameters, + intermediate inputs/outputs, and final outputs. + """ + output = "" + + # Add class name if provided + if class_name: + output += f"class {class_name}\n\n" + + # Add description + if description: + desc_lines = description.strip().split("\n") + aligned_desc = "\n".join(" " + line.rstrip() for line in desc_lines) + output += aligned_desc + "\n\n" + + # Add components section if provided + if expected_components and len(expected_components) > 0: + components_str = format_components(expected_components, indent_level=2, add_empty_lines=False) + output += components_str + "\n\n" + + # Add configs section if provided + if expected_configs and len(expected_configs) > 0: + configs_str = format_configs(expected_configs, indent_level=2, add_empty_lines=False) + output += configs_str + "\n\n" + + # Add inputs section + output += format_input_params(inputs, indent_level=2) + + # Add outputs section + output += "\n\n" + output += format_output_params(outputs, indent_level=2) + + return output + + +def _validate_requirements(reqs): + if reqs is None: + normalized_reqs = {} + else: + if not isinstance(reqs, dict): + raise ValueError( + "Requirements must be provided as a dictionary mapping package names to version specifiers." + ) + normalized_reqs = _normalize_requirements(reqs) + + if not normalized_reqs: + return {} + + final: dict[str, str] = {} + for req, specified_ver in normalized_reqs.items(): + req_available, req_actual_ver = _is_package_available(req) + if not req_available: + logger.warning(f"{req} was specified in the requirements but wasn't found in the current environment.") + + if specified_ver: + try: + specifier = SpecifierSet(specified_ver) + except InvalidSpecifier as err: + raise ValueError(f"Requirement specifier '{specified_ver}' for {req} is invalid.") from err + + if req_actual_ver == "N/A": + logger.warning( + f"Version of {req} could not be determined to validate requirement '{specified_ver}'. Things might work unexpected." + ) + elif not specifier.contains(req_actual_ver, prereleases=True): + logger.warning( + f"{req} requirement '{specified_ver}' is not satisfied by the installed version {req_actual_ver}. Things might work unexpected." + ) + + final[req] = specified_ver + + return final + + +def _normalize_requirements(reqs): + if not reqs: + return {} + + normalized: "OrderedDict[str, str]" = OrderedDict() + + def _accumulate(mapping: dict[str, Any]): + for pkg, spec in mapping.items(): + if isinstance(spec, dict): + # This is recursive because blocks are composable. This way, we can merge requirements + # from multiple blocks. + _accumulate(spec) + continue + + pkg_name = str(pkg).strip() + if not pkg_name: + raise ValueError("Requirement package name cannot be empty.") + + spec_str = "" if spec is None else str(spec).strip() + if spec_str and not spec_str.startswith(("<", ">", "=", "!", "~")): + spec_str = f"=={spec_str}" + + existing_spec = normalized.get(pkg_name) + if existing_spec is not None: + if not existing_spec and spec_str: + normalized[pkg_name] = spec_str + elif existing_spec and spec_str and existing_spec != spec_str: + try: + combined_spec = SpecifierSet(",".join(filter(None, [existing_spec, spec_str]))) + except InvalidSpecifier: + logger.warning( + f"Conflicting requirements for '{pkg_name}' detected: '{existing_spec}' vs '{spec_str}'. Keeping '{existing_spec}'." + ) + else: + normalized[pkg_name] = str(combined_spec) + continue + + normalized[pkg_name] = spec_str + + _accumulate(reqs) + + return normalized + + +def combine_inputs(*named_input_lists: list[tuple[str, list[InputParam]]]) -> list[InputParam]: + """ + Combines multiple lists of InputParam objects from different blocks. For duplicate inputs, updates only if current + default value is None and new default value is not None. Warns if multiple non-None default values exist for the + same input. + + Args: + named_input_lists: List of tuples containing (block_name, input_param_list) pairs + + Returns: + List[InputParam]: Combined list of unique InputParam objects + """ + combined_dict = {} # name -> InputParam + value_sources = {} # name -> block_name + + for block_name, inputs in named_input_lists: + for input_param in inputs: + if input_param.name is None and input_param.kwargs_type is not None: + input_name = "*_" + input_param.kwargs_type + else: + input_name = input_param.name + if input_name in combined_dict: + current_param = combined_dict[input_name] + if ( + current_param.default is not None + and input_param.default is not None + and current_param.default != input_param.default + ): + warnings.warn( + f"Multiple different default values found for input '{input_name}': " + f"{current_param.default} (from block '{value_sources[input_name]}') and " + f"{input_param.default} (from block '{block_name}'). Using {current_param.default}." + ) + if current_param.default is None and input_param.default is not None: + combined_dict[input_name] = input_param + value_sources[input_name] = block_name + else: + combined_dict[input_name] = input_param + value_sources[input_name] = block_name + + return list(combined_dict.values()) + + +def combine_outputs(*named_output_lists: list[tuple[str, list[OutputParam]]]) -> list[OutputParam]: + """ + Combines multiple lists of OutputParam objects from different blocks. For duplicate outputs, keeps the first + occurrence of each output name. + + Args: + named_output_lists: List of tuples containing (block_name, output_param_list) pairs + + Returns: + List[OutputParam]: Combined list of unique OutputParam objects + """ + combined_dict = {} # name -> OutputParam + + for block_name, outputs in named_output_lists: + for output_param in outputs: + if (output_param.name not in combined_dict) or ( + combined_dict[output_param.name].kwargs_type is None and output_param.kwargs_type is not None + ): + combined_dict[output_param.name] = output_param + + return list(combined_dict.values()) + + +def generate_modular_model_card_content(blocks) -> dict[str, Any]: + """ + Generate model card content for a modular pipeline. + + This function creates a comprehensive model card with descriptions of the pipeline's architecture, components, + configurations, inputs, and outputs. + + Args: + blocks: The pipeline's blocks object containing all pipeline specifications + + Returns: + Dict[str, Any]: A dictionary containing formatted content sections: + - pipeline_name: Name of the pipeline + - model_description: Overall description with pipeline type + - blocks_description: Detailed architecture of blocks + - components_description: List of required components + - configs_section: Configuration parameters section + - io_specification_section: Input/Output specification (per-workflow or unified) + - trigger_inputs_section: Conditional execution information + - tags: List of relevant tags for the model card + """ + blocks_class_name = blocks.__class__.__name__ + pipeline_name = blocks_class_name.replace("Blocks", " Pipeline") + description = getattr(blocks, "description", "A modular diffusion pipeline.") + + # generate blocks architecture description + blocks_desc_parts = [] + sub_blocks = getattr(blocks, "sub_blocks", None) or {} + if sub_blocks: + for i, (name, block) in enumerate(sub_blocks.items()): + block_class = block.__class__.__name__ + block_desc = block.description.split("\n")[0] if getattr(block, "description", "") else "" + blocks_desc_parts.append(f"{i + 1}. **{name}** (`{block_class}`)") + if block_desc: + blocks_desc_parts.append(f" - {block_desc}") + + blocks_description = "\n".join(blocks_desc_parts) if blocks_desc_parts else "No blocks defined." + + components = getattr(blocks, "expected_components", []) + if components: + components_str = format_components(components, indent_level=0, add_empty_lines=False) + # remove the "Components:" header since template has its own + components_description = components_str.replace("Components:\n", "").strip() + if components_description: + # Convert to enumerated list + lines = [line.strip() for line in components_description.split("\n") if line.strip()] + enumerated_lines = [f"{i + 1}. {line}" for i, line in enumerate(lines)] + components_description = "\n".join(enumerated_lines) + else: + components_description = "No specific components required." + else: + components_description = "No specific components required. Components can be loaded dynamically." + + configs = getattr(blocks, "expected_configs", []) + configs_section = "" + if configs: + configs_str = format_configs(configs, indent_level=0, add_empty_lines=False) + configs_description = configs_str.replace("Configs:\n", "").strip() + if configs_description: + configs_section = f"\n\n## Configuration Parameters\n\n{configs_description}" + + # Branch on whether workflows are defined + has_workflows = getattr(blocks, "_workflow_map", None) is not None + + if has_workflows: + workflow_map = blocks._workflow_map + parts = [] + + # If blocks overrides outputs (e.g. to return just "images" instead of all intermediates), + # use that as the shared output for all workflows + blocks_outputs = blocks.outputs + blocks_intermediate = getattr(blocks, "intermediate_outputs", None) + shared_outputs = ( + blocks_outputs if blocks_intermediate is not None and blocks_outputs != blocks_intermediate else None + ) + + parts.append("## Workflow Input Specification\n") + + # Per-workflow details: show trigger inputs with full param descriptions + for wf_name, trigger_inputs in workflow_map.items(): + trigger_input_names = set(trigger_inputs.keys()) + try: + workflow_blocks = blocks.get_workflow(wf_name) + except Exception: + parts.append(f"
\n{wf_name}\n") + parts.append("*Could not resolve workflow blocks.*\n") + parts.append("
\n") + continue + + wf_inputs = workflow_blocks.inputs + # Show only trigger inputs with full parameter descriptions + trigger_params = [p for p in wf_inputs if p.name in trigger_input_names] + + parts.append(f"
\n{wf_name}\n") + + inputs_str = format_params_markdown(trigger_params, header=None) + parts.append(inputs_str if inputs_str else "No additional inputs required.") + parts.append("") + + parts.append("
\n") + + # Common Inputs & Outputs section (like non-workflow pipelines) + all_inputs = blocks.inputs + all_outputs = shared_outputs if shared_outputs is not None else blocks.outputs + + inputs_str = format_params_markdown(all_inputs, "Inputs") + outputs_str = format_params_markdown(all_outputs, "Outputs") + inputs_description = inputs_str if inputs_str else "No specific inputs defined." + outputs_description = outputs_str if outputs_str else "Standard pipeline outputs." + + parts.append(f"\n## Input/Output Specification\n\n{inputs_description}\n\n{outputs_description}") + + io_specification_section = "\n".join(parts) + # Suppress trigger_inputs_section when workflows are shown (it's redundant) + trigger_inputs_section = "" + else: + # Unified I/O section (original behavior) + inputs = blocks.inputs + outputs = blocks.outputs + inputs_str = format_params_markdown(inputs, "Inputs") + outputs_str = format_params_markdown(outputs, "Outputs") + inputs_description = inputs_str if inputs_str else "No specific inputs defined." + outputs_description = outputs_str if outputs_str else "Standard pipeline outputs." + io_specification_section = f"## Input/Output Specification\n\n{inputs_description}\n\n{outputs_description}" + + trigger_inputs_section = "" + if hasattr(blocks, "trigger_inputs") and blocks.trigger_inputs: + trigger_inputs_list = sorted([t for t in blocks.trigger_inputs if t is not None]) + if trigger_inputs_list: + trigger_inputs_str = ", ".join(f"`{t}`" for t in trigger_inputs_list) + trigger_inputs_section = f""" +### Conditional Execution + +This pipeline contains blocks that are selected at runtime based on inputs: +- **Trigger Inputs**: {trigger_inputs_str} +""" + + # generate tags based on pipeline characteristics + tags = ["modular-diffusers", "diffusers"] + + if hasattr(blocks, "model_name") and blocks.model_name: + tags.append(blocks.model_name) + + if has_workflows: + # Derive tags from workflow names + workflow_names = set(blocks._workflow_map.keys()) + if any("inpainting" in wf for wf in workflow_names): + tags.append("inpainting") + if any("image2image" in wf for wf in workflow_names): + tags.append("image-to-image") + if any("controlnet" in wf for wf in workflow_names): + tags.append("controlnet") + if any("text2image" in wf for wf in workflow_names): + tags.append("text-to-image") + elif hasattr(blocks, "trigger_inputs") and blocks.trigger_inputs: + triggers = blocks.trigger_inputs + if any(t in triggers for t in ["mask", "mask_image"]): + tags.append("inpainting") + if any(t in triggers for t in ["image", "image_latents"]): + tags.append("image-to-image") + if any(t in triggers for t in ["control_image", "controlnet_cond"]): + tags.append("controlnet") + if not any(t in triggers for t in ["image", "mask", "image_latents", "mask_image"]): + tags.append("text-to-image") + else: + tags.append("text-to-image") + + block_count = len(blocks.sub_blocks) + model_description = f"""This is a modular diffusion pipeline built with 🧨 Diffusers' modular pipeline framework. + +**Pipeline Type**: {blocks_class_name} + +**Description**: {description} + +This pipeline uses a {block_count}-block architecture that can be customized and extended.""" + + return { + "pipeline_name": pipeline_name, + "model_description": model_description, + "blocks_description": blocks_description, + "components_description": components_description, + "configs_section": configs_section, + "io_specification_section": io_specification_section, + "trigger_inputs_section": trigger_inputs_section, + "tags": tags, + } diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8007035338b028a99c6a86dc181b56018bc78ad2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/__init__.py @@ -0,0 +1,968 @@ +from typing import TYPE_CHECKING + +from ..utils import ( + DIFFUSERS_SLOW_IMPORT, + OptionalDependencyNotAvailable, + _LazyModule, + get_objects_from_module, + is_flax_available, + is_librosa_available, + is_note_seq_available, + is_onnx_available, + is_opencv_available, + is_sentencepiece_available, + is_torch_available, + is_torch_npu_available, + is_transformers_available, + is_transformers_version, +) + + +# These modules contain pipelines from multiple libraries/frameworks +_dummy_objects = {} +_import_structure = { + "controlnet": [], + "controlnet_hunyuandit": [], + "controlnet_sd3": [], + "controlnet_xs": [], + "deprecated": [], + "latent_diffusion": [], + "ledits_pp": [], + "marigold": [], + "pag": [], + "stable_diffusion": [], + "stable_diffusion_xl": [], +} + +try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_pt_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_pt_objects)) +else: + _import_structure["auto_pipeline"] = [ + "AutoPipelineForImage2Image", + "AutoPipelineForInpainting", + "AutoPipelineForText2Image", + ] + _import_structure["consistency_models"] = ["ConsistencyModelPipeline"] + _import_structure["dance_diffusion"] = ["DanceDiffusionPipeline"] + _import_structure["ddim"] = ["DDIMPipeline"] + _import_structure["ddpm"] = ["DDPMPipeline"] + _import_structure["dit"] = ["DiTPipeline"] + _import_structure["latent_diffusion"].extend(["LDMSuperResolutionPipeline"]) + _import_structure["pipeline_utils"] = [ + "AudioPipelineOutput", + "DiffusionPipeline", + "StableDiffusionMixin", + "ImagePipelineOutput", + ] + _import_structure["deprecated"].extend( + [ + "PNDMPipeline", + "LDMPipeline", + "RePaintPipeline", + "ScoreSdeVePipeline", + "KarrasVePipeline", + ] + ) +try: + if not (is_torch_available() and is_librosa_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_torch_and_librosa_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_torch_and_librosa_objects)) +else: + _import_structure["deprecated"].extend(["AudioDiffusionPipeline", "Mel"]) + +try: + if not (is_transformers_available() and is_torch_available() and is_note_seq_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_transformers_and_torch_and_note_seq_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_transformers_and_torch_and_note_seq_objects)) +else: + _import_structure["deprecated"].extend( + [ + "MidiProcessor", + "SpectrogramDiffusionPipeline", + ] + ) + +try: + if not (is_torch_available() and is_transformers_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_torch_and_transformers_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects)) +else: + _import_structure["deprecated"].extend( + [ + "VQDiffusionPipeline", + "AltDiffusionPipeline", + "AltDiffusionImg2ImgPipeline", + "CycleDiffusionPipeline", + "StableDiffusionInpaintPipelineLegacy", + "StableDiffusionPix2PixZeroPipeline", + "StableDiffusionParadigmsPipeline", + "StableDiffusionModelEditingPipeline", + "VersatileDiffusionDualGuidedPipeline", + "VersatileDiffusionImageVariationPipeline", + "VersatileDiffusionPipeline", + "VersatileDiffusionTextToImagePipeline", + ] + ) + _import_structure["allegro"] = ["AllegroPipeline"] + _import_structure["amused"] = ["AmusedImg2ImgPipeline", "AmusedInpaintPipeline", "AmusedPipeline"] + _import_structure["animatediff"] = [ + "AnimateDiffPipeline", + "AnimateDiffControlNetPipeline", + "AnimateDiffSDXLPipeline", + "AnimateDiffSparseControlNetPipeline", + "AnimateDiffVideoToVideoPipeline", + "AnimateDiffVideoToVideoControlNetPipeline", + ] + _import_structure["bria"] = ["BriaPipeline"] + _import_structure["bria_fibo"] = ["BriaFiboPipeline", "BriaFiboEditPipeline"] + _import_structure["flux2"] = ["Flux2Pipeline", "Flux2KleinPipeline"] + _import_structure["flux"] = [ + "FluxControlPipeline", + "FluxControlInpaintPipeline", + "FluxControlImg2ImgPipeline", + "FluxControlNetPipeline", + "FluxControlNetImg2ImgPipeline", + "FluxControlNetInpaintPipeline", + "FluxImg2ImgPipeline", + "FluxInpaintPipeline", + "FluxPipeline", + "FluxFillPipeline", + "FluxPriorReduxPipeline", + "ReduxImageEncoder", + "FluxKontextPipeline", + "FluxKontextInpaintPipeline", + ] + _import_structure["prx"] = ["PRXPipeline"] + _import_structure["audioldm"] = ["AudioLDMPipeline"] + _import_structure["audioldm2"] = [ + "AudioLDM2Pipeline", + "AudioLDM2ProjectionModel", + "AudioLDM2UNet2DConditionModel", + ] + _import_structure["blip_diffusion"] = ["BlipDiffusionPipeline"] + _import_structure["chroma"] = ["ChromaPipeline", "ChromaImg2ImgPipeline", "ChromaInpaintPipeline"] + _import_structure["cogvideo"] = [ + "CogVideoXPipeline", + "CogVideoXImageToVideoPipeline", + "CogVideoXVideoToVideoPipeline", + "CogVideoXFunControlPipeline", + ] + _import_structure["cogview3"] = ["CogView3PlusPipeline"] + _import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"] + _import_structure["consisid"] = ["ConsisIDPipeline"] + _import_structure["cosmos"] = [ + "Cosmos2_5_PredictBasePipeline", + "Cosmos2_5_TransferPipeline", + "Cosmos2TextToImagePipeline", + "CosmosTextToWorldPipeline", + "CosmosVideoToWorldPipeline", + "Cosmos2VideoToWorldPipeline", + ] + _import_structure["controlnet"].extend( + [ + "BlipDiffusionControlNetPipeline", + "StableDiffusionControlNetImg2ImgPipeline", + "StableDiffusionControlNetInpaintPipeline", + "StableDiffusionControlNetPipeline", + "StableDiffusionXLControlNetImg2ImgPipeline", + "StableDiffusionXLControlNetInpaintPipeline", + "StableDiffusionXLControlNetPipeline", + "StableDiffusionXLControlNetUnionPipeline", + "StableDiffusionXLControlNetUnionInpaintPipeline", + "StableDiffusionXLControlNetUnionImg2ImgPipeline", + ] + ) + _import_structure["pag"].extend( + [ + "StableDiffusionControlNetPAGInpaintPipeline", + "AnimateDiffPAGPipeline", + "KolorsPAGPipeline", + "HunyuanDiTPAGPipeline", + "StableDiffusion3PAGPipeline", + "StableDiffusion3PAGImg2ImgPipeline", + "StableDiffusionPAGPipeline", + "StableDiffusionPAGImg2ImgPipeline", + "StableDiffusionPAGInpaintPipeline", + "StableDiffusionControlNetPAGPipeline", + "StableDiffusionXLPAGPipeline", + "StableDiffusionXLPAGInpaintPipeline", + "StableDiffusionXLControlNetPAGImg2ImgPipeline", + "StableDiffusionXLControlNetPAGPipeline", + "StableDiffusionXLPAGImg2ImgPipeline", + "PixArtSigmaPAGPipeline", + "SanaPAGPipeline", + ] + ) + _import_structure["controlnet_xs"].extend( + [ + "StableDiffusionControlNetXSPipeline", + "StableDiffusionXLControlNetXSPipeline", + ] + ) + _import_structure["controlnet_hunyuandit"].extend( + [ + "HunyuanDiTControlNetPipeline", + ] + ) + _import_structure["controlnet_sd3"].extend( + [ + "StableDiffusion3ControlNetPipeline", + "StableDiffusion3ControlNetInpaintingPipeline", + ] + ) + _import_structure["deepfloyd_if"] = [ + "IFImg2ImgPipeline", + "IFImg2ImgSuperResolutionPipeline", + "IFInpaintingPipeline", + "IFInpaintingSuperResolutionPipeline", + "IFPipeline", + "IFSuperResolutionPipeline", + ] + _import_structure["easyanimate"] = [ + "EasyAnimatePipeline", + "EasyAnimateInpaintPipeline", + "EasyAnimateControlPipeline", + ] + _import_structure["helios"] = ["HeliosPipeline", "HeliosPyramidPipeline"] + _import_structure["hidream_image"] = ["HiDreamImagePipeline"] + _import_structure["hunyuandit"] = ["HunyuanDiTPipeline"] + _import_structure["hunyuan_video"] = [ + "HunyuanVideoPipeline", + "HunyuanSkyreelsImageToVideoPipeline", + "HunyuanVideoImageToVideoPipeline", + "HunyuanVideoFramepackPipeline", + ] + _import_structure["hunyuan_video1_5"] = ["HunyuanVideo15Pipeline", "HunyuanVideo15ImageToVideoPipeline"] + _import_structure["hunyuan_image"] = ["HunyuanImagePipeline", "HunyuanImageRefinerPipeline"] + _import_structure["kandinsky"] = [ + "KandinskyCombinedPipeline", + "KandinskyImg2ImgCombinedPipeline", + "KandinskyImg2ImgPipeline", + "KandinskyInpaintCombinedPipeline", + "KandinskyInpaintPipeline", + "KandinskyPipeline", + "KandinskyPriorPipeline", + ] + _import_structure["kandinsky2_2"] = [ + "KandinskyV22CombinedPipeline", + "KandinskyV22ControlnetImg2ImgPipeline", + "KandinskyV22ControlnetPipeline", + "KandinskyV22Img2ImgCombinedPipeline", + "KandinskyV22Img2ImgPipeline", + "KandinskyV22InpaintCombinedPipeline", + "KandinskyV22InpaintPipeline", + "KandinskyV22Pipeline", + "KandinskyV22PriorEmb2EmbPipeline", + "KandinskyV22PriorPipeline", + ] + _import_structure["kandinsky3"] = [ + "Kandinsky3Img2ImgPipeline", + "Kandinsky3Pipeline", + ] + _import_structure["latent_consistency_models"] = [ + "LatentConsistencyModelImg2ImgPipeline", + "LatentConsistencyModelPipeline", + ] + _import_structure["latent_diffusion"].extend(["LDMTextToImagePipeline"]) + _import_structure["ledits_pp"].extend( + [ + "LEditsPPPipelineStableDiffusion", + "LEditsPPPipelineStableDiffusionXL", + ] + ) + _import_structure["latte"] = ["LattePipeline"] + _import_structure["ltx"] = [ + "LTXPipeline", + "LTXImageToVideoPipeline", + "LTXConditionPipeline", + "LTXLatentUpsamplePipeline", + "LTXI2VLongMultiPromptPipeline", + ] + _import_structure["ltx2"] = [ + "LTX2Pipeline", + "LTX2ConditionPipeline", + "LTX2ImageToVideoPipeline", + "LTX2LatentUpsamplePipeline", + ] + _import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"] + _import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"] + _import_structure["lucy"] = ["LucyEditPipeline"] + _import_structure["longcat_image"] = ["LongCatImagePipeline", "LongCatImageEditPipeline"] + _import_structure["marigold"].extend( + [ + "MarigoldDepthPipeline", + "MarigoldIntrinsicsPipeline", + "MarigoldNormalsPipeline", + ] + ) + _import_structure["mochi"] = ["MochiPipeline"] + _import_structure["musicldm"] = ["MusicLDMPipeline"] + _import_structure["omnigen"] = ["OmniGenPipeline"] + _import_structure["ovis_image"] = ["OvisImagePipeline"] + _import_structure["visualcloze"] = ["VisualClozePipeline", "VisualClozeGenerationPipeline"] + _import_structure["paint_by_example"] = ["PaintByExamplePipeline"] + _import_structure["pia"] = ["PIAPipeline"] + _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"] + _import_structure["sana"] = [ + "SanaPipeline", + "SanaSprintPipeline", + "SanaControlNetPipeline", + "SanaSprintImg2ImgPipeline", + ] + _import_structure["sana_video"] = [ + "SanaVideoPipeline", + "SanaImageToVideoPipeline", + ] + _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"] + _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"] + _import_structure["stable_audio"] = [ + "StableAudioProjectionModel", + "StableAudioPipeline", + ] + _import_structure["stable_cascade"] = [ + "StableCascadeCombinedPipeline", + "StableCascadeDecoderPipeline", + "StableCascadePriorPipeline", + ] + _import_structure["stable_diffusion"].extend( + [ + "CLIPImageProjection", + "StableDiffusionDepth2ImgPipeline", + "StableDiffusionImageVariationPipeline", + "StableDiffusionImg2ImgPipeline", + "StableDiffusionInpaintPipeline", + "StableDiffusionInstructPix2PixPipeline", + "StableDiffusionLatentUpscalePipeline", + "StableDiffusionPipeline", + "StableDiffusionUpscalePipeline", + "StableUnCLIPImg2ImgPipeline", + "StableUnCLIPPipeline", + "StableDiffusionLDM3DPipeline", + ] + ) + _import_structure["aura_flow"] = ["AuraFlowPipeline"] + _import_structure["stable_diffusion_3"] = [ + "StableDiffusion3Pipeline", + "StableDiffusion3Img2ImgPipeline", + "StableDiffusion3InpaintPipeline", + ] + _import_structure["stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"] + _import_structure["stable_diffusion_safe"] = ["StableDiffusionPipelineSafe"] + _import_structure["stable_diffusion_sag"] = ["StableDiffusionSAGPipeline"] + _import_structure["stable_diffusion_gligen"] = [ + "StableDiffusionGLIGENPipeline", + "StableDiffusionGLIGENTextImagePipeline", + ] + _import_structure["stable_video_diffusion"] = ["StableVideoDiffusionPipeline"] + _import_structure["stable_diffusion_xl"].extend( + [ + "StableDiffusionXLImg2ImgPipeline", + "StableDiffusionXLInpaintPipeline", + "StableDiffusionXLInstructPix2PixPipeline", + "StableDiffusionXLPipeline", + ] + ) + _import_structure["stable_diffusion_diffedit"] = ["StableDiffusionDiffEditPipeline"] + _import_structure["stable_diffusion_ldm3d"] = ["StableDiffusionLDM3DPipeline"] + _import_structure["stable_diffusion_panorama"] = ["StableDiffusionPanoramaPipeline"] + _import_structure["t2i_adapter"] = [ + "StableDiffusionAdapterPipeline", + "StableDiffusionXLAdapterPipeline", + ] + _import_structure["text_to_video_synthesis"] = [ + "TextToVideoSDPipeline", + "TextToVideoZeroPipeline", + "TextToVideoZeroSDXLPipeline", + "VideoToVideoSDPipeline", + ] + _import_structure["i2vgen_xl"] = ["I2VGenXLPipeline"] + _import_structure["unclip"] = ["UnCLIPImageVariationPipeline", "UnCLIPPipeline"] + _import_structure["unidiffuser"] = [ + "ImageTextPipelineOutput", + "UniDiffuserModel", + "UniDiffuserPipeline", + "UniDiffuserTextDecoder", + ] + _import_structure["wuerstchen"] = [ + "WuerstchenCombinedPipeline", + "WuerstchenDecoderPipeline", + "WuerstchenPriorPipeline", + ] + _import_structure["wan"] = [ + "WanPipeline", + "WanImageToVideoPipeline", + "WanVideoToVideoPipeline", + "WanVACEPipeline", + "WanAnimatePipeline", + ] + _import_structure["kandinsky5"] = [ + "Kandinsky5T2VPipeline", + "Kandinsky5I2VPipeline", + "Kandinsky5T2IPipeline", + "Kandinsky5I2IPipeline", + ] + _import_structure["z_image"] = [ + "ZImageControlNetInpaintPipeline", + "ZImageControlNetPipeline", + "ZImageImg2ImgPipeline", + "ZImageInpaintPipeline", + "ZImageOmniPipeline", + "ZImagePipeline", + ] + _import_structure["skyreels_v2"] = [ + "SkyReelsV2DiffusionForcingPipeline", + "SkyReelsV2DiffusionForcingImageToVideoPipeline", + "SkyReelsV2DiffusionForcingVideoToVideoPipeline", + "SkyReelsV2ImageToVideoPipeline", + "SkyReelsV2Pipeline", + ] + _import_structure["qwenimage"] = [ + "QwenImagePipeline", + "QwenImageImg2ImgPipeline", + "QwenImageInpaintPipeline", + "QwenImageEditPipeline", + "QwenImageEditPlusPipeline", + "QwenImageEditInpaintPipeline", + "QwenImageControlNetInpaintPipeline", + "QwenImageControlNetPipeline", + "QwenImageLayeredPipeline", + ] + _import_structure["chronoedit"] = ["ChronoEditPipeline"] + _import_structure["glm_image"] = ["GlmImagePipeline"] + +try: + if not is_onnx_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_onnx_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_onnx_objects)) +else: + _import_structure["onnx_utils"] = ["OnnxRuntimeModel"] +try: + if not (is_torch_available() and is_transformers_available() and is_onnx_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_torch_and_transformers_and_onnx_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_onnx_objects)) +else: + _import_structure["stable_diffusion"].extend( + [ + "OnnxStableDiffusionImg2ImgPipeline", + "OnnxStableDiffusionInpaintPipeline", + "OnnxStableDiffusionPipeline", + "OnnxStableDiffusionUpscalePipeline", + "StableDiffusionOnnxPipeline", + ] + ) + +try: + if not (is_torch_available() and is_transformers_available() and is_sentencepiece_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import ( + dummy_torch_and_transformers_and_sentencepiece_objects, + ) + + _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_sentencepiece_objects)) +else: + _import_structure["kolors"] = [ + "KolorsPipeline", + "KolorsImg2ImgPipeline", + ] + +try: + if not (is_torch_available() and is_transformers_available() and is_opencv_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import ( + dummy_torch_and_transformers_and_opencv_objects, + ) + + _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_opencv_objects)) +else: + _import_structure["consisid"] = ["ConsisIDPipeline"] + +try: + if not is_flax_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_flax_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_flax_objects)) +else: + _import_structure["pipeline_flax_utils"] = ["FlaxDiffusionPipeline"] +try: + if not (is_flax_available() and is_transformers_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_flax_and_transformers_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_flax_and_transformers_objects)) +else: + _import_structure["controlnet"].extend(["FlaxStableDiffusionControlNetPipeline"]) + _import_structure["stable_diffusion"].extend( + [ + "FlaxStableDiffusionImg2ImgPipeline", + "FlaxStableDiffusionInpaintPipeline", + "FlaxStableDiffusionPipeline", + ] + ) + _import_structure["stable_diffusion_xl"].extend( + [ + "FlaxStableDiffusionXLPipeline", + ] + ) + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_pt_objects import * # noqa F403 + + else: + from .auto_pipeline import ( + AutoPipelineForImage2Image, + AutoPipelineForInpainting, + AutoPipelineForText2Image, + ) + from .consistency_models import ConsistencyModelPipeline + from .dance_diffusion import DanceDiffusionPipeline + from .ddim import DDIMPipeline + from .ddpm import DDPMPipeline + from .deprecated import KarrasVePipeline, LDMPipeline, PNDMPipeline, RePaintPipeline, ScoreSdeVePipeline + from .dit import DiTPipeline + from .latent_diffusion import LDMSuperResolutionPipeline + from .pipeline_utils import ( + AudioPipelineOutput, + DiffusionPipeline, + ImagePipelineOutput, + StableDiffusionMixin, + ) + + try: + if not (is_torch_available() and is_librosa_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_librosa_objects import * + else: + from .deprecated import AudioDiffusionPipeline, Mel + + try: + if not (is_torch_available() and is_transformers_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_transformers_objects import * + else: + from .allegro import AllegroPipeline + from .amused import AmusedImg2ImgPipeline, AmusedInpaintPipeline, AmusedPipeline + from .animatediff import ( + AnimateDiffControlNetPipeline, + AnimateDiffPipeline, + AnimateDiffSDXLPipeline, + AnimateDiffSparseControlNetPipeline, + AnimateDiffVideoToVideoControlNetPipeline, + AnimateDiffVideoToVideoPipeline, + ) + from .audioldm import AudioLDMPipeline + from .audioldm2 import ( + AudioLDM2Pipeline, + AudioLDM2ProjectionModel, + AudioLDM2UNet2DConditionModel, + ) + from .aura_flow import AuraFlowPipeline + from .blip_diffusion import BlipDiffusionPipeline + from .bria import BriaPipeline + from .bria_fibo import BriaFiboEditPipeline, BriaFiboPipeline + from .chroma import ChromaImg2ImgPipeline, ChromaInpaintPipeline, ChromaPipeline + from .chronoedit import ChronoEditPipeline + from .cogvideo import ( + CogVideoXFunControlPipeline, + CogVideoXImageToVideoPipeline, + CogVideoXPipeline, + CogVideoXVideoToVideoPipeline, + ) + from .cogview3 import CogView3PlusPipeline + from .cogview4 import CogView4ControlPipeline, CogView4Pipeline + from .controlnet import ( + BlipDiffusionControlNetPipeline, + StableDiffusionControlNetImg2ImgPipeline, + StableDiffusionControlNetInpaintPipeline, + StableDiffusionControlNetPipeline, + StableDiffusionXLControlNetImg2ImgPipeline, + StableDiffusionXLControlNetInpaintPipeline, + StableDiffusionXLControlNetPipeline, + StableDiffusionXLControlNetUnionImg2ImgPipeline, + StableDiffusionXLControlNetUnionInpaintPipeline, + StableDiffusionXLControlNetUnionPipeline, + ) + from .controlnet_hunyuandit import ( + HunyuanDiTControlNetPipeline, + ) + from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline + from .controlnet_xs import ( + StableDiffusionControlNetXSPipeline, + StableDiffusionXLControlNetXSPipeline, + ) + from .cosmos import ( + Cosmos2_5_PredictBasePipeline, + Cosmos2_5_TransferPipeline, + Cosmos2TextToImagePipeline, + Cosmos2VideoToWorldPipeline, + CosmosTextToWorldPipeline, + CosmosVideoToWorldPipeline, + ) + from .deepfloyd_if import ( + IFImg2ImgPipeline, + IFImg2ImgSuperResolutionPipeline, + IFInpaintingPipeline, + IFInpaintingSuperResolutionPipeline, + IFPipeline, + IFSuperResolutionPipeline, + ) + from .deprecated import ( + AltDiffusionImg2ImgPipeline, + AltDiffusionPipeline, + CycleDiffusionPipeline, + StableDiffusionInpaintPipelineLegacy, + StableDiffusionModelEditingPipeline, + StableDiffusionParadigmsPipeline, + StableDiffusionPix2PixZeroPipeline, + VersatileDiffusionDualGuidedPipeline, + VersatileDiffusionImageVariationPipeline, + VersatileDiffusionPipeline, + VersatileDiffusionTextToImagePipeline, + VQDiffusionPipeline, + ) + from .easyanimate import ( + EasyAnimateControlPipeline, + EasyAnimateInpaintPipeline, + EasyAnimatePipeline, + ) + from .flux import ( + FluxControlImg2ImgPipeline, + FluxControlInpaintPipeline, + FluxControlNetImg2ImgPipeline, + FluxControlNetInpaintPipeline, + FluxControlNetPipeline, + FluxControlPipeline, + FluxFillPipeline, + FluxImg2ImgPipeline, + FluxInpaintPipeline, + FluxKontextInpaintPipeline, + FluxKontextPipeline, + FluxPipeline, + FluxPriorReduxPipeline, + ReduxImageEncoder, + ) + from .flux2 import Flux2KleinPipeline, Flux2Pipeline + from .glm_image import GlmImagePipeline + from .helios import HeliosPipeline, HeliosPyramidPipeline + from .hidream_image import HiDreamImagePipeline + from .hunyuan_image import HunyuanImagePipeline, HunyuanImageRefinerPipeline + from .hunyuan_video import ( + HunyuanSkyreelsImageToVideoPipeline, + HunyuanVideoFramepackPipeline, + HunyuanVideoImageToVideoPipeline, + HunyuanVideoPipeline, + ) + from .hunyuan_video1_5 import HunyuanVideo15ImageToVideoPipeline, HunyuanVideo15Pipeline + from .hunyuandit import HunyuanDiTPipeline + from .i2vgen_xl import I2VGenXLPipeline + from .kandinsky import ( + KandinskyCombinedPipeline, + KandinskyImg2ImgCombinedPipeline, + KandinskyImg2ImgPipeline, + KandinskyInpaintCombinedPipeline, + KandinskyInpaintPipeline, + KandinskyPipeline, + KandinskyPriorPipeline, + ) + from .kandinsky2_2 import ( + KandinskyV22CombinedPipeline, + KandinskyV22ControlnetImg2ImgPipeline, + KandinskyV22ControlnetPipeline, + KandinskyV22Img2ImgCombinedPipeline, + KandinskyV22Img2ImgPipeline, + KandinskyV22InpaintCombinedPipeline, + KandinskyV22InpaintPipeline, + KandinskyV22Pipeline, + KandinskyV22PriorEmb2EmbPipeline, + KandinskyV22PriorPipeline, + ) + from .kandinsky3 import ( + Kandinsky3Img2ImgPipeline, + Kandinsky3Pipeline, + ) + from .kandinsky5 import ( + Kandinsky5I2IPipeline, + Kandinsky5I2VPipeline, + Kandinsky5T2IPipeline, + Kandinsky5T2VPipeline, + ) + from .latent_consistency_models import ( + LatentConsistencyModelImg2ImgPipeline, + LatentConsistencyModelPipeline, + ) + from .latent_diffusion import LDMTextToImagePipeline + from .latte import LattePipeline + from .ledits_pp import ( + LEditsPPDiffusionPipelineOutput, + LEditsPPInversionPipelineOutput, + LEditsPPPipelineStableDiffusion, + LEditsPPPipelineStableDiffusionXL, + ) + from .longcat_image import LongCatImageEditPipeline, LongCatImagePipeline + from .ltx import ( + LTXConditionPipeline, + LTXI2VLongMultiPromptPipeline, + LTXImageToVideoPipeline, + LTXLatentUpsamplePipeline, + LTXPipeline, + ) + from .ltx2 import LTX2ConditionPipeline, LTX2ImageToVideoPipeline, LTX2LatentUpsamplePipeline, LTX2Pipeline + from .lucy import LucyEditPipeline + from .lumina import LuminaPipeline, LuminaText2ImgPipeline + from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline + from .marigold import ( + MarigoldDepthPipeline, + MarigoldIntrinsicsPipeline, + MarigoldNormalsPipeline, + ) + from .mochi import MochiPipeline + from .musicldm import MusicLDMPipeline + from .omnigen import OmniGenPipeline + from .ovis_image import OvisImagePipeline + from .pag import ( + AnimateDiffPAGPipeline, + HunyuanDiTPAGPipeline, + KolorsPAGPipeline, + PixArtSigmaPAGPipeline, + SanaPAGPipeline, + StableDiffusion3PAGImg2ImgPipeline, + StableDiffusion3PAGPipeline, + StableDiffusionControlNetPAGInpaintPipeline, + StableDiffusionControlNetPAGPipeline, + StableDiffusionPAGImg2ImgPipeline, + StableDiffusionPAGInpaintPipeline, + StableDiffusionPAGPipeline, + StableDiffusionXLControlNetPAGImg2ImgPipeline, + StableDiffusionXLControlNetPAGPipeline, + StableDiffusionXLPAGImg2ImgPipeline, + StableDiffusionXLPAGInpaintPipeline, + StableDiffusionXLPAGPipeline, + ) + from .paint_by_example import PaintByExamplePipeline + from .pia import PIAPipeline + from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline + from .prx import PRXPipeline + from .qwenimage import ( + QwenImageControlNetInpaintPipeline, + QwenImageControlNetPipeline, + QwenImageEditInpaintPipeline, + QwenImageEditPipeline, + QwenImageEditPlusPipeline, + QwenImageImg2ImgPipeline, + QwenImageInpaintPipeline, + QwenImageLayeredPipeline, + QwenImagePipeline, + ) + from .sana import ( + SanaControlNetPipeline, + SanaPipeline, + SanaSprintImg2ImgPipeline, + SanaSprintPipeline, + ) + from .sana_video import SanaImageToVideoPipeline, SanaVideoPipeline + from .semantic_stable_diffusion import SemanticStableDiffusionPipeline + from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline + from .stable_audio import StableAudioPipeline, StableAudioProjectionModel + from .stable_cascade import ( + StableCascadeCombinedPipeline, + StableCascadeDecoderPipeline, + StableCascadePriorPipeline, + ) + from .stable_diffusion import ( + CLIPImageProjection, + StableDiffusionDepth2ImgPipeline, + StableDiffusionImageVariationPipeline, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, + StableDiffusionInstructPix2PixPipeline, + StableDiffusionLatentUpscalePipeline, + StableDiffusionPipeline, + StableDiffusionUpscalePipeline, + StableUnCLIPImg2ImgPipeline, + StableUnCLIPPipeline, + ) + from .stable_diffusion_3 import ( + StableDiffusion3Img2ImgPipeline, + StableDiffusion3InpaintPipeline, + StableDiffusion3Pipeline, + ) + from .stable_diffusion_attend_and_excite import StableDiffusionAttendAndExcitePipeline + from .stable_diffusion_diffedit import StableDiffusionDiffEditPipeline + from .stable_diffusion_gligen import StableDiffusionGLIGENPipeline, StableDiffusionGLIGENTextImagePipeline + from .stable_diffusion_ldm3d import StableDiffusionLDM3DPipeline + from .stable_diffusion_panorama import StableDiffusionPanoramaPipeline + from .stable_diffusion_safe import StableDiffusionPipelineSafe + from .stable_diffusion_sag import StableDiffusionSAGPipeline + from .stable_diffusion_xl import ( + StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLInpaintPipeline, + StableDiffusionXLInstructPix2PixPipeline, + StableDiffusionXLPipeline, + ) + from .stable_video_diffusion import StableVideoDiffusionPipeline + from .t2i_adapter import ( + StableDiffusionAdapterPipeline, + StableDiffusionXLAdapterPipeline, + ) + from .text_to_video_synthesis import ( + TextToVideoSDPipeline, + TextToVideoZeroPipeline, + TextToVideoZeroSDXLPipeline, + VideoToVideoSDPipeline, + ) + from .unclip import UnCLIPImageVariationPipeline, UnCLIPPipeline + from .unidiffuser import ( + ImageTextPipelineOutput, + UniDiffuserModel, + UniDiffuserPipeline, + UniDiffuserTextDecoder, + ) + from .visualcloze import VisualClozeGenerationPipeline, VisualClozePipeline + from .wan import ( + WanAnimatePipeline, + WanImageToVideoPipeline, + WanPipeline, + WanVACEPipeline, + WanVideoToVideoPipeline, + ) + from .wuerstchen import ( + WuerstchenCombinedPipeline, + WuerstchenDecoderPipeline, + WuerstchenPriorPipeline, + ) + from .z_image import ( + ZImageControlNetInpaintPipeline, + ZImageControlNetPipeline, + ZImageImg2ImgPipeline, + ZImageInpaintPipeline, + ZImageOmniPipeline, + ZImagePipeline, + ) + + try: + if not is_onnx_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_onnx_objects import * # noqa F403 + + else: + from .onnx_utils import OnnxRuntimeModel + + try: + if not (is_torch_available() and is_transformers_available() and is_onnx_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_transformers_and_onnx_objects import * + else: + from .stable_diffusion import ( + OnnxStableDiffusionImg2ImgPipeline, + OnnxStableDiffusionInpaintPipeline, + OnnxStableDiffusionPipeline, + OnnxStableDiffusionUpscalePipeline, + StableDiffusionOnnxPipeline, + ) + + try: + if not (is_torch_available() and is_transformers_available() and is_sentencepiece_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_transformers_and_sentencepiece_objects import * + else: + from .kolors import ( + KolorsImg2ImgPipeline, + KolorsPipeline, + ) + + try: + if not (is_torch_available() and is_transformers_available() and is_opencv_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_transformers_and_opencv_objects import * + else: + from .consisid import ConsisIDPipeline + + try: + if not is_flax_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_flax_objects import * # noqa F403 + else: + from .pipeline_flax_utils import FlaxDiffusionPipeline + + try: + if not (is_flax_available() and is_transformers_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_flax_and_transformers_objects import * + else: + from .controlnet import FlaxStableDiffusionControlNetPipeline + from .stable_diffusion import ( + FlaxStableDiffusionImg2ImgPipeline, + FlaxStableDiffusionInpaintPipeline, + FlaxStableDiffusionPipeline, + ) + from .stable_diffusion_xl import ( + FlaxStableDiffusionXLPipeline, + ) + + try: + if not (is_transformers_available() and is_torch_available() and is_note_seq_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_transformers_and_torch_and_note_seq_objects import * # noqa F403 + + else: + from .deprecated import ( + MidiProcessor, + SpectrogramDiffusionPipeline, + ) + + from .skyreels_v2 import ( + SkyReelsV2DiffusionForcingImageToVideoPipeline, + SkyReelsV2DiffusionForcingPipeline, + SkyReelsV2DiffusionForcingVideoToVideoPipeline, + SkyReelsV2ImageToVideoPipeline, + SkyReelsV2Pipeline, + ) + +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + module_spec=__spec__, + ) + for name, value in _dummy_objects.items(): + setattr(sys.modules[__name__], name, value) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/auto_pipeline.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/auto_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..72151dc40a5337929d39280cff1039a9a029e138 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/auto_pipeline.py @@ -0,0 +1,1233 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict + +from huggingface_hub.utils import validate_hf_hub_args + +from ..configuration_utils import ConfigMixin +from ..models.controlnets import ControlNetUnionModel +from ..utils import is_sentencepiece_available +from .aura_flow import AuraFlowPipeline +from .chroma import ChromaPipeline +from .cogview3 import CogView3PlusPipeline +from .cogview4 import CogView4ControlPipeline, CogView4Pipeline +from .controlnet import ( + StableDiffusionControlNetImg2ImgPipeline, + StableDiffusionControlNetInpaintPipeline, + StableDiffusionControlNetPipeline, + StableDiffusionXLControlNetImg2ImgPipeline, + StableDiffusionXLControlNetInpaintPipeline, + StableDiffusionXLControlNetPipeline, + StableDiffusionXLControlNetUnionImg2ImgPipeline, + StableDiffusionXLControlNetUnionInpaintPipeline, + StableDiffusionXLControlNetUnionPipeline, +) +from .controlnet_sd3 import ( + StableDiffusion3ControlNetInpaintingPipeline, + StableDiffusion3ControlNetPipeline, +) +from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline +from .flux import ( + FluxControlImg2ImgPipeline, + FluxControlInpaintPipeline, + FluxControlNetImg2ImgPipeline, + FluxControlNetInpaintPipeline, + FluxControlNetPipeline, + FluxControlPipeline, + FluxImg2ImgPipeline, + FluxInpaintPipeline, + FluxKontextPipeline, + FluxPipeline, +) +from .flux2 import Flux2KleinPipeline, Flux2Pipeline +from .glm_image import GlmImagePipeline +from .helios import HeliosPipeline, HeliosPyramidPipeline +from .hunyuandit import HunyuanDiTPipeline +from .kandinsky import ( + KandinskyCombinedPipeline, + KandinskyImg2ImgCombinedPipeline, + KandinskyImg2ImgPipeline, + KandinskyInpaintCombinedPipeline, + KandinskyInpaintPipeline, + KandinskyPipeline, +) +from .kandinsky2_2 import ( + KandinskyV22CombinedPipeline, + KandinskyV22Img2ImgCombinedPipeline, + KandinskyV22Img2ImgPipeline, + KandinskyV22InpaintCombinedPipeline, + KandinskyV22InpaintPipeline, + KandinskyV22Pipeline, +) +from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline +from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline +from .lumina import LuminaPipeline +from .lumina2 import Lumina2Pipeline +from .ovis_image import OvisImagePipeline +from .pag import ( + HunyuanDiTPAGPipeline, + PixArtSigmaPAGPipeline, + SanaPAGPipeline, + StableDiffusion3PAGImg2ImgPipeline, + StableDiffusion3PAGPipeline, + StableDiffusionControlNetPAGInpaintPipeline, + StableDiffusionControlNetPAGPipeline, + StableDiffusionPAGImg2ImgPipeline, + StableDiffusionPAGInpaintPipeline, + StableDiffusionPAGPipeline, + StableDiffusionXLControlNetPAGImg2ImgPipeline, + StableDiffusionXLControlNetPAGPipeline, + StableDiffusionXLPAGImg2ImgPipeline, + StableDiffusionXLPAGInpaintPipeline, + StableDiffusionXLPAGPipeline, +) +from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline +from .qwenimage import ( + QwenImageControlNetPipeline, + QwenImageEditInpaintPipeline, + QwenImageEditPipeline, + QwenImageEditPlusPipeline, + QwenImageImg2ImgPipeline, + QwenImageInpaintPipeline, + QwenImageLayeredPipeline, + QwenImagePipeline, +) +from .sana import SanaPipeline +from .stable_cascade import StableCascadeCombinedPipeline, StableCascadeDecoderPipeline +from .stable_diffusion import ( + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, + StableDiffusionPipeline, +) +from .stable_diffusion_3 import ( + StableDiffusion3Img2ImgPipeline, + StableDiffusion3InpaintPipeline, + StableDiffusion3Pipeline, +) +from .stable_diffusion_xl import ( + StableDiffusionXLImg2ImgPipeline, + StableDiffusionXLInpaintPipeline, + StableDiffusionXLPipeline, +) +from .wan import WanImageToVideoPipeline, WanPipeline, WanVideoToVideoPipeline +from .wuerstchen import WuerstchenCombinedPipeline, WuerstchenDecoderPipeline +from .z_image import ( + ZImageControlNetInpaintPipeline, + ZImageControlNetPipeline, + ZImageImg2ImgPipeline, + ZImageInpaintPipeline, + ZImageOmniPipeline, + ZImagePipeline, +) + + +AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", StableDiffusionPipeline), + ("stable-diffusion-xl", StableDiffusionXLPipeline), + ("stable-diffusion-3", StableDiffusion3Pipeline), + ("stable-diffusion-3-pag", StableDiffusion3PAGPipeline), + ("if", IFPipeline), + ("hunyuan", HunyuanDiTPipeline), + ("hunyuan-pag", HunyuanDiTPAGPipeline), + ("kandinsky", KandinskyCombinedPipeline), + ("kandinsky22", KandinskyV22CombinedPipeline), + ("kandinsky3", Kandinsky3Pipeline), + ("stable-diffusion-controlnet", StableDiffusionControlNetPipeline), + ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetPipeline), + ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionPipeline), + ("stable-diffusion-3-controlnet", StableDiffusion3ControlNetPipeline), + ("wuerstchen", WuerstchenCombinedPipeline), + ("cascade", StableCascadeCombinedPipeline), + ("lcm", LatentConsistencyModelPipeline), + ("pixart-alpha", PixArtAlphaPipeline), + ("pixart-sigma", PixArtSigmaPipeline), + ("sana", SanaPipeline), + ("sana-pag", SanaPAGPipeline), + ("stable-diffusion-pag", StableDiffusionPAGPipeline), + ("stable-diffusion-controlnet-pag", StableDiffusionControlNetPAGPipeline), + ("stable-diffusion-xl-pag", StableDiffusionXLPAGPipeline), + ("stable-diffusion-xl-controlnet-pag", StableDiffusionXLControlNetPAGPipeline), + ("pixart-sigma-pag", PixArtSigmaPAGPipeline), + ("auraflow", AuraFlowPipeline), + ("flux", FluxPipeline), + ("flux-control", FluxControlPipeline), + ("flux-controlnet", FluxControlNetPipeline), + ("flux-kontext", FluxKontextPipeline), + ("flux2-klein", Flux2KleinPipeline), + ("flux2", Flux2Pipeline), + ("lumina", LuminaPipeline), + ("lumina2", Lumina2Pipeline), + ("chroma", ChromaPipeline), + ("cogview3", CogView3PlusPipeline), + ("cogview4", CogView4Pipeline), + ("glm_image", GlmImagePipeline), + ("helios", HeliosPipeline), + ("helios-pyramid", HeliosPyramidPipeline), + ("cogview4-control", CogView4ControlPipeline), + ("qwenimage", QwenImagePipeline), + ("qwenimage-controlnet", QwenImageControlNetPipeline), + ("z-image", ZImagePipeline), + ("z-image-controlnet", ZImageControlNetPipeline), + ("z-image-controlnet-inpaint", ZImageControlNetInpaintPipeline), + ("z-image-omni", ZImageOmniPipeline), + ("ovis", OvisImagePipeline), + ] +) + +AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", StableDiffusionImg2ImgPipeline), + ("stable-diffusion-xl", StableDiffusionXLImg2ImgPipeline), + ("stable-diffusion-3", StableDiffusion3Img2ImgPipeline), + ("stable-diffusion-3-pag", StableDiffusion3PAGImg2ImgPipeline), + ("if", IFImg2ImgPipeline), + ("kandinsky", KandinskyImg2ImgCombinedPipeline), + ("kandinsky22", KandinskyV22Img2ImgCombinedPipeline), + ("kandinsky3", Kandinsky3Img2ImgPipeline), + ("stable-diffusion-controlnet", StableDiffusionControlNetImg2ImgPipeline), + ("stable-diffusion-pag", StableDiffusionPAGImg2ImgPipeline), + ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetImg2ImgPipeline), + ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionImg2ImgPipeline), + ("stable-diffusion-xl-pag", StableDiffusionXLPAGImg2ImgPipeline), + ("stable-diffusion-xl-controlnet-pag", StableDiffusionXLControlNetPAGImg2ImgPipeline), + ("lcm", LatentConsistencyModelImg2ImgPipeline), + ("flux", FluxImg2ImgPipeline), + ("flux-controlnet", FluxControlNetImg2ImgPipeline), + ("flux-control", FluxControlImg2ImgPipeline), + ("flux-kontext", FluxKontextPipeline), + ("flux2-klein", Flux2KleinPipeline), + ("flux2", Flux2Pipeline), + ("qwenimage", QwenImageImg2ImgPipeline), + ("qwenimage-edit", QwenImageEditPipeline), + ("qwenimage-edit-plus", QwenImageEditPlusPipeline), + ("qwenimage-layered", QwenImageLayeredPipeline), + ("z-image", ZImageImg2ImgPipeline), + ] +) + +AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict( + [ + ("stable-diffusion", StableDiffusionInpaintPipeline), + ("stable-diffusion-xl", StableDiffusionXLInpaintPipeline), + ("stable-diffusion-3", StableDiffusion3InpaintPipeline), + ("if", IFInpaintingPipeline), + ("kandinsky", KandinskyInpaintCombinedPipeline), + ("kandinsky22", KandinskyV22InpaintCombinedPipeline), + ("stable-diffusion-controlnet", StableDiffusionControlNetInpaintPipeline), + ("stable-diffusion-controlnet-pag", StableDiffusionControlNetPAGInpaintPipeline), + ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetInpaintPipeline), + ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionInpaintPipeline), + ("stable-diffusion-3-controlnet", StableDiffusion3ControlNetInpaintingPipeline), + ("stable-diffusion-xl-pag", StableDiffusionXLPAGInpaintPipeline), + ("flux", FluxInpaintPipeline), + ("flux-controlnet", FluxControlNetInpaintPipeline), + ("flux-control", FluxControlInpaintPipeline), + ("stable-diffusion-pag", StableDiffusionPAGInpaintPipeline), + ("qwenimage", QwenImageInpaintPipeline), + ("qwenimage-edit", QwenImageEditInpaintPipeline), + ("z-image", ZImageInpaintPipeline), + ] +) + +AUTO_TEXT2VIDEO_PIPELINES_MAPPING = OrderedDict( + [ + ("wan", WanPipeline), + ] +) + +AUTO_IMAGE2VIDEO_PIPELINES_MAPPING = OrderedDict( + [ + ("wan-i2v", WanImageToVideoPipeline), + ] +) + +AUTO_VIDEO2VIDEO_PIPELINES_MAPPING = OrderedDict( + [ + ("wan", WanVideoToVideoPipeline), + ] +) + +_AUTO_TEXT2IMAGE_DECODER_PIPELINES_MAPPING = OrderedDict( + [ + ("kandinsky", KandinskyPipeline), + ("kandinsky22", KandinskyV22Pipeline), + ("wuerstchen", WuerstchenDecoderPipeline), + ("cascade", StableCascadeDecoderPipeline), + ] +) +_AUTO_IMAGE2IMAGE_DECODER_PIPELINES_MAPPING = OrderedDict( + [ + ("kandinsky", KandinskyImg2ImgPipeline), + ("kandinsky22", KandinskyV22Img2ImgPipeline), + ] +) +_AUTO_INPAINT_DECODER_PIPELINES_MAPPING = OrderedDict( + [ + ("kandinsky", KandinskyInpaintPipeline), + ("kandinsky22", KandinskyV22InpaintPipeline), + ] +) + +if is_sentencepiece_available(): + from .kolors import KolorsImg2ImgPipeline, KolorsPipeline + from .pag import KolorsPAGPipeline + + AUTO_TEXT2IMAGE_PIPELINES_MAPPING["kolors"] = KolorsPipeline + AUTO_TEXT2IMAGE_PIPELINES_MAPPING["kolors-pag"] = KolorsPAGPipeline + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["kolors"] = KolorsImg2ImgPipeline + +SUPPORTED_TASKS_MAPPINGS = [ + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + AUTO_INPAINT_PIPELINES_MAPPING, + AUTO_TEXT2VIDEO_PIPELINES_MAPPING, + AUTO_IMAGE2VIDEO_PIPELINES_MAPPING, + AUTO_VIDEO2VIDEO_PIPELINES_MAPPING, + _AUTO_TEXT2IMAGE_DECODER_PIPELINES_MAPPING, + _AUTO_IMAGE2IMAGE_DECODER_PIPELINES_MAPPING, + _AUTO_INPAINT_DECODER_PIPELINES_MAPPING, +] + + +def _get_connected_pipeline(pipeline_cls): + # for now connected pipelines can only be loaded from decoder pipelines, such as kandinsky-community/kandinsky-2-2-decoder + if pipeline_cls in _AUTO_TEXT2IMAGE_DECODER_PIPELINES_MAPPING.values(): + return _get_task_class( + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False + ) + if pipeline_cls in _AUTO_IMAGE2IMAGE_DECODER_PIPELINES_MAPPING.values(): + return _get_task_class( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False + ) + if pipeline_cls in _AUTO_INPAINT_DECODER_PIPELINES_MAPPING.values(): + return _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False) + + +def _get_model(pipeline_class_name): + for task_mapping in SUPPORTED_TASKS_MAPPINGS: + for model_name, pipeline in task_mapping.items(): + if pipeline.__name__ == pipeline_class_name: + return model_name + + +def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True): + model_name = _get_model(pipeline_class_name) + + if model_name is not None: + task_class = mapping.get(model_name, None) + if task_class is not None: + return task_class + + if throw_error_if_not_exist: + raise ValueError(f"AutoPipeline can't find a pipeline linked to {pipeline_class_name} for {model_name}") + + +class AutoPipelineForText2Image(ConfigMixin): + r""" + + [`AutoPipelineForText2Image`] is a generic pipeline class that instantiates a text-to-image pipeline class. The + specific underlying pipeline class is automatically selected from either the + [`~AutoPipelineForText2Image.from_pretrained`] or [`~AutoPipelineForText2Image.from_pipe`] methods. + + This class cannot be instantiated using `__init__()` (throws an error). + + Class attributes: + + - **config_name** (`str`) -- The configuration filename that stores the class and module names of all the + diffusion pipeline's components. + + """ + + config_name = "model_index.json" + + def __init__(self, *args, **kwargs): + raise EnvironmentError( + f"{self.__class__.__name__} is designed to be instantiated " + f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or " + f"`{self.__class__.__name__}.from_pipe(pipeline)` methods." + ) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path, **kwargs): + r""" + Instantiates a text-to-image Pytorch diffusion pipeline from pretrained pipeline weight. + + The from_pretrained() method takes care of returning the correct pipeline class instance by: + 1. Detect the pipeline class of the pretrained_model_or_path based on the _class_name property of its + config object + 2. Find the text-to-image pipeline linked to the pipeline class using pattern matching on pipeline class + name. + + If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetPipeline`] object. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + If you get the error message below, you need to finetune the weights for your downstream task: + + ``` + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + + Parameters: + pretrained_model_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline + hosted on the Hub. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights + saved using + [`~DiffusionPipeline.save_pretrained`]. + torch_dtype (`torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + custom_revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id similar to + `revision` when loading a custom pipeline from the Hub. It can be a 🤗 Diffusers version when loading a + custom pipeline from GitHub, otherwise it defaults to `"main"` when loading from the Hub. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn’t need to be defined for each + parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the + same device. + + Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary device identifier for the maximum memory. Will default to the maximum memory available for + each GPU and the available CPU RAM if unset. + offload_folder (`str` or `os.PathLike`, *optional*): + The path to offload weights if device_map contains the value `"disk"`. + offload_state_dict (`bool`, *optional*): + If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if + the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True` + when there is some disk offload. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the safetensors weights are downloaded if they're available **and** if the + safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors + weights. If set to `False`, safetensors weights are not loaded. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline + class). The overwritten components are passed directly to the pipelines `__init__` method. See example + below for more information. + variant (`str`, *optional*): + Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + + > [!TIP] > To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in + with `hf > auth login`. + + Examples: + + ```py + >>> from diffusers import AutoPipelineForText2Image + + >>> pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> image = pipeline(prompt).images[0] + ``` + """ + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + + load_config_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "token": token, + "local_files_only": local_files_only, + "revision": revision, + } + + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + orig_class_name = config["_class_name"] + if "ControlPipeline" in orig_class_name: + to_replace = "ControlPipeline" + else: + to_replace = "Pipeline" + + if "controlnet" in kwargs: + if isinstance(kwargs["controlnet"], ControlNetUnionModel): + orig_class_name = config["_class_name"].replace(to_replace, "ControlNetUnionPipeline") + else: + orig_class_name = config["_class_name"].replace(to_replace, "ControlNetPipeline") + if "enable_pag" in kwargs: + enable_pag = kwargs.pop("enable_pag") + if enable_pag: + orig_class_name = orig_class_name.replace(to_replace, "PAGPipeline") + + text_2_image_cls = _get_task_class(AUTO_TEXT2IMAGE_PIPELINES_MAPPING, orig_class_name) + + kwargs = {**load_config_kwargs, **kwargs} + return text_2_image_cls.from_pretrained(pretrained_model_or_path, **kwargs) + + @classmethod + def from_pipe(cls, pipeline, **kwargs): + r""" + Instantiates a text-to-image Pytorch diffusion pipeline from another instantiated diffusion pipeline class. + + The from_pipe() method takes care of returning the correct pipeline class instance by finding the text-to-image + pipeline linked to the pipeline class using pattern matching on pipeline class name. + + All the modules the pipeline contains will be used to initialize the new pipeline without reallocating + additional memory. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + Parameters: + pipeline (`DiffusionPipeline`): + an instantiated `DiffusionPipeline` object + + ```py + >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image + + >>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained( + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False + ... ) + + >>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_i2i) + >>> image = pipe_t2i(prompt).images[0] + ``` + """ + + original_config = dict(pipeline.config) + original_cls_name = pipeline.__class__.__name__ + + # derive the pipeline class to instantiate + text_2_image_cls = _get_task_class(AUTO_TEXT2IMAGE_PIPELINES_MAPPING, original_cls_name) + + if "controlnet" in kwargs: + if kwargs["controlnet"] is not None: + to_replace = "PAGPipeline" if "PAG" in text_2_image_cls.__name__ else "Pipeline" + text_2_image_cls = _get_task_class( + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + text_2_image_cls.__name__.replace("ControlNet", "").replace(to_replace, "ControlNet" + to_replace), + ) + else: + text_2_image_cls = _get_task_class( + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + text_2_image_cls.__name__.replace("ControlNet", ""), + ) + + if "enable_pag" in kwargs: + enable_pag = kwargs.pop("enable_pag") + if enable_pag: + text_2_image_cls = _get_task_class( + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + text_2_image_cls.__name__.replace("PAG", "").replace("Pipeline", "PAGPipeline"), + ) + else: + text_2_image_cls = _get_task_class( + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + text_2_image_cls.__name__.replace("PAG", ""), + ) + + # define expected module and optional kwargs given the pipeline signature + expected_modules, optional_kwargs = text_2_image_cls._get_signature_keys(text_2_image_cls) + + pretrained_model_name_or_path = original_config.pop("_name_or_path", None) + + # allow users pass modules in `kwargs` to override the original pipeline's components + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + original_class_obj = { + k: pipeline.components[k] + for k, v in pipeline.components.items() + if k in expected_modules and k not in passed_class_obj + } + + # allow users pass optional kwargs to override the original pipelines config attribute + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + original_pipe_kwargs = { + k: original_config[k] + for k, v in original_config.items() + if k in optional_kwargs and k not in passed_pipe_kwargs + } + + # config that were not expected by original pipeline is stored as private attribute + # we will pass them as optional arguments if they can be accepted by the pipeline + additional_pipe_kwargs = [ + k[1:] + for k in original_config.keys() + if k.startswith("_") and k[1:] in optional_kwargs and k[1:] not in passed_pipe_kwargs + ] + for k in additional_pipe_kwargs: + original_pipe_kwargs[k] = original_config.pop(f"_{k}") + + text_2_image_kwargs = {**passed_class_obj, **original_class_obj, **passed_pipe_kwargs, **original_pipe_kwargs} + + # store unused config as private attribute + unused_original_config = { + f"{'' if k.startswith('_') else '_'}{k}": original_config[k] + for k, v in original_config.items() + if k not in text_2_image_kwargs + } + + missing_modules = ( + set(expected_modules) - set(text_2_image_cls._optional_components) - set(text_2_image_kwargs.keys()) + ) + + if len(missing_modules) > 0: + raise ValueError( + f"Pipeline {text_2_image_cls} expected {expected_modules}, but only {set(list(passed_class_obj.keys()) + list(original_class_obj.keys()))} were passed" + ) + + model = text_2_image_cls(**text_2_image_kwargs) + model.register_to_config(_name_or_path=pretrained_model_name_or_path) + model.register_to_config(**unused_original_config) + + return model + + +class AutoPipelineForImage2Image(ConfigMixin): + r""" + + [`AutoPipelineForImage2Image`] is a generic pipeline class that instantiates an image-to-image pipeline class. The + specific underlying pipeline class is automatically selected from either the + [`~AutoPipelineForImage2Image.from_pretrained`] or [`~AutoPipelineForImage2Image.from_pipe`] methods. + + This class cannot be instantiated using `__init__()` (throws an error). + + Class attributes: + + - **config_name** (`str`) -- The configuration filename that stores the class and module names of all the + diffusion pipeline's components. + + """ + + config_name = "model_index.json" + + def __init__(self, *args, **kwargs): + raise EnvironmentError( + f"{self.__class__.__name__} is designed to be instantiated " + f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or " + f"`{self.__class__.__name__}.from_pipe(pipeline)` methods." + ) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path, **kwargs): + r""" + Instantiates a image-to-image Pytorch diffusion pipeline from pretrained pipeline weight. + + The from_pretrained() method takes care of returning the correct pipeline class instance by: + 1. Detect the pipeline class of the pretrained_model_or_path based on the _class_name property of its + config object + 2. Find the image-to-image pipeline linked to the pipeline class using pattern matching on pipeline class + name. + + If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetImg2ImgPipeline`] + object. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + If you get the error message below, you need to finetune the weights for your downstream task: + + ``` + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + + Parameters: + pretrained_model_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline + hosted on the Hub. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights + saved using + [`~DiffusionPipeline.save_pretrained`]. + torch_dtype (`str` or `torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + custom_revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id similar to + `revision` when loading a custom pipeline from the Hub. It can be a 🤗 Diffusers version when loading a + custom pipeline from GitHub, otherwise it defaults to `"main"` when loading from the Hub. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn’t need to be defined for each + parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the + same device. + + Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary device identifier for the maximum memory. Will default to the maximum memory available for + each GPU and the available CPU RAM if unset. + offload_folder (`str` or `os.PathLike`, *optional*): + The path to offload weights if device_map contains the value `"disk"`. + offload_state_dict (`bool`, *optional*): + If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if + the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True` + when there is some disk offload. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the safetensors weights are downloaded if they're available **and** if the + safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors + weights. If set to `False`, safetensors weights are not loaded. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline + class). The overwritten components are passed directly to the pipelines `__init__` method. See example + below for more information. + variant (`str`, *optional*): + Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + + > [!TIP] > To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in + with `hf > auth login`. + + Examples: + + ```py + >>> from diffusers import AutoPipelineForImage2Image + + >>> pipeline = AutoPipelineForImage2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> image = pipeline(prompt, image).images[0] + ``` + """ + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + + load_config_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "token": token, + "local_files_only": local_files_only, + "revision": revision, + } + + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + orig_class_name = config["_class_name"] + + # the `orig_class_name` can be: + # `- *Pipeline` (for regular text-to-image checkpoint) + # - `*ControlPipeline` (for Flux tools specific checkpoint) + # `- *Img2ImgPipeline` (for refiner checkpoint) + if "Img2Img" in orig_class_name: + to_replace = "Img2ImgPipeline" + elif "ControlPipeline" in orig_class_name: + to_replace = "ControlPipeline" + else: + to_replace = "Pipeline" + + if "controlnet" in kwargs: + if isinstance(kwargs["controlnet"], ControlNetUnionModel): + orig_class_name = orig_class_name.replace(to_replace, "ControlNetUnion" + to_replace) + else: + orig_class_name = orig_class_name.replace(to_replace, "ControlNet" + to_replace) + if "enable_pag" in kwargs: + enable_pag = kwargs.pop("enable_pag") + if enable_pag: + orig_class_name = orig_class_name.replace(to_replace, "PAG" + to_replace) + + if to_replace == "ControlPipeline": + orig_class_name = orig_class_name.replace(to_replace, "ControlImg2ImgPipeline") + + image_2_image_cls = _get_task_class(AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, orig_class_name) + + kwargs = {**load_config_kwargs, **kwargs} + return image_2_image_cls.from_pretrained(pretrained_model_or_path, **kwargs) + + @classmethod + def from_pipe(cls, pipeline, **kwargs): + r""" + Instantiates a image-to-image Pytorch diffusion pipeline from another instantiated diffusion pipeline class. + + The from_pipe() method takes care of returning the correct pipeline class instance by finding the + image-to-image pipeline linked to the pipeline class using pattern matching on pipeline class name. + + All the modules the pipeline contains will be used to initialize the new pipeline without reallocating + additional memory. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + Parameters: + pipeline (`DiffusionPipeline`): + an instantiated `DiffusionPipeline` object + + Examples: + + ```py + >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image + + >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained( + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False + ... ) + + >>> pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i) + >>> image = pipe_i2i(prompt, image).images[0] + ``` + """ + + original_config = dict(pipeline.config) + original_cls_name = pipeline.__class__.__name__ + + # derive the pipeline class to instantiate + image_2_image_cls = _get_task_class(AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, original_cls_name) + + if "controlnet" in kwargs: + if kwargs["controlnet"] is not None: + to_replace = "Img2ImgPipeline" + if "PAG" in image_2_image_cls.__name__: + to_replace = "PAG" + to_replace + image_2_image_cls = _get_task_class( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + image_2_image_cls.__name__.replace("ControlNet", "").replace( + to_replace, "ControlNet" + to_replace + ), + ) + else: + image_2_image_cls = _get_task_class( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + image_2_image_cls.__name__.replace("ControlNet", ""), + ) + + if "enable_pag" in kwargs: + enable_pag = kwargs.pop("enable_pag") + if enable_pag: + image_2_image_cls = _get_task_class( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + image_2_image_cls.__name__.replace("PAG", "").replace("Img2ImgPipeline", "PAGImg2ImgPipeline"), + ) + else: + image_2_image_cls = _get_task_class( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + image_2_image_cls.__name__.replace("PAG", ""), + ) + + # define expected module and optional kwargs given the pipeline signature + expected_modules, optional_kwargs = image_2_image_cls._get_signature_keys(image_2_image_cls) + + pretrained_model_name_or_path = original_config.pop("_name_or_path", None) + + # allow users pass modules in `kwargs` to override the original pipeline's components + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + original_class_obj = { + k: pipeline.components[k] + for k, v in pipeline.components.items() + if k in expected_modules and k not in passed_class_obj + } + + # allow users pass optional kwargs to override the original pipelines config attribute + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + original_pipe_kwargs = { + k: original_config[k] + for k, v in original_config.items() + if k in optional_kwargs and k not in passed_pipe_kwargs + } + + # config attribute that were not expected by original pipeline is stored as its private attribute + # we will pass them as optional arguments if they can be accepted by the pipeline + additional_pipe_kwargs = [ + k[1:] + for k in original_config.keys() + if k.startswith("_") and k[1:] in optional_kwargs and k[1:] not in passed_pipe_kwargs + ] + for k in additional_pipe_kwargs: + original_pipe_kwargs[k] = original_config.pop(f"_{k}") + + image_2_image_kwargs = {**passed_class_obj, **original_class_obj, **passed_pipe_kwargs, **original_pipe_kwargs} + + # store unused config as private attribute + unused_original_config = { + f"{'' if k.startswith('_') else '_'}{k}": original_config[k] + for k, v in original_config.items() + if k not in image_2_image_kwargs + } + + missing_modules = ( + set(expected_modules) - set(image_2_image_cls._optional_components) - set(image_2_image_kwargs.keys()) + ) + + if len(missing_modules) > 0: + raise ValueError( + f"Pipeline {image_2_image_cls} expected {expected_modules}, but only {set(list(passed_class_obj.keys()) + list(original_class_obj.keys()))} were passed" + ) + + model = image_2_image_cls(**image_2_image_kwargs) + model.register_to_config(_name_or_path=pretrained_model_name_or_path) + model.register_to_config(**unused_original_config) + + return model + + +class AutoPipelineForInpainting(ConfigMixin): + r""" + + [`AutoPipelineForInpainting`] is a generic pipeline class that instantiates an inpainting pipeline class. The + specific underlying pipeline class is automatically selected from either the + [`~AutoPipelineForInpainting.from_pretrained`] or [`~AutoPipelineForInpainting.from_pipe`] methods. + + This class cannot be instantiated using `__init__()` (throws an error). + + Class attributes: + + - **config_name** (`str`) -- The configuration filename that stores the class and module names of all the + diffusion pipeline's components. + + """ + + config_name = "model_index.json" + + def __init__(self, *args, **kwargs): + raise EnvironmentError( + f"{self.__class__.__name__} is designed to be instantiated " + f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or " + f"`{self.__class__.__name__}.from_pipe(pipeline)` methods." + ) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_or_path, **kwargs): + r""" + Instantiates a inpainting Pytorch diffusion pipeline from pretrained pipeline weight. + + The from_pretrained() method takes care of returning the correct pipeline class instance by: + 1. Detect the pipeline class of the pretrained_model_or_path based on the _class_name property of its + config object + 2. Find the inpainting pipeline linked to the pipeline class using pattern matching on pipeline class name. + + If a `controlnet` argument is passed, it will instantiate a [`StableDiffusionControlNetInpaintPipeline`] + object. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + If you get the error message below, you need to finetune the weights for your downstream task: + + ``` + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + + Parameters: + pretrained_model_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline + hosted on the Hub. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights + saved using + [`~DiffusionPipeline.save_pretrained`]. + torch_dtype (`str` or `torch.dtype`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + custom_revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id similar to + `revision` when loading a custom pipeline from the Hub. It can be a 🤗 Diffusers version when loading a + custom pipeline from GitHub, otherwise it defaults to `"main"` when loading from the Hub. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + device_map (`str` or `dict[str, int | str | torch.device]`, *optional*): + A map that specifies where each submodule should go. It doesn’t need to be defined for each + parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the + same device. + + Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + max_memory (`Dict`, *optional*): + A dictionary device identifier for the maximum memory. Will default to the maximum memory available for + each GPU and the available CPU RAM if unset. + offload_folder (`str` or `os.PathLike`, *optional*): + The path to offload weights if device_map contains the value `"disk"`. + offload_state_dict (`bool`, *optional*): + If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if + the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True` + when there is some disk offload. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the safetensors weights are downloaded if they're available **and** if the + safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors + weights. If set to `False`, safetensors weights are not loaded. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline + class). The overwritten components are passed directly to the pipelines `__init__` method. See example + below for more information. + variant (`str`, *optional*): + Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + + > [!TIP] > To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in + with `hf > auth login`. + + Examples: + + ```py + >>> from diffusers import AutoPipelineForInpainting + + >>> pipeline = AutoPipelineForInpainting.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0] + ``` + """ + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + token = kwargs.pop("token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + + load_config_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "token": token, + "local_files_only": local_files_only, + "revision": revision, + } + + config = cls.load_config(pretrained_model_or_path, **load_config_kwargs) + orig_class_name = config["_class_name"] + + # The `orig_class_name`` can be: + # `- *InpaintPipeline` (for inpaint-specific checkpoint) + # - `*ControlPipeline` (for Flux tools specific checkpoint) + # - or *Pipeline (for regular text-to-image checkpoint) + if "Inpaint" in orig_class_name: + to_replace = "InpaintPipeline" + elif "ControlPipeline" in orig_class_name: + to_replace = "ControlPipeline" + else: + to_replace = "Pipeline" + + if "controlnet" in kwargs: + if isinstance(kwargs["controlnet"], ControlNetUnionModel): + orig_class_name = orig_class_name.replace(to_replace, "ControlNetUnion" + to_replace) + else: + orig_class_name = orig_class_name.replace(to_replace, "ControlNet" + to_replace) + if "enable_pag" in kwargs: + enable_pag = kwargs.pop("enable_pag") + if enable_pag: + orig_class_name = orig_class_name.replace(to_replace, "PAG" + to_replace) + if to_replace == "ControlPipeline": + orig_class_name = orig_class_name.replace(to_replace, "ControlInpaintPipeline") + inpainting_cls = _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, orig_class_name) + + kwargs = {**load_config_kwargs, **kwargs} + return inpainting_cls.from_pretrained(pretrained_model_or_path, **kwargs) + + @classmethod + def from_pipe(cls, pipeline, **kwargs): + r""" + Instantiates a inpainting Pytorch diffusion pipeline from another instantiated diffusion pipeline class. + + The from_pipe() method takes care of returning the correct pipeline class instance by finding the inpainting + pipeline linked to the pipeline class using pattern matching on pipeline class name. + + All the modules the pipeline class contain will be used to initialize the new pipeline without reallocating + additional memory. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + Parameters: + pipeline (`DiffusionPipeline`): + an instantiated `DiffusionPipeline` object + + Examples: + + ```py + >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForInpainting + + >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained( + ... "DeepFloyd/IF-I-XL-v1.0", requires_safety_checker=False + ... ) + + >>> pipe_inpaint = AutoPipelineForInpainting.from_pipe(pipe_t2i) + >>> image = pipe_inpaint(prompt, image=init_image, mask_image=mask_image).images[0] + ``` + """ + original_config = dict(pipeline.config) + original_cls_name = pipeline.__class__.__name__ + + # derive the pipeline class to instantiate + inpainting_cls = _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, original_cls_name) + + if "controlnet" in kwargs: + if kwargs["controlnet"] is not None: + inpainting_cls = _get_task_class( + AUTO_INPAINT_PIPELINES_MAPPING, + inpainting_cls.__name__.replace("ControlNet", "").replace( + "InpaintPipeline", "ControlNetInpaintPipeline" + ), + ) + else: + inpainting_cls = _get_task_class( + AUTO_INPAINT_PIPELINES_MAPPING, + inpainting_cls.__name__.replace("ControlNetInpaintPipeline", "InpaintPipeline"), + ) + + if "enable_pag" in kwargs: + enable_pag = kwargs.pop("enable_pag") + if enable_pag: + inpainting_cls = _get_task_class( + AUTO_INPAINT_PIPELINES_MAPPING, + inpainting_cls.__name__.replace("PAG", "").replace("InpaintPipeline", "PAGInpaintPipeline"), + ) + else: + inpainting_cls = _get_task_class( + AUTO_INPAINT_PIPELINES_MAPPING, + inpainting_cls.__name__.replace("PAGInpaintPipeline", "InpaintPipeline"), + ) + + # define expected module and optional kwargs given the pipeline signature + expected_modules, optional_kwargs = inpainting_cls._get_signature_keys(inpainting_cls) + + pretrained_model_name_or_path = original_config.pop("_name_or_path", None) + + # allow users pass modules in `kwargs` to override the original pipeline's components + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + original_class_obj = { + k: pipeline.components[k] + for k, v in pipeline.components.items() + if k in expected_modules and k not in passed_class_obj + } + + # allow users pass optional kwargs to override the original pipelines config attribute + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + original_pipe_kwargs = { + k: original_config[k] + for k, v in original_config.items() + if k in optional_kwargs and k not in passed_pipe_kwargs + } + + # config that were not expected by original pipeline is stored as private attribute + # we will pass them as optional arguments if they can be accepted by the pipeline + additional_pipe_kwargs = [ + k[1:] + for k in original_config.keys() + if k.startswith("_") and k[1:] in optional_kwargs and k[1:] not in passed_pipe_kwargs + ] + for k in additional_pipe_kwargs: + original_pipe_kwargs[k] = original_config.pop(f"_{k}") + + inpainting_kwargs = {**passed_class_obj, **original_class_obj, **passed_pipe_kwargs, **original_pipe_kwargs} + + # store unused config as private attribute + unused_original_config = { + f"{'' if k.startswith('_') else '_'}{k}": original_config[k] + for k, v in original_config.items() + if k not in inpainting_kwargs + } + + missing_modules = ( + set(expected_modules) - set(inpainting_cls._optional_components) - set(inpainting_kwargs.keys()) + ) + + if len(missing_modules) > 0: + raise ValueError( + f"Pipeline {inpainting_cls} expected {expected_modules}, but only {set(list(passed_class_obj.keys()) + list(original_class_obj.keys()))} were passed" + ) + + model = inpainting_cls(**inpainting_kwargs) + model.register_to_config(_name_or_path=pretrained_model_name_or_path) + model.register_to_config(**unused_original_config) + + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/free_init_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/free_init_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..04a385edd819e70a429637e9f6a298fe18d38fa1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/free_init_utils.py @@ -0,0 +1,186 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import torch +import torch.fft as fft + +from ..utils.torch_utils import randn_tensor + + +class FreeInitMixin: + r"""Mixin class for FreeInit.""" + + def enable_free_init( + self, + num_iters: int = 3, + use_fast_sampling: bool = False, + method: str = "butterworth", + order: int = 4, + spatial_stop_frequency: float = 0.25, + temporal_stop_frequency: float = 0.25, + ): + """Enables the FreeInit mechanism as in https://huggingface.co/papers/2312.07537. + + This implementation has been adapted from the [official repository](https://github.com/TianxingWu/FreeInit). + + Args: + num_iters (`int`, *optional*, defaults to `3`): + Number of FreeInit noise re-initialization iterations. + use_fast_sampling (`bool`, *optional*, defaults to `False`): + Whether or not to speedup sampling procedure at the cost of probably lower quality results. Enables the + "Coarse-to-Fine Sampling" strategy, as mentioned in the paper, if set to `True`. + method (`str`, *optional*, defaults to `butterworth`): + Must be one of `butterworth`, `ideal` or `gaussian` to use as the filtering method for the FreeInit low + pass filter. + order (`int`, *optional*, defaults to `4`): + Order of the filter used in `butterworth` method. Larger values lead to `ideal` method behaviour + whereas lower values lead to `gaussian` method behaviour. + spatial_stop_frequency (`float`, *optional*, defaults to `0.25`): + Normalized stop frequency for spatial dimensions. Must be between 0 to 1. Referred to as `d_s` in the + original implementation. + temporal_stop_frequency (`float`, *optional*, defaults to `0.25`): + Normalized stop frequency for temporal dimensions. Must be between 0 to 1. Referred to as `d_t` in the + original implementation. + """ + self._free_init_num_iters = num_iters + self._free_init_use_fast_sampling = use_fast_sampling + self._free_init_method = method + self._free_init_order = order + self._free_init_spatial_stop_frequency = spatial_stop_frequency + self._free_init_temporal_stop_frequency = temporal_stop_frequency + + def disable_free_init(self): + """Disables the FreeInit mechanism if enabled.""" + self._free_init_num_iters = None + + @property + def free_init_enabled(self): + return hasattr(self, "_free_init_num_iters") and self._free_init_num_iters is not None + + def _get_free_init_freq_filter( + self, + shape: tuple[int, ...], + device: str | torch.dtype, + filter_type: str, + order: float, + spatial_stop_frequency: float, + temporal_stop_frequency: float, + ) -> torch.Tensor: + r"""Returns the FreeInit filter based on filter type and other input conditions.""" + + time, height, width = shape[-3], shape[-2], shape[-1] + mask = torch.zeros(shape) + + if spatial_stop_frequency == 0 or temporal_stop_frequency == 0: + return mask + + if filter_type == "butterworth": + + def retrieve_mask(x): + return 1 / (1 + (x / spatial_stop_frequency**2) ** order) + elif filter_type == "gaussian": + + def retrieve_mask(x): + return math.exp(-1 / (2 * spatial_stop_frequency**2) * x) + elif filter_type == "ideal": + + def retrieve_mask(x): + return 1 if x <= spatial_stop_frequency * 2 else 0 + else: + raise NotImplementedError("`filter_type` must be one of gaussian, butterworth or ideal") + + for t in range(time): + for h in range(height): + for w in range(width): + d_square = ( + ((spatial_stop_frequency / temporal_stop_frequency) * (2 * t / time - 1)) ** 2 + + (2 * h / height - 1) ** 2 + + (2 * w / width - 1) ** 2 + ) + mask[..., t, h, w] = retrieve_mask(d_square) + + return mask.to(device) + + def _apply_freq_filter(self, x: torch.Tensor, noise: torch.Tensor, low_pass_filter: torch.Tensor) -> torch.Tensor: + r"""Noise reinitialization.""" + # FFT + x_freq = fft.fftn(x, dim=(-3, -2, -1)) + x_freq = fft.fftshift(x_freq, dim=(-3, -2, -1)) + noise_freq = fft.fftn(noise, dim=(-3, -2, -1)) + noise_freq = fft.fftshift(noise_freq, dim=(-3, -2, -1)) + + # frequency mix + high_pass_filter = 1 - low_pass_filter + x_freq_low = x_freq * low_pass_filter + noise_freq_high = noise_freq * high_pass_filter + x_freq_mixed = x_freq_low + noise_freq_high # mix in freq domain + + # IFFT + x_freq_mixed = fft.ifftshift(x_freq_mixed, dim=(-3, -2, -1)) + x_mixed = fft.ifftn(x_freq_mixed, dim=(-3, -2, -1)).real + + return x_mixed + + def _apply_free_init( + self, + latents: torch.Tensor, + free_init_iteration: int, + num_inference_steps: int, + device: torch.device, + dtype: torch.dtype, + generator: torch.Generator, + ): + if free_init_iteration == 0: + self._free_init_initial_noise = latents.detach().clone() + else: + latent_shape = latents.shape + + free_init_filter_shape = (1, *latent_shape[1:]) + free_init_freq_filter = self._get_free_init_freq_filter( + shape=free_init_filter_shape, + device=device, + filter_type=self._free_init_method, + order=self._free_init_order, + spatial_stop_frequency=self._free_init_spatial_stop_frequency, + temporal_stop_frequency=self._free_init_temporal_stop_frequency, + ) + + current_diffuse_timestep = self.scheduler.config.num_train_timesteps - 1 + diffuse_timesteps = torch.full((latent_shape[0],), current_diffuse_timestep).long() + + z_t = self.scheduler.add_noise( + original_samples=latents, noise=self._free_init_initial_noise, timesteps=diffuse_timesteps.to(device) + ).to(dtype=torch.float32) + + z_rand = randn_tensor( + shape=latent_shape, + generator=generator, + device=device, + dtype=torch.float32, + ) + latents = self._apply_freq_filter(z_t, z_rand, low_pass_filter=free_init_freq_filter) + latents = latents.to(dtype) + + # Coarse-to-Fine Sampling for faster inference (can lead to lower quality) + if self._free_init_use_fast_sampling: + num_inference_steps = max( + 1, int(num_inference_steps / self._free_init_num_iters * (free_init_iteration + 1)) + ) + + if num_inference_steps > 0: + self.scheduler.set_timesteps(num_inference_steps, device=device) + + return latents, self.scheduler.timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/free_noise_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/free_noise_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5990e680ba0785522ec1bcaf236418d40477b6f7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/free_noise_utils.py @@ -0,0 +1,597 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable + +import torch +import torch.nn as nn + +from ..models.attention import BasicTransformerBlock, FreeNoiseTransformerBlock +from ..models.resnet import Downsample2D, ResnetBlock2D, Upsample2D +from ..models.transformers.transformer_2d import Transformer2DModel +from ..models.unets.unet_motion_model import ( + AnimateDiffTransformer3D, + CrossAttnDownBlockMotion, + DownBlockMotion, + UpBlockMotion, +) +from ..pipelines.pipeline_utils import DiffusionPipeline +from ..utils import logging +from ..utils.torch_utils import randn_tensor + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class SplitInferenceModule(nn.Module): + r""" + A wrapper module class that splits inputs along a specified dimension before performing a forward pass. + + This module is useful when you need to perform inference on large tensors in a memory-efficient way by breaking + them into smaller chunks, processing each chunk separately, and then reassembling the results. + + Args: + module (`nn.Module`): + The underlying PyTorch module that will be applied to each chunk of split inputs. + split_size (`int`, defaults to `1`): + The size of each chunk after splitting the input tensor. + split_dim (`int`, defaults to `0`): + The dimension along which the input tensors are split. + input_kwargs_to_split (`list[str]`, defaults to `["hidden_states"]`): + A list of keyword arguments (strings) that represent the input tensors to be split. + + Workflow: + 1. The keyword arguments specified in `input_kwargs_to_split` are split into smaller chunks using + `torch.split()` along the dimension `split_dim` and with a chunk size of `split_size`. + 2. The `module` is invoked once for each split with both the split inputs and any unchanged arguments + that were passed. + 3. The output tensors from each split are concatenated back together along `split_dim` before returning. + + Example: + ```python + >>> import torch + >>> import torch.nn as nn + + >>> model = nn.Linear(1000, 1000) + >>> split_module = SplitInferenceModule(model, split_size=2, split_dim=0, input_kwargs_to_split=["input"]) + + >>> input_tensor = torch.randn(42, 1000) + >>> # Will split the tensor into 21 slices of shape [2, 1000]. + >>> output = split_module(input=input_tensor) + ``` + + It is also possible to nest `SplitInferenceModule` across different split dimensions for more complex + multi-dimensional splitting. + """ + + def __init__( + self, + module: nn.Module, + split_size: int = 1, + split_dim: int = 0, + input_kwargs_to_split: list[str] = ["hidden_states"], + ) -> None: + super().__init__() + + self.module = module + self.split_size = split_size + self.split_dim = split_dim + self.input_kwargs_to_split = set(input_kwargs_to_split) + + def forward(self, *args, **kwargs) -> torch.Tensor | tuple[torch.Tensor]: + r"""Forward method for the `SplitInferenceModule`. + + This method processes the input by splitting specified keyword arguments along a given dimension, running the + underlying module on each split, and then concatenating the results. The splitting is controlled by the + `split_size` and `split_dim` parameters specified during initialization. + + Args: + *args (`Any`): + Positional arguments that are passed directly to the `module` without modification. + **kwargs (`dict[str, torch.Tensor]`): + Keyword arguments passed to the underlying `module`. Only keyword arguments whose names match the + entries in `input_kwargs_to_split` and are of type `torch.Tensor` will be split. The remaining keyword + arguments are passed unchanged. + + Returns: + `torch.Tensor | tuple[torch.Tensor]`: + The outputs obtained from `SplitInferenceModule` are the same as if the underlying module was inferred + without it. + - If the underlying module returns a single tensor, the result will be a single concatenated tensor + along the same `split_dim` after processing all splits. + - If the underlying module returns a tuple of tensors, each element of the tuple will be concatenated + along the `split_dim` across all splits, and the final result will be a tuple of concatenated tensors. + """ + split_inputs = {} + + # 1. Split inputs that were specified during initialization and also present in passed kwargs + for key in list(kwargs.keys()): + if key not in self.input_kwargs_to_split or not torch.is_tensor(kwargs[key]): + continue + split_inputs[key] = torch.split(kwargs[key], self.split_size, self.split_dim) + kwargs.pop(key) + + # 2. Invoke forward pass across each split + results = [] + for split_input in zip(*split_inputs.values()): + inputs = dict(zip(split_inputs.keys(), split_input)) + inputs.update(kwargs) + + intermediate_tensor_or_tensor_tuple = self.module(*args, **inputs) + results.append(intermediate_tensor_or_tensor_tuple) + + # 3. Concatenate split restuls to obtain final outputs + if isinstance(results[0], torch.Tensor): + return torch.cat(results, dim=self.split_dim) + elif isinstance(results[0], tuple): + return tuple([torch.cat(x, dim=self.split_dim) for x in zip(*results)]) + else: + raise ValueError( + "In order to use the SplitInferenceModule, it is necessary for the underlying `module` to either return a torch.Tensor or a tuple of torch.Tensor's." + ) + + +class AnimateDiffFreeNoiseMixin: + r"""Mixin class for [FreeNoise](https://huggingface.co/papers/2310.15169).""" + + def _enable_free_noise_in_block(self, block: CrossAttnDownBlockMotion | DownBlockMotion | UpBlockMotion): + r"""Helper function to enable FreeNoise in transformer blocks.""" + + for motion_module in block.motion_modules: + num_transformer_blocks = len(motion_module.transformer_blocks) + + for i in range(num_transformer_blocks): + if isinstance(motion_module.transformer_blocks[i], FreeNoiseTransformerBlock): + motion_module.transformer_blocks[i].set_free_noise_properties( + self._free_noise_context_length, + self._free_noise_context_stride, + self._free_noise_weighting_scheme, + ) + else: + assert isinstance(motion_module.transformer_blocks[i], BasicTransformerBlock) + basic_transfomer_block = motion_module.transformer_blocks[i] + + motion_module.transformer_blocks[i] = FreeNoiseTransformerBlock( + dim=basic_transfomer_block.dim, + num_attention_heads=basic_transfomer_block.num_attention_heads, + attention_head_dim=basic_transfomer_block.attention_head_dim, + dropout=basic_transfomer_block.dropout, + cross_attention_dim=basic_transfomer_block.cross_attention_dim, + activation_fn=basic_transfomer_block.activation_fn, + attention_bias=basic_transfomer_block.attention_bias, + only_cross_attention=basic_transfomer_block.only_cross_attention, + double_self_attention=basic_transfomer_block.double_self_attention, + positional_embeddings=basic_transfomer_block.positional_embeddings, + num_positional_embeddings=basic_transfomer_block.num_positional_embeddings, + context_length=self._free_noise_context_length, + context_stride=self._free_noise_context_stride, + weighting_scheme=self._free_noise_weighting_scheme, + ).to(device=self.device, dtype=self.dtype) + + motion_module.transformer_blocks[i].load_state_dict( + basic_transfomer_block.state_dict(), strict=True + ) + motion_module.transformer_blocks[i].set_chunk_feed_forward( + basic_transfomer_block._chunk_size, basic_transfomer_block._chunk_dim + ) + + def _disable_free_noise_in_block(self, block: CrossAttnDownBlockMotion | DownBlockMotion | UpBlockMotion): + r"""Helper function to disable FreeNoise in transformer blocks.""" + + for motion_module in block.motion_modules: + num_transformer_blocks = len(motion_module.transformer_blocks) + + for i in range(num_transformer_blocks): + if isinstance(motion_module.transformer_blocks[i], FreeNoiseTransformerBlock): + free_noise_transfomer_block = motion_module.transformer_blocks[i] + + motion_module.transformer_blocks[i] = BasicTransformerBlock( + dim=free_noise_transfomer_block.dim, + num_attention_heads=free_noise_transfomer_block.num_attention_heads, + attention_head_dim=free_noise_transfomer_block.attention_head_dim, + dropout=free_noise_transfomer_block.dropout, + cross_attention_dim=free_noise_transfomer_block.cross_attention_dim, + activation_fn=free_noise_transfomer_block.activation_fn, + attention_bias=free_noise_transfomer_block.attention_bias, + only_cross_attention=free_noise_transfomer_block.only_cross_attention, + double_self_attention=free_noise_transfomer_block.double_self_attention, + positional_embeddings=free_noise_transfomer_block.positional_embeddings, + num_positional_embeddings=free_noise_transfomer_block.num_positional_embeddings, + ).to(device=self.device, dtype=self.dtype) + + motion_module.transformer_blocks[i].load_state_dict( + free_noise_transfomer_block.state_dict(), strict=True + ) + motion_module.transformer_blocks[i].set_chunk_feed_forward( + free_noise_transfomer_block._chunk_size, free_noise_transfomer_block._chunk_dim + ) + + def _check_inputs_free_noise( + self, + prompt, + negative_prompt, + prompt_embeds, + negative_prompt_embeds, + num_frames, + ) -> None: + if not isinstance(prompt, (str, dict)): + raise ValueError(f"Expected `prompt` to have type `str` or `dict` but found {type(prompt)=}") + + if negative_prompt is not None: + if not isinstance(negative_prompt, (str, dict)): + raise ValueError( + f"Expected `negative_prompt` to have type `str` or `dict` but found {type(negative_prompt)=}" + ) + + if prompt_embeds is not None or negative_prompt_embeds is not None: + raise ValueError("`prompt_embeds` and `negative_prompt_embeds` is not supported in FreeNoise yet.") + + frame_indices = [isinstance(x, int) for x in prompt.keys()] + frame_prompts = [isinstance(x, str) for x in prompt.values()] + min_frame = min(list(prompt.keys())) + max_frame = max(list(prompt.keys())) + + if not all(frame_indices): + raise ValueError("Expected integer keys in `prompt` dict for FreeNoise.") + if not all(frame_prompts): + raise ValueError("Expected str values in `prompt` dict for FreeNoise.") + if min_frame != 0: + raise ValueError("The minimum frame index in `prompt` dict must be 0 as a starting prompt is necessary.") + if max_frame >= num_frames: + raise ValueError( + f"The maximum frame index in `prompt` dict must be lesser than {num_frames=} and follow 0-based indexing." + ) + + def _encode_prompt_free_noise( + self, + prompt: str | dict[int, str], + num_frames: int, + device: torch.device, + num_videos_per_prompt: int, + do_classifier_free_guidance: bool, + negative_prompt: str | dict[int, str] | None = None, + prompt_embeds: torch.Tensor | None = None, + negative_prompt_embeds: torch.Tensor | None = None, + lora_scale: float | None = None, + clip_skip: int | None = None, + ) -> torch.Tensor: + if negative_prompt is None: + negative_prompt = "" + + # Ensure that we have a dictionary of prompts + if isinstance(prompt, str): + prompt = {0: prompt} + if isinstance(negative_prompt, str): + negative_prompt = {0: negative_prompt} + + self._check_inputs_free_noise(prompt, negative_prompt, prompt_embeds, negative_prompt_embeds, num_frames) + + # Sort the prompts based on frame indices + prompt = dict(sorted(prompt.items())) + negative_prompt = dict(sorted(negative_prompt.items())) + + # Ensure that we have a prompt for the last frame index + prompt[num_frames - 1] = prompt[list(prompt.keys())[-1]] + negative_prompt[num_frames - 1] = negative_prompt[list(negative_prompt.keys())[-1]] + + frame_indices = list(prompt.keys()) + frame_prompts = list(prompt.values()) + frame_negative_indices = list(negative_prompt.keys()) + frame_negative_prompts = list(negative_prompt.values()) + + # Generate and interpolate positive prompts + prompt_embeds, _ = self.encode_prompt( + prompt=frame_prompts, + device=device, + num_images_per_prompt=num_videos_per_prompt, + do_classifier_free_guidance=False, + negative_prompt=None, + prompt_embeds=None, + negative_prompt_embeds=None, + lora_scale=lora_scale, + clip_skip=clip_skip, + ) + + shape = (num_frames, *prompt_embeds.shape[1:]) + prompt_interpolation_embeds = prompt_embeds.new_zeros(shape) + + for i in range(len(frame_indices) - 1): + start_frame = frame_indices[i] + end_frame = frame_indices[i + 1] + start_tensor = prompt_embeds[i].unsqueeze(0) + end_tensor = prompt_embeds[i + 1].unsqueeze(0) + + prompt_interpolation_embeds[start_frame : end_frame + 1] = self._free_noise_prompt_interpolation_callback( + start_frame, end_frame, start_tensor, end_tensor + ) + + # Generate and interpolate negative prompts + negative_prompt_embeds = None + negative_prompt_interpolation_embeds = None + + if do_classifier_free_guidance: + _, negative_prompt_embeds = self.encode_prompt( + prompt=[""] * len(frame_negative_prompts), + device=device, + num_images_per_prompt=num_videos_per_prompt, + do_classifier_free_guidance=True, + negative_prompt=frame_negative_prompts, + prompt_embeds=None, + negative_prompt_embeds=None, + lora_scale=lora_scale, + clip_skip=clip_skip, + ) + + negative_prompt_interpolation_embeds = negative_prompt_embeds.new_zeros(shape) + + for i in range(len(frame_negative_indices) - 1): + start_frame = frame_negative_indices[i] + end_frame = frame_negative_indices[i + 1] + start_tensor = negative_prompt_embeds[i].unsqueeze(0) + end_tensor = negative_prompt_embeds[i + 1].unsqueeze(0) + + negative_prompt_interpolation_embeds[start_frame : end_frame + 1] = ( + self._free_noise_prompt_interpolation_callback(start_frame, end_frame, start_tensor, end_tensor) + ) + + prompt_embeds = prompt_interpolation_embeds + negative_prompt_embeds = negative_prompt_interpolation_embeds + + if do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) + + return prompt_embeds, negative_prompt_embeds + + def _prepare_latents_free_noise( + self, + batch_size: int, + num_channels_latents: int, + num_frames: int, + height: int, + width: int, + dtype: torch.dtype, + device: torch.device, + generator: torch.Generator | None = None, + latents: torch.Tensor | None = None, + ): + if isinstance(generator, list) and len(generator) != batch_size: + raise ValueError( + f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" + f" size of {batch_size}. Make sure the batch size matches the length of the generators." + ) + + context_num_frames = ( + self._free_noise_context_length if self._free_noise_context_length == "repeat_context" else num_frames + ) + + shape = ( + batch_size, + num_channels_latents, + context_num_frames, + height // self.vae_scale_factor, + width // self.vae_scale_factor, + ) + + if latents is None: + latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype) + if self._free_noise_noise_type == "random": + return latents + else: + if latents.size(2) == num_frames: + return latents + elif latents.size(2) != self._free_noise_context_length: + raise ValueError( + f"You have passed `latents` as a parameter to FreeNoise. The expected number of frames is either {num_frames} or {self._free_noise_context_length}, but found {latents.size(2)}" + ) + latents = latents.to(device) + + if self._free_noise_noise_type == "shuffle_context": + for i in range(self._free_noise_context_length, num_frames, self._free_noise_context_stride): + # ensure window is within bounds + window_start = max(0, i - self._free_noise_context_length) + window_end = min(num_frames, window_start + self._free_noise_context_stride) + window_length = window_end - window_start + + if window_length == 0: + break + + indices = torch.LongTensor(list(range(window_start, window_end))) + shuffled_indices = indices[torch.randperm(window_length, generator=generator)] + + current_start = i + current_end = min(num_frames, current_start + window_length) + if current_end == current_start + window_length: + # batch of frames perfectly fits the window + latents[:, :, current_start:current_end] = latents[:, :, shuffled_indices] + else: + # handle the case where the last batch of frames does not fit perfectly with the window + prefix_length = current_end - current_start + shuffled_indices = shuffled_indices[:prefix_length] + latents[:, :, current_start:current_end] = latents[:, :, shuffled_indices] + + elif self._free_noise_noise_type == "repeat_context": + num_repeats = (num_frames + self._free_noise_context_length - 1) // self._free_noise_context_length + latents = torch.cat([latents] * num_repeats, dim=2) + + latents = latents[:, :, :num_frames] + return latents + + def _lerp( + self, start_index: int, end_index: int, start_tensor: torch.Tensor, end_tensor: torch.Tensor + ) -> torch.Tensor: + num_indices = end_index - start_index + 1 + interpolated_tensors = [] + + for i in range(num_indices): + alpha = i / (num_indices - 1) + interpolated_tensor = (1 - alpha) * start_tensor + alpha * end_tensor + interpolated_tensors.append(interpolated_tensor) + + interpolated_tensors = torch.cat(interpolated_tensors) + return interpolated_tensors + + def enable_free_noise( + self, + context_length: int | None = 16, + context_stride: int = 4, + weighting_scheme: str = "pyramid", + noise_type: str = "shuffle_context", + prompt_interpolation_callback: Callable[ + [DiffusionPipeline, int, int, torch.Tensor, torch.Tensor], torch.Tensor + ] + | None = None, + ) -> None: + r""" + Enable long video generation using FreeNoise. + + Args: + context_length (`int`, defaults to `16`, *optional*): + The number of video frames to process at once. It's recommended to set this to the maximum frames the + Motion Adapter was trained with (usually 16/24/32). If `None`, the default value from the motion + adapter config is used. + context_stride (`int`, *optional*): + Long videos are generated by processing many frames. FreeNoise processes these frames in sliding + windows of size `context_length`. Context stride allows you to specify how many frames to skip between + each window. For example, a context length of 16 and context stride of 4 would process 24 frames as: + [0, 15], [4, 19], [8, 23] (0-based indexing) + weighting_scheme (`str`, defaults to `pyramid`): + Weighting scheme for averaging latents after accumulation in FreeNoise blocks. The following weighting + schemes are supported currently: + - "flat" + Performs weighting averaging with a flat weight pattern: [1, 1, 1, 1, 1]. + - "pyramid" + Performs weighted averaging with a pyramid like weight pattern: [1, 2, 3, 2, 1]. + - "delayed_reverse_sawtooth" + Performs weighted averaging with low weights for earlier frames and high-to-low weights for + later frames: [0.01, 0.01, 3, 2, 1]. + noise_type (`str`, defaults to "shuffle_context"): + Must be one of ["shuffle_context", "repeat_context", "random"]. + - "shuffle_context" + Shuffles a fixed batch of `context_length` latents to create a final latent of size + `num_frames`. This is usually the best setting for most generation scenarios. However, there + might be visible repetition noticeable in the kinds of motion/animation generated. + - "repeated_context" + Repeats a fixed batch of `context_length` latents to create a final latent of size + `num_frames`. + - "random" + The final latents are random without any repetition. + """ + + allowed_weighting_scheme = ["flat", "pyramid", "delayed_reverse_sawtooth"] + allowed_noise_type = ["shuffle_context", "repeat_context", "random"] + + if context_length > self.motion_adapter.config.motion_max_seq_length: + logger.warning( + f"You have set {context_length=} which is greater than {self.motion_adapter.config.motion_max_seq_length=}. This can lead to bad generation results." + ) + if weighting_scheme not in allowed_weighting_scheme: + raise ValueError( + f"The parameter `weighting_scheme` must be one of {allowed_weighting_scheme}, but got {weighting_scheme=}" + ) + if noise_type not in allowed_noise_type: + raise ValueError(f"The parameter `noise_type` must be one of {allowed_noise_type}, but got {noise_type=}") + + self._free_noise_context_length = context_length or self.motion_adapter.config.motion_max_seq_length + self._free_noise_context_stride = context_stride + self._free_noise_weighting_scheme = weighting_scheme + self._free_noise_noise_type = noise_type + self._free_noise_prompt_interpolation_callback = prompt_interpolation_callback or self._lerp + + if hasattr(self.unet.mid_block, "motion_modules"): + blocks = [*self.unet.down_blocks, self.unet.mid_block, *self.unet.up_blocks] + else: + blocks = [*self.unet.down_blocks, *self.unet.up_blocks] + + for block in blocks: + self._enable_free_noise_in_block(block) + + def disable_free_noise(self) -> None: + r"""Disable the FreeNoise sampling mechanism.""" + self._free_noise_context_length = None + + if hasattr(self.unet.mid_block, "motion_modules"): + blocks = [*self.unet.down_blocks, self.unet.mid_block, *self.unet.up_blocks] + else: + blocks = [*self.unet.down_blocks, *self.unet.up_blocks] + + blocks = [*self.unet.down_blocks, self.unet.mid_block, *self.unet.up_blocks] + for block in blocks: + self._disable_free_noise_in_block(block) + + def _enable_split_inference_motion_modules_( + self, motion_modules: list[AnimateDiffTransformer3D], spatial_split_size: int + ) -> None: + for motion_module in motion_modules: + motion_module.proj_in = SplitInferenceModule(motion_module.proj_in, spatial_split_size, 0, ["input"]) + + for i in range(len(motion_module.transformer_blocks)): + motion_module.transformer_blocks[i] = SplitInferenceModule( + motion_module.transformer_blocks[i], + spatial_split_size, + 0, + ["hidden_states", "encoder_hidden_states"], + ) + + motion_module.proj_out = SplitInferenceModule(motion_module.proj_out, spatial_split_size, 0, ["input"]) + + def _enable_split_inference_attentions_( + self, attentions: list[Transformer2DModel], temporal_split_size: int + ) -> None: + for i in range(len(attentions)): + attentions[i] = SplitInferenceModule( + attentions[i], temporal_split_size, 0, ["hidden_states", "encoder_hidden_states"] + ) + + def _enable_split_inference_resnets_(self, resnets: list[ResnetBlock2D], temporal_split_size: int) -> None: + for i in range(len(resnets)): + resnets[i] = SplitInferenceModule(resnets[i], temporal_split_size, 0, ["input_tensor", "temb"]) + + def _enable_split_inference_samplers_( + self, samplers: list[Downsample2D] | list[Upsample2D], temporal_split_size: int + ) -> None: + for i in range(len(samplers)): + samplers[i] = SplitInferenceModule(samplers[i], temporal_split_size, 0, ["hidden_states"]) + + def enable_free_noise_split_inference(self, spatial_split_size: int = 256, temporal_split_size: int = 16) -> None: + r""" + Enable FreeNoise memory optimizations by utilizing + [`~diffusers.pipelines.free_noise_utils.SplitInferenceModule`] across different intermediate modeling blocks. + + Args: + spatial_split_size (`int`, defaults to `256`): + The split size across spatial dimensions for internal blocks. This is used in facilitating split + inference across the effective batch dimension (`[B x H x W, F, C]`) of intermediate tensors in motion + modeling blocks. + temporal_split_size (`int`, defaults to `16`): + The split size across temporal dimensions for internal blocks. This is used in facilitating split + inference across the effective batch dimension (`[B x F, H x W, C]`) of intermediate tensors in spatial + attention, resnets, downsampling and upsampling blocks. + """ + # TODO(aryan): Discuss on what's the best way to provide more control to users + blocks = [*self.unet.down_blocks, self.unet.mid_block, *self.unet.up_blocks] + for block in blocks: + if getattr(block, "motion_modules", None) is not None: + self._enable_split_inference_motion_modules_(block.motion_modules, spatial_split_size) + if getattr(block, "attentions", None) is not None: + self._enable_split_inference_attentions_(block.attentions, temporal_split_size) + if getattr(block, "resnets", None) is not None: + self._enable_split_inference_resnets_(block.resnets, temporal_split_size) + if getattr(block, "downsamplers", None) is not None: + self._enable_split_inference_samplers_(block.downsamplers, temporal_split_size) + if getattr(block, "upsamplers", None) is not None: + self._enable_split_inference_samplers_(block.upsamplers, temporal_split_size) + + @property + def free_noise_enabled(self): + return hasattr(self, "_free_noise_context_length") and self._free_noise_context_length is not None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/onnx_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/onnx_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..657750f90addb57e59a2a1600bb85c6ef95695aa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/onnx_utils.py @@ -0,0 +1,230 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import shutil +from pathlib import Path + +import numpy as np +from huggingface_hub import hf_hub_download +from huggingface_hub.utils import validate_hf_hub_args + +from ..utils import ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, is_onnx_available, logging + + +if is_onnx_available(): + import onnxruntime as ort + + +logger = logging.get_logger(__name__) + +ORT_TO_NP_TYPE = { + "tensor(bool)": np.bool_, + "tensor(int8)": np.int8, + "tensor(uint8)": np.uint8, + "tensor(int16)": np.int16, + "tensor(uint16)": np.uint16, + "tensor(int32)": np.int32, + "tensor(uint32)": np.uint32, + "tensor(int64)": np.int64, + "tensor(uint64)": np.uint64, + "tensor(float16)": np.float16, + "tensor(float)": np.float32, + "tensor(double)": np.float64, +} + + +class OnnxRuntimeModel: + def __init__(self, model=None, **kwargs): + logger.info("`diffusers.OnnxRuntimeModel` is experimental and might change in the future.") + self.model = model + self.model_save_dir = kwargs.get("model_save_dir", None) + self.latest_model_name = kwargs.get("latest_model_name", ONNX_WEIGHTS_NAME) + + def __call__(self, **kwargs): + inputs = {k: np.array(v) for k, v in kwargs.items()} + return self.model.run(None, inputs) + + @staticmethod + def load_model(path: str | Path, provider=None, sess_options=None, provider_options=None): + """ + Loads an ONNX Inference session with an ExecutionProvider. Default provider is `CPUExecutionProvider` + + Arguments: + path (`str` or `Path`): + Directory from which to load + provider(`str`, *optional*): + Onnxruntime execution provider to use for loading the model, defaults to `CPUExecutionProvider` + """ + if provider is None: + logger.info("No onnxruntime provider specified, using CPUExecutionProvider") + provider = "CPUExecutionProvider" + + if provider_options is None: + provider_options = [] + elif not isinstance(provider_options, list): + provider_options = [provider_options] + + return ort.InferenceSession( + path, providers=[provider], sess_options=sess_options, provider_options=provider_options + ) + + def _save_pretrained(self, save_directory: str | Path, file_name: str | None = None, **kwargs): + """ + Save a model and its configuration file to a directory, so that it can be re-loaded using the + [`~optimum.onnxruntime.modeling_ort.ORTModel.from_pretrained`] class method. It will always save the + latest_model_name. + + Arguments: + save_directory (`str` or `Path`): + Directory where to save the model file. + file_name(`str`, *optional*): + Overwrites the default model file name from `"model.onnx"` to `file_name`. This allows you to save the + model with a different name. + """ + model_file_name = file_name if file_name is not None else ONNX_WEIGHTS_NAME + + src_path = self.model_save_dir.joinpath(self.latest_model_name) + dst_path = Path(save_directory).joinpath(model_file_name) + try: + shutil.copyfile(src_path, dst_path) + except shutil.SameFileError: + pass + + # copy external weights (for models >2GB) + src_path = self.model_save_dir.joinpath(ONNX_EXTERNAL_WEIGHTS_NAME) + if src_path.exists(): + dst_path = Path(save_directory).joinpath(ONNX_EXTERNAL_WEIGHTS_NAME) + try: + shutil.copyfile(src_path, dst_path) + except shutil.SameFileError: + pass + + def save_pretrained( + self, + save_directory: str | os.PathLike, + **kwargs, + ): + """ + Save a model to a directory, so that it can be re-loaded using the [`~OnnxModel.from_pretrained`] class + method.: + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to which to save. Will be created if it doesn't exist. + """ + if os.path.isfile(save_directory): + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") + return + + os.makedirs(save_directory, exist_ok=True) + + # saving model weights/files + self._save_pretrained(save_directory, **kwargs) + + @classmethod + @validate_hf_hub_args + def _from_pretrained( + cls, + model_id: str | Path, + token: bool | str | None | None = None, + revision: str | None | None = None, + force_download: bool = False, + cache_dir: str | None = None, + file_name: str | None = None, + provider: str | None = None, + sess_options: "ort.SessionOptions" | None = None, + **kwargs, + ): + """ + Load a model from a directory or the HF Hub. + + Arguments: + model_id (`str` or `Path`): + Directory from which to load + token (`str` or `bool`): + Is needed to load models from a private or gated repository + revision (`str`): + Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id + cache_dir (`str | Path`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + file_name(`str`): + Overwrites the default model file name from `"model.onnx"` to `file_name`. This allows you to load + different model files from the same repository or directory. + provider(`str`): + The ONNX runtime provider, e.g. `CPUExecutionProvider` or `CUDAExecutionProvider`. + kwargs (`Dict`, *optional*): + kwargs will be passed to the model during initialization + """ + model_file_name = file_name if file_name is not None else ONNX_WEIGHTS_NAME + # load model from local directory + if os.path.isdir(model_id): + model = OnnxRuntimeModel.load_model( + Path(model_id, model_file_name).as_posix(), + provider=provider, + sess_options=sess_options, + provider_options=kwargs.pop("provider_options"), + ) + kwargs["model_save_dir"] = Path(model_id) + # load model from hub + else: + # download model + model_cache_path = hf_hub_download( + repo_id=model_id, + filename=model_file_name, + token=token, + revision=revision, + cache_dir=cache_dir, + force_download=force_download, + ) + kwargs["model_save_dir"] = Path(model_cache_path).parent + kwargs["latest_model_name"] = Path(model_cache_path).name + model = OnnxRuntimeModel.load_model( + model_cache_path, + provider=provider, + sess_options=sess_options, + provider_options=kwargs.pop("provider_options"), + ) + return cls(model=model, **kwargs) + + @classmethod + @validate_hf_hub_args + def from_pretrained( + cls, + model_id: str | Path, + force_download: bool = True, + token: str | None = None, + cache_dir: str | None = None, + **model_kwargs, + ): + revision = None + if len(str(model_id).split("@")) == 2: + model_id, revision = model_id.split("@") + + return cls._from_pretrained( + model_id=model_id, + revision=revision, + cache_dir=cache_dir, + force_download=force_download, + token=token, + **model_kwargs, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_flax_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_flax_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..51dcf9a2fecf2adf7492e9ae5770edac5f1d24ed --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_flax_utils.py @@ -0,0 +1,611 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import inspect +import os +from typing import Any + +import flax +import numpy as np +import PIL.Image +from flax.core.frozen_dict import FrozenDict +from huggingface_hub import create_repo, snapshot_download +from huggingface_hub.utils import validate_hf_hub_args +from PIL import Image +from tqdm.auto import tqdm + +from ..configuration_utils import ConfigMixin +from ..models.modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin +from ..schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin +from ..utils import ( + CONFIG_NAME, + BaseOutput, + PushToHubMixin, + http_user_agent, + is_transformers_available, + logging, +) + + +if is_transformers_available(): + from transformers import FlaxPreTrainedModel + +INDEX_FILE = "diffusion_flax_model.bin" + + +logger = logging.get_logger(__name__) + + +LOADABLE_CLASSES = { + "diffusers": { + "FlaxModelMixin": ["save_pretrained", "from_pretrained"], + "FlaxSchedulerMixin": ["save_pretrained", "from_pretrained"], + "FlaxDiffusionPipeline": ["save_pretrained", "from_pretrained"], + }, + "transformers": { + "PreTrainedTokenizer": ["save_pretrained", "from_pretrained"], + "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"], + "FlaxPreTrainedModel": ["save_pretrained", "from_pretrained"], + "FeatureExtractionMixin": ["save_pretrained", "from_pretrained"], + "ProcessorMixin": ["save_pretrained", "from_pretrained"], + "ImageProcessingMixin": ["save_pretrained", "from_pretrained"], + }, +} + +ALL_IMPORTABLE_CLASSES = {} +for library in LOADABLE_CLASSES: + ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES[library]) + + +def import_flax_or_no_model(module, class_name): + try: + # 1. First make sure that if a Flax object is present, import this one + class_obj = getattr(module, "Flax" + class_name) + except AttributeError: + # 2. If this doesn't work, it's not a model and we don't append "Flax" + class_obj = getattr(module, class_name) + except AttributeError: + raise ValueError(f"Neither Flax{class_name} nor {class_name} exist in {module}") + + return class_obj + + +@flax.struct.dataclass +class FlaxImagePipelineOutput(BaseOutput): + """ + Output class for image pipelines. + + Args: + images (`list[PIL.Image.Image]` or `np.ndarray`) + list of denoised PIL images of length `batch_size` or NumPy array of shape `(batch_size, height, width, + num_channels)`. + """ + + images: list[PIL.Image.Image] | np.ndarray + + +class FlaxDiffusionPipeline(ConfigMixin, PushToHubMixin): + r""" + Base class for Flax-based pipelines. + + [`FlaxDiffusionPipeline`] stores all components (models, schedulers, and processors) for diffusion pipelines and + provides methods for loading, downloading and saving models. It also includes methods to: + + - enable/disable the progress bar for the denoising iteration + + Class attributes: + + - **config_name** ([`str`]) -- The configuration filename that stores the class and module names of all the + diffusion pipeline's components. + """ + + config_name = "model_index.json" + + def register_modules(self, **kwargs): + # import it here to avoid circular import + from diffusers import pipelines + + for name, module in kwargs.items(): + if module is None: + register_dict = {name: (None, None)} + else: + # retrieve library + library = module.__module__.split(".")[0] + + # check if the module is a pipeline module + pipeline_dir = module.__module__.split(".")[-2] + path = module.__module__.split(".") + is_pipeline_module = pipeline_dir in path and hasattr(pipelines, pipeline_dir) + + # if library is not in LOADABLE_CLASSES, then it is a custom module. + # Or if it's a pipeline module, then the module is inside the pipeline + # folder so we set the library to module name. + if library not in LOADABLE_CLASSES or is_pipeline_module: + library = pipeline_dir + + # retrieve class_name + class_name = module.__class__.__name__ + + register_dict = {name: (library, class_name)} + + # save model index config + self.register_to_config(**register_dict) + + # set models + setattr(self, name, module) + + def save_pretrained( + self, + save_directory: str | os.PathLike, + params: dict | FrozenDict, + push_to_hub: bool = False, + **kwargs, + ): + # TODO: handle inference_state + """ + Save all saveable variables of the pipeline to a directory. A pipeline variable can be saved and loaded if its + class implements both a save and loading method. The pipeline is easily reloaded using the + [`~FlaxDiffusionPipeline.from_pretrained`] class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to which to save. Will be created if it doesn't exist. + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + self.save_config(save_directory) + + model_index_dict = dict(self.config) + model_index_dict.pop("_class_name") + model_index_dict.pop("_diffusers_version") + model_index_dict.pop("_module", None) + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + private = kwargs.pop("private", None) + create_pr = kwargs.pop("create_pr", False) + token = kwargs.pop("token", None) + repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id + + for pipeline_component_name in model_index_dict.keys(): + sub_model = getattr(self, pipeline_component_name) + if sub_model is None: + # edge case for saving a pipeline with safety_checker=None + continue + + model_cls = sub_model.__class__ + + save_method_name = None + # search for the model's base class in LOADABLE_CLASSES + for library_name, library_classes in LOADABLE_CLASSES.items(): + library = importlib.import_module(library_name) + for base_class, save_load_methods in library_classes.items(): + class_candidate = getattr(library, base_class, None) + if class_candidate is not None and issubclass(model_cls, class_candidate): + # if we found a suitable base class in LOADABLE_CLASSES then grab its save method + save_method_name = save_load_methods[0] + break + if save_method_name is not None: + break + + save_method = getattr(sub_model, save_method_name) + expects_params = "params" in set(inspect.signature(save_method).parameters.keys()) + + if expects_params: + save_method( + os.path.join(save_directory, pipeline_component_name), params=params[pipeline_component_name] + ) + else: + save_method(os.path.join(save_directory, pipeline_component_name)) + + if push_to_hub: + self._upload_folder( + save_directory, + repo_id, + token=token, + commit_message=commit_message, + create_pr=create_pr, + ) + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike | None, **kwargs): + r""" + Instantiate a Flax-based diffusion pipeline from pretrained pipeline weights. + + The pipeline is set in evaluation mode (`model.eval()) by default and dropout modules are deactivated. + + If you get the error message below, you need to finetune the weights for your downstream task: + + ``` + Some weights of FlaxUNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + ``` + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *repo id* (for example `stable-diffusion-v1-5/stable-diffusion-v1-5`) of a + pretrained pipeline hosted on the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved + using [`~FlaxDiffusionPipeline.save_pretrained`]. + dtype (`jnp.dtype`, *optional*): + Override the default `jnp.dtype` and load the model under this dtype. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you're downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (the pipeline components) of the specific pipeline + class. The overwritten components are passed directly to the pipelines `__init__` method. + + > [!TIP] > To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in + with `hf > auth login`. + + Examples: + + ```py + >>> from diffusers import FlaxDiffusionPipeline + + >>> # Download pipeline from huggingface.co and cache. + >>> # Requires to be logged in to Hugging Face hub, + >>> # see more in [the documentation](https://huggingface.co/docs/hub/security-tokens) + >>> pipeline, params = FlaxDiffusionPipeline.from_pretrained( + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", + ... variant="bf16", + ... dtype=jnp.bfloat16, + ... ) + + >>> # Download pipeline, but use a different scheduler + >>> from diffusers import FlaxDPMSolverMultistepScheduler + + >>> model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" + >>> dpmpp, dpmpp_state = FlaxDPMSolverMultistepScheduler.from_pretrained( + ... model_id, + ... subfolder="scheduler", + ... ) + + >>> dpm_pipe, dpm_params = FlaxStableDiffusionPipeline.from_pretrained( + ... model_id, variant="bf16", dtype=jnp.bfloat16, scheduler=dpmpp + ... ) + >>> dpm_params["scheduler"] = dpmpp_state + ``` + """ + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + cache_dir = kwargs.pop("cache_dir", None) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", False) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + from_pt = kwargs.pop("from_pt", False) + use_memory_efficient_attention = kwargs.pop("use_memory_efficient_attention", False) + split_head_dim = kwargs.pop("split_head_dim", False) + dtype = kwargs.pop("dtype", None) + + # 1. Download the checkpoints and configs + # use snapshot download here to get it working from from_pretrained + if not os.path.isdir(pretrained_model_name_or_path): + config_dict = cls.load_config( + pretrained_model_name_or_path, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + ) + # make sure we only download sub-folders and `diffusers` filenames + folder_names = [k for k in config_dict.keys() if not k.startswith("_")] + allow_patterns = [os.path.join(k, "*") for k in folder_names] + allow_patterns += [FLAX_WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, cls.config_name] + + ignore_patterns = ["*.bin", "*.safetensors"] if not from_pt else [] + ignore_patterns += ["*.onnx", "*.onnx_data", "*.xml", "*.pb"] + + if cls != FlaxDiffusionPipeline: + requested_pipeline_class = cls.__name__ + else: + requested_pipeline_class = config_dict.get("_class_name", cls.__name__) + requested_pipeline_class = ( + requested_pipeline_class + if requested_pipeline_class.startswith("Flax") + else "Flax" + requested_pipeline_class + ) + + user_agent = {"pipeline_class": requested_pipeline_class} + user_agent = http_user_agent(user_agent) + + # download all allow_patterns + cached_folder = snapshot_download( + pretrained_model_name_or_path, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + user_agent=user_agent, + ) + else: + cached_folder = pretrained_model_name_or_path + + config_dict = cls.load_config(cached_folder) + + # 2. Load the pipeline class, if using custom module then load it from the hub + # if we load from explicit class, let's use it + if cls != FlaxDiffusionPipeline: + pipeline_class = cls + else: + diffusers_module = importlib.import_module(cls.__module__.split(".")[0]) + class_name = ( + config_dict["_class_name"] + if config_dict["_class_name"].startswith("Flax") + else "Flax" + config_dict["_class_name"] + ) + pipeline_class = getattr(diffusers_module, class_name) + + # some modules can be passed directly to the init + # in this case they are already instantiated in `kwargs` + # extract them here + expected_modules, optional_kwargs = cls._get_signature_keys(pipeline_class) + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + + init_dict, unused_kwargs, _ = pipeline_class.extract_init_dict(config_dict, **kwargs) + + # define init kwargs + init_kwargs = {k: init_dict.pop(k) for k in optional_kwargs if k in init_dict} + init_kwargs = {**init_kwargs, **passed_pipe_kwargs} + + # remove `null` components + def load_module(name, value): + if value[0] is None: + return False + if name in passed_class_obj and passed_class_obj[name] is None: + return False + return True + + init_dict = {k: v for k, v in init_dict.items() if load_module(k, v)} + + # Throw nice warnings / errors for fast accelerate loading + if len(unused_kwargs) > 0: + logger.warning( + f"Keyword arguments {unused_kwargs} are not expected by {pipeline_class.__name__} and will be ignored." + ) + + # inference_params + params = {} + + # import it here to avoid circular import + from diffusers import pipelines + + # 3. Load each module in the pipeline + for name, (library_name, class_name) in init_dict.items(): + if class_name is None: + # edge case for when the pipeline was saved with safety_checker=None + init_kwargs[name] = None + continue + + is_pipeline_module = hasattr(pipelines, library_name) + loaded_sub_model = None + sub_model_should_be_defined = True + + # if the model is in a pipeline module, then we load it from the pipeline + if name in passed_class_obj: + # 1. check that passed_class_obj has correct parent class + if not is_pipeline_module: + library = importlib.import_module(library_name) + class_obj = getattr(library, class_name) + importable_classes = LOADABLE_CLASSES[library_name] + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} + + expected_class_obj = None + for class_name, class_candidate in class_candidates.items(): + if class_candidate is not None and issubclass(class_obj, class_candidate): + expected_class_obj = class_candidate + + if not issubclass(passed_class_obj[name].__class__, expected_class_obj): + raise ValueError( + f"{passed_class_obj[name]} is of type: {type(passed_class_obj[name])}, but should be" + f" {expected_class_obj}" + ) + elif passed_class_obj[name] is None: + logger.warning( + f"You have passed `None` for {name} to disable its functionality in {pipeline_class}. Note" + f" that this might lead to problems when using {pipeline_class} and is not recommended." + ) + sub_model_should_be_defined = False + else: + logger.warning( + f"You have passed a non-standard module {passed_class_obj[name]}. We cannot verify whether it" + " has the correct type" + ) + + # set passed class object + loaded_sub_model = passed_class_obj[name] + elif is_pipeline_module: + pipeline_module = getattr(pipelines, library_name) + class_obj = import_flax_or_no_model(pipeline_module, class_name) + + importable_classes = ALL_IMPORTABLE_CLASSES + class_candidates = dict.fromkeys(importable_classes.keys(), class_obj) + else: + # else we just import it from the library. + library = importlib.import_module(library_name) + class_obj = import_flax_or_no_model(library, class_name) + + importable_classes = LOADABLE_CLASSES[library_name] + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} + + if loaded_sub_model is None and sub_model_should_be_defined: + load_method_name = None + for class_name, class_candidate in class_candidates.items(): + if class_candidate is not None and issubclass(class_obj, class_candidate): + load_method_name = importable_classes[class_name][1] + + load_method = getattr(class_obj, load_method_name) + + # check if the module is in a subdirectory + if os.path.isdir(os.path.join(cached_folder, name)): + loadable_folder = os.path.join(cached_folder, name) + else: + loaded_sub_model = cached_folder + + if issubclass(class_obj, FlaxModelMixin): + loaded_sub_model, loaded_params = load_method( + loadable_folder, + from_pt=from_pt, + use_memory_efficient_attention=use_memory_efficient_attention, + split_head_dim=split_head_dim, + dtype=dtype, + ) + params[name] = loaded_params + elif is_transformers_available() and issubclass(class_obj, FlaxPreTrainedModel): + if from_pt: + # TODO(Suraj): Fix this in Transformers. We should be able to use `_do_init=False` here + loaded_sub_model = load_method(loadable_folder, from_pt=from_pt) + loaded_params = loaded_sub_model.params + del loaded_sub_model._params + else: + loaded_sub_model, loaded_params = load_method(loadable_folder, _do_init=False) + params[name] = loaded_params + elif issubclass(class_obj, FlaxSchedulerMixin): + loaded_sub_model, scheduler_state = load_method(loadable_folder) + params[name] = scheduler_state + else: + loaded_sub_model = load_method(loadable_folder) + + init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...) + + # 4. Potentially add passed objects if expected + missing_modules = set(expected_modules) - set(init_kwargs.keys()) + passed_modules = list(passed_class_obj.keys()) + + if len(missing_modules) > 0 and missing_modules <= set(passed_modules): + for module in missing_modules: + init_kwargs[module] = passed_class_obj.get(module, None) + elif len(missing_modules) > 0: + passed_modules = set(list(init_kwargs.keys()) + list(passed_class_obj.keys())) - optional_kwargs + raise ValueError( + f"Pipeline {pipeline_class} expected {expected_modules}, but only {passed_modules} were passed." + ) + + model = pipeline_class(**init_kwargs, dtype=dtype) + return model, params + + @classmethod + def _get_signature_keys(cls, obj): + parameters = inspect.signature(obj.__init__).parameters + required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} + optional_parameters = set({k for k, v in parameters.items() if v.default != inspect._empty}) + expected_modules = set(required_parameters.keys()) - {"self"} + + return expected_modules, optional_parameters + + @property + def components(self) -> dict[str, Any]: + r""" + + The `self.components` property can be useful to run different pipelines with the same weights and + configurations to not have to re-allocate memory. + + Examples: + + ```py + >>> from diffusers import ( + ... FlaxStableDiffusionPipeline, + ... FlaxStableDiffusionImg2ImgPipeline, + ... ) + + >>> text2img = FlaxStableDiffusionPipeline.from_pretrained( + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", variant="bf16", dtype=jnp.bfloat16 + ... ) + >>> img2img = FlaxStableDiffusionImg2ImgPipeline(**text2img.components) + ``` + + Returns: + A dictionary containing all the modules needed to initialize the pipeline. + """ + expected_modules, optional_parameters = self._get_signature_keys(self) + components = { + k: getattr(self, k) for k in self.config.keys() if not k.startswith("_") and k not in optional_parameters + } + + if set(components.keys()) != expected_modules: + raise ValueError( + f"{self} has been incorrectly initialized or {self.__class__} is incorrectly implemented. Expected" + f" {expected_modules} to be defined, but {components} are defined." + ) + + return components + + @staticmethod + def numpy_to_pil(images): + """ + Convert a NumPy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + if images.shape[-1] == 1: + # special case for grayscale (single channel) images + pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] + else: + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + # TODO: make it compatible with jax.lax + def progress_bar(self, iterable): + if not hasattr(self, "_progress_bar_config"): + self._progress_bar_config = {} + elif not isinstance(self._progress_bar_config, dict): + raise ValueError( + f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." + ) + + return tqdm(iterable, **self._progress_bar_config) + + def set_progress_bar_config(self, **kwargs): + self._progress_bar_config = kwargs diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_loading_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_loading_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b2564d25505e1136edaaf1e63f5870b68d27199d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_loading_utils.py @@ -0,0 +1,1218 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import os +import re +import warnings +from pathlib import Path +from typing import Any, Callable + +import httpx +import requests +import torch +from huggingface_hub import DDUFEntry, ModelCard, model_info, snapshot_download +from huggingface_hub.utils import HfHubHTTPError, OfflineModeIsEnabled, validate_hf_hub_args +from packaging import version + +from .. import __version__ +from ..utils import ( + FLAX_WEIGHTS_NAME, + ONNX_EXTERNAL_WEIGHTS_NAME, + ONNX_WEIGHTS_NAME, + SAFETENSORS_WEIGHTS_NAME, + WEIGHTS_NAME, + _maybe_remap_transformers_class, + deprecate, + get_class_from_dynamic_module, + is_accelerate_available, + is_peft_available, + is_transformers_available, + is_transformers_version, + logging, +) +from ..utils.torch_utils import is_compiled_module +from .transformers_loading_utils import _load_tokenizer_from_dduf, _load_transformers_model_from_dduf + + +if is_transformers_available(): + import transformers + from transformers import PreTrainedModel, PreTrainedTokenizerBase + from transformers.utils import SAFE_WEIGHTS_NAME as TRANSFORMERS_SAFE_WEIGHTS_NAME + from transformers.utils import WEIGHTS_NAME as TRANSFORMERS_WEIGHTS_NAME + + if is_transformers_version("<=", "4.56.2"): + from transformers.utils import FLAX_WEIGHTS_NAME as TRANSFORMERS_FLAX_WEIGHTS_NAME + +if is_accelerate_available(): + import accelerate + from accelerate import dispatch_model + from accelerate.hooks import remove_hook_from_module + from accelerate.utils import compute_module_sizes, get_max_memory + + +INDEX_FILE = "diffusion_pytorch_model.bin" +CUSTOM_PIPELINE_FILE_NAME = "pipeline.py" +DUMMY_MODULES_FOLDER = "diffusers.utils" +TRANSFORMERS_DUMMY_MODULES_FOLDER = "transformers.utils" +CONNECTED_PIPES_KEYS = ["prior"] + +logger = logging.get_logger(__name__) + +LOADABLE_CLASSES = { + "diffusers": { + "ModelMixin": ["save_pretrained", "from_pretrained"], + "SchedulerMixin": ["save_pretrained", "from_pretrained"], + "DiffusionPipeline": ["save_pretrained", "from_pretrained"], + "OnnxRuntimeModel": ["save_pretrained", "from_pretrained"], + "BaseGuidance": ["save_pretrained", "from_pretrained"], + }, + "transformers": { + "PreTrainedTokenizer": ["save_pretrained", "from_pretrained"], + "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"], + "PreTrainedModel": ["save_pretrained", "from_pretrained"], + "FeatureExtractionMixin": ["save_pretrained", "from_pretrained"], + "ProcessorMixin": ["save_pretrained", "from_pretrained"], + "ImageProcessingMixin": ["save_pretrained", "from_pretrained"], + }, + "onnxruntime.training": { + "ORTModule": ["save_pretrained", "from_pretrained"], + }, +} + +ALL_IMPORTABLE_CLASSES = {} +for library in LOADABLE_CLASSES: + ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES[library]) + + +def is_safetensors_compatible(filenames, passed_components=None, folder_names=None, variant=None) -> bool: + """ + Checking for safetensors compatibility: + - The model is safetensors compatible only if there is a safetensors file for each model component present in + filenames. + + Converting default pytorch serialized filenames to safetensors serialized filenames: + - For models from the diffusers library, just replace the ".bin" extension with ".safetensors" + - For models from the transformers library, the filename changes from "pytorch_model" to "model", and the ".bin" + extension is replaced with ".safetensors" + """ + weight_names = [ + WEIGHTS_NAME, + SAFETENSORS_WEIGHTS_NAME, + FLAX_WEIGHTS_NAME, + ONNX_WEIGHTS_NAME, + ONNX_EXTERNAL_WEIGHTS_NAME, + ] + + if is_transformers_available(): + weight_names += [TRANSFORMERS_WEIGHTS_NAME, TRANSFORMERS_SAFE_WEIGHTS_NAME] + if is_transformers_version("<=", "4.56.2"): + weight_names += [TRANSFORMERS_FLAX_WEIGHTS_NAME] + + # model_pytorch, diffusion_model_pytorch, ... + weight_prefixes = [w.split(".")[0] for w in weight_names] + # .bin, .safetensors, ... + weight_suffixs = [w.split(".")[-1] for w in weight_names] + # -00001-of-00002 + transformers_index_format = r"\d{5}-of-\d{5}" + # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetensors` + variant_file_re = re.compile( + rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$" + ) + non_variant_file_re = re.compile( + rf"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$" + ) + + passed_components = passed_components or [] + if folder_names: + filenames = {f for f in filenames if os.path.split(f)[0] in folder_names} + + # extract all components of the pipeline and their associated files + components = {} + for filename in filenames: + if not len(filename.split("/")) == 2: + continue + + component, component_filename = filename.split("/") + if component in passed_components: + continue + + components.setdefault(component, []) + components[component].append(component_filename) + + # If there are no component folders check the main directory for safetensors files + filtered_filenames = set() + if not components: + if variant is not None: + filtered_filenames = filter_with_regex(filenames, variant_file_re) + + # If no variant filenames exist check if non-variant files are available + if not filtered_filenames: + filtered_filenames = filter_with_regex(filenames, non_variant_file_re) + return any(".safetensors" in filename for filename in filtered_filenames) + + # iterate over all files of a component + # check if safetensor files exist for that component + for component, component_filenames in components.items(): + matches = [] + filtered_component_filenames = set() + # if variant is provided check if the variant of the safetensors exists + if variant is not None: + filtered_component_filenames = filter_with_regex(component_filenames, variant_file_re) + + # if variant safetensor files do not exist check for non-variants + if not filtered_component_filenames: + filtered_component_filenames = filter_with_regex(component_filenames, non_variant_file_re) + for component_filename in filtered_component_filenames: + filename, extension = os.path.splitext(component_filename) + + match_exists = extension == ".safetensors" + matches.append(match_exists) + + if not any(matches): + return False + + return True + + +def filter_model_files(filenames): + """Filter model repo files for just files/folders that contain model weights""" + weight_names = [ + WEIGHTS_NAME, + SAFETENSORS_WEIGHTS_NAME, + FLAX_WEIGHTS_NAME, + ONNX_WEIGHTS_NAME, + ONNX_EXTERNAL_WEIGHTS_NAME, + ] + + if is_transformers_available(): + weight_names += [TRANSFORMERS_WEIGHTS_NAME, TRANSFORMERS_SAFE_WEIGHTS_NAME] + if is_transformers_version("<=", "4.56.2"): + weight_names += [TRANSFORMERS_FLAX_WEIGHTS_NAME] + + allowed_extensions = [wn.split(".")[-1] for wn in weight_names] + + return [f for f in filenames if any(f.endswith(extension) for extension in allowed_extensions)] + + +def filter_with_regex(filenames, pattern_re): + return {f for f in filenames if pattern_re.match(f.split("/")[-1]) is not None} + + +def variant_compatible_siblings(filenames, variant=None, ignore_patterns=None) -> list[os.PathLike] | str: + weight_names = [ + WEIGHTS_NAME, + SAFETENSORS_WEIGHTS_NAME, + FLAX_WEIGHTS_NAME, + ONNX_WEIGHTS_NAME, + ONNX_EXTERNAL_WEIGHTS_NAME, + ] + + if is_transformers_available(): + weight_names += [TRANSFORMERS_WEIGHTS_NAME, TRANSFORMERS_SAFE_WEIGHTS_NAME] + if is_transformers_version("<=", "4.56.2"): + weight_names += [TRANSFORMERS_FLAX_WEIGHTS_NAME] + + # model_pytorch, diffusion_model_pytorch, ... + weight_prefixes = [w.split(".")[0] for w in weight_names] + # .bin, .safetensors, ... + weight_suffixs = [w.split(".")[-1] for w in weight_names] + # -00001-of-00002 + transformers_index_format = r"\d{5}-of-\d{5}" + + if variant is not None: + # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetensors` + variant_file_re = re.compile( + rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$" + ) + # `text_encoder/pytorch_model.bin.index.fp16.json` + variant_index_re = re.compile( + rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.{variant}\.json$" + ) + legacy_variant_file_re = re.compile(rf".*-{transformers_index_format}\.{variant}\.[a-z]+$") + legacy_variant_index_re = re.compile( + rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.{variant}\.index\.json$" + ) + + # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetensors` + non_variant_file_re = re.compile( + rf"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$" + ) + # `text_encoder/pytorch_model.bin.index.json` + non_variant_index_re = re.compile(rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.json") + + def filter_for_compatible_extensions(filenames, ignore_patterns=None): + if not ignore_patterns: + return filenames + + # ignore patterns uses glob style patterns e.g *.safetensors but we're only + # interested in the extension name + return {f for f in filenames if not any(f.endswith(pat.lstrip("*.")) for pat in ignore_patterns)} + + # Group files by component + components = {} + for filename in filenames: + if not len(filename.split("/")) == 2: + components.setdefault("", []).append(filename) + continue + + component, _ = filename.split("/") + components.setdefault(component, []).append(filename) + + usable_filenames = set() + variant_filenames = set() + for component, component_filenames in components.items(): + component_filenames = filter_for_compatible_extensions(component_filenames, ignore_patterns=ignore_patterns) + + component_variants = set() + component_legacy_variants = set() + component_non_variants = set() + if variant is not None: + component_variants = filter_with_regex(component_filenames, variant_file_re) + component_variant_index_files = filter_with_regex(component_filenames, variant_index_re) + + component_legacy_variants = filter_with_regex(component_filenames, legacy_variant_file_re) + component_legacy_variant_index_files = filter_with_regex(component_filenames, legacy_variant_index_re) + + if component_variants or component_legacy_variants: + variant_filenames.update( + component_variants | component_variant_index_files + if component_variants + else component_legacy_variants | component_legacy_variant_index_files + ) + + else: + component_non_variants = filter_with_regex(component_filenames, non_variant_file_re) + component_variant_index_files = filter_with_regex(component_filenames, non_variant_index_re) + + usable_filenames.update(component_non_variants | component_variant_index_files) + + usable_filenames.update(variant_filenames) + + if len(variant_filenames) == 0 and variant is not None: + error_message = f"You are trying to load model files of the `variant={variant}`, but no such modeling files are available. " + raise ValueError(error_message) + + if len(variant_filenames) > 0 and usable_filenames != variant_filenames: + logger.warning( + f"\nA mixture of {variant} and non-{variant} filenames will be loaded.\nLoaded {variant} filenames:\n" + f"[{', '.join(variant_filenames)}]\nLoaded non-{variant} filenames:\n" + f"[{', '.join(usable_filenames - variant_filenames)}\nIf this behavior is not " + f"expected, please check your folder structure." + ) + + return usable_filenames, variant_filenames + + +@validate_hf_hub_args +def warn_deprecated_model_variant(pretrained_model_name_or_path, token, variant, revision, model_filenames): + info = model_info( + pretrained_model_name_or_path, + token=token, + revision=None, + ) + filenames = {sibling.rfilename for sibling in info.siblings} + comp_model_filenames, _ = variant_compatible_siblings(filenames, variant=revision) + comp_model_filenames = [".".join(f.split(".")[:1] + f.split(".")[2:]) for f in comp_model_filenames] + + if set(model_filenames).issubset(set(comp_model_filenames)): + warnings.warn( + f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'` even though you can load it via `variant=`{revision}`. Loading model variants via `revision='{revision}'` is deprecated and will be removed in diffusers v1. Please use `variant='{revision}'` instead.", + FutureWarning, + ) + else: + warnings.warn( + f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'`. This behavior is deprecated and will be removed in diffusers v1. One should use `variant='{revision}'` instead. However, it appears that {pretrained_model_name_or_path} currently does not have the required variant filenames in the 'main' branch. \n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title '{pretrained_model_name_or_path} is missing {revision} files' so that the correct variant file can be added.", + FutureWarning, + ) + + +def _unwrap_model(model): + """Unwraps a model.""" + if is_compiled_module(model): + model = model._orig_mod + + if is_peft_available(): + from peft import PeftModel + + if isinstance(model, PeftModel): + model = model.base_model.model + + return model + + +def maybe_raise_or_warn( + library_name, library, class_name, importable_classes, passed_class_obj, name, is_pipeline_module +): + """Simple helper method to raise or warn in case incorrect module has been passed""" + if not is_pipeline_module: + library = importlib.import_module(library_name) + + # Handle deprecated Transformers classes + if library_name == "transformers": + class_name = _maybe_remap_transformers_class(class_name) or class_name + + class_obj = getattr(library, class_name) + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} + + expected_class_obj = None + for class_name, class_candidate in class_candidates.items(): + if class_candidate is not None and issubclass(class_obj, class_candidate): + expected_class_obj = class_candidate + + # Dynamo wraps the original model in a private class. + # I didn't find a public API to get the original class. + sub_model = passed_class_obj[name] + unwrapped_sub_model = _unwrap_model(sub_model) + model_cls = unwrapped_sub_model.__class__ + + if not issubclass(model_cls, expected_class_obj): + raise ValueError(f"{passed_class_obj[name]} is of type: {model_cls}, but should be {expected_class_obj}") + else: + logger.warning( + f"You have passed a non-standard module {passed_class_obj[name]}. We cannot verify whether it" + " has the correct type" + ) + + +# a simpler version of get_class_obj_and_candidates, it won't work with custom code +def simple_get_class_obj(library_name, class_name): + from diffusers import pipelines + + is_pipeline_module = hasattr(pipelines, library_name) + + if is_pipeline_module: + pipeline_module = getattr(pipelines, library_name) + class_obj = getattr(pipeline_module, class_name) + else: + library = importlib.import_module(library_name) + + # Handle deprecated Transformers classes + if library_name == "transformers": + class_name = _maybe_remap_transformers_class(class_name) or class_name + + class_obj = getattr(library, class_name) + + return class_obj + + +def get_class_obj_and_candidates( + library_name, class_name, importable_classes, pipelines, is_pipeline_module, component_name=None, cache_dir=None +): + """Simple helper method to retrieve class object of module as well as potential parent class objects""" + component_folder = os.path.join(cache_dir, component_name) if component_name and cache_dir else None + + if is_pipeline_module: + pipeline_module = getattr(pipelines, library_name) + + class_obj = getattr(pipeline_module, class_name) + class_candidates = dict.fromkeys(importable_classes.keys(), class_obj) + elif component_folder and os.path.isfile(os.path.join(component_folder, library_name + ".py")): + # load custom component + class_obj = get_class_from_dynamic_module( + component_folder, module_file=library_name + ".py", class_name=class_name + ) + class_candidates = dict.fromkeys(importable_classes.keys(), class_obj) + else: + # else we just import it from the library. + library = importlib.import_module(library_name) + + # Handle deprecated Transformers classes + if library_name == "transformers": + class_name = _maybe_remap_transformers_class(class_name) or class_name + + class_obj = getattr(library, class_name) + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} + + return class_obj, class_candidates + + +def _get_custom_pipeline_class( + custom_pipeline, + repo_id=None, + hub_revision=None, + class_name=None, + cache_dir=None, + revision=None, +): + if custom_pipeline.endswith(".py"): + path = Path(custom_pipeline) + # decompose into folder & file + file_name = path.name + custom_pipeline = path.parent.absolute() + elif repo_id is not None: + file_name = f"{custom_pipeline}.py" + custom_pipeline = repo_id + else: + file_name = CUSTOM_PIPELINE_FILE_NAME + + if repo_id is not None and hub_revision is not None: + # if we load the pipeline code from the Hub + # make sure to overwrite the `revision` + revision = hub_revision + + return get_class_from_dynamic_module( + custom_pipeline, + module_file=file_name, + class_name=class_name, + cache_dir=cache_dir, + revision=revision, + ) + + +def _get_pipeline_class( + class_obj, + config=None, + load_connected_pipeline=False, + custom_pipeline=None, + repo_id=None, + hub_revision=None, + class_name=None, + cache_dir=None, + revision=None, +): + if custom_pipeline is not None: + return _get_custom_pipeline_class( + custom_pipeline, + repo_id=repo_id, + hub_revision=hub_revision, + class_name=class_name, + cache_dir=cache_dir, + revision=revision, + ) + + if class_obj.__name__ != "DiffusionPipeline" and class_obj.__name__ != "ModularPipeline": + return class_obj + + diffusers_module = importlib.import_module(class_obj.__module__.split(".")[0]) + class_name = class_name or config["_class_name"] + if not class_name: + raise ValueError( + "The class name could not be found in the configuration file. Please make sure to pass the correct `class_name`." + ) + + class_name = class_name[4:] if class_name.startswith("Flax") else class_name + + pipeline_cls = getattr(diffusers_module, class_name) + + if load_connected_pipeline: + from .auto_pipeline import _get_connected_pipeline + + connected_pipeline_cls = _get_connected_pipeline(pipeline_cls) + if connected_pipeline_cls is not None: + logger.info( + f"Loading connected pipeline {connected_pipeline_cls.__name__} instead of {pipeline_cls.__name__} as specified via `load_connected_pipeline=True`" + ) + else: + logger.info(f"{pipeline_cls.__name__} has no connected pipeline class. Loading {pipeline_cls.__name__}.") + + pipeline_cls = connected_pipeline_cls or pipeline_cls + + return pipeline_cls + + +def _load_empty_model( + library_name: str, + class_name: str, + importable_classes: list[Any], + pipelines: Any, + is_pipeline_module: bool, + name: str, + torch_dtype: str | torch.dtype, + cached_folder: str | os.PathLike, + **kwargs, +): + # retrieve class objects. + class_obj, _ = get_class_obj_and_candidates( + library_name, + class_name, + importable_classes, + pipelines, + is_pipeline_module, + component_name=name, + cache_dir=cached_folder, + ) + + if is_transformers_available(): + transformers_version = version.parse(version.parse(transformers.__version__).base_version) + else: + transformers_version = "N/A" + + # Determine library. + is_transformers_model = ( + is_transformers_available() + and issubclass(class_obj, PreTrainedModel) + and transformers_version >= version.parse("4.20.0") + ) + diffusers_module = importlib.import_module(__name__.split(".")[0]) + is_diffusers_model = issubclass(class_obj, diffusers_module.ModelMixin) + + model = None + config_path = cached_folder + user_agent = { + "diffusers": __version__, + "file_type": "model", + "framework": "pytorch", + } + + if is_diffusers_model: + # Load config and then the model on meta. + config, unused_kwargs, commit_hash = class_obj.load_config( + os.path.join(config_path, name), + cache_dir=cached_folder, + return_unused_kwargs=True, + return_commit_hash=True, + force_download=kwargs.pop("force_download", False), + proxies=kwargs.pop("proxies", None), + local_files_only=kwargs.pop("local_files_only", False), + token=kwargs.pop("token", None), + revision=kwargs.pop("revision", None), + subfolder=kwargs.pop("subfolder", None), + user_agent=user_agent, + ) + with accelerate.init_empty_weights(): + model = class_obj.from_config(config, **unused_kwargs) + elif is_transformers_model: + config_class = getattr(class_obj, "config_class", None) + if config_class is None: + raise ValueError("`config_class` cannot be None. Please double-check the model.") + + config = config_class.from_pretrained( + cached_folder, + subfolder=name, + force_download=kwargs.pop("force_download", False), + proxies=kwargs.pop("proxies", None), + local_files_only=kwargs.pop("local_files_only", False), + token=kwargs.pop("token", None), + revision=kwargs.pop("revision", None), + user_agent=user_agent, + ) + with accelerate.init_empty_weights(): + model = class_obj(config) + + if model is not None: + model = model.to(dtype=torch_dtype) + return model + + +def _assign_components_to_devices( + module_sizes: dict[str, float], device_memory: dict[str, float], device_mapping_strategy: str = "balanced" +): + device_ids = list(device_memory.keys()) + device_cycle = device_ids + device_ids[::-1] + device_memory = device_memory.copy() + + device_id_component_mapping = {} + current_device_index = 0 + for component in module_sizes: + device_id = device_cycle[current_device_index % len(device_cycle)] + component_memory = module_sizes[component] + curr_device_memory = device_memory[device_id] + + # If the GPU doesn't fit the current component offload to the CPU. + if component_memory > curr_device_memory: + device_id_component_mapping["cpu"] = [component] + else: + if device_id not in device_id_component_mapping: + device_id_component_mapping[device_id] = [component] + else: + device_id_component_mapping[device_id].append(component) + + # Update the device memory. + device_memory[device_id] -= component_memory + current_device_index += 1 + + return device_id_component_mapping + + +def _get_final_device_map(device_map, pipeline_class, passed_class_obj, init_dict, library, max_memory, **kwargs): + # TODO: separate out different device_map methods when it gets to it. + if device_map != "balanced": + return device_map + # To avoid circular import problem. + from diffusers import pipelines + + torch_dtype = kwargs.get("torch_dtype", torch.float32) + + # Load each module in the pipeline on a meta device so that we can derive the device map. + init_empty_modules = {} + for name, (library_name, class_name) in init_dict.items(): + if class_name.startswith("Flax"): + raise ValueError("Flax pipelines are not supported with `device_map`.") + + # Define all importable classes + is_pipeline_module = hasattr(pipelines, library_name) + importable_classes = ALL_IMPORTABLE_CLASSES + loaded_sub_model = None + + # Use passed sub model or load class_name from library_name + if name in passed_class_obj: + # if the model is in a pipeline module, then we load it from the pipeline + # check that passed_class_obj has correct parent class + maybe_raise_or_warn( + library_name, + library, + class_name, + importable_classes, + passed_class_obj, + name, + is_pipeline_module, + ) + with accelerate.init_empty_weights(): + loaded_sub_model = passed_class_obj[name] + + else: + sub_model_dtype = ( + torch_dtype.get(name, torch_dtype.get("default", torch.float32)) + if isinstance(torch_dtype, dict) + else torch_dtype + ) + loaded_sub_model = _load_empty_model( + library_name=library_name, + class_name=class_name, + importable_classes=importable_classes, + pipelines=pipelines, + is_pipeline_module=is_pipeline_module, + pipeline_class=pipeline_class, + name=name, + torch_dtype=sub_model_dtype, + cached_folder=kwargs.get("cached_folder", None), + force_download=kwargs.get("force_download", None), + proxies=kwargs.get("proxies", None), + local_files_only=kwargs.get("local_files_only", None), + token=kwargs.get("token", None), + revision=kwargs.get("revision", None), + ) + + if loaded_sub_model is not None: + init_empty_modules[name] = loaded_sub_model + + # determine device map + # Obtain a sorted dictionary for mapping the model-level components + # to their sizes. + module_sizes = { + module_name: compute_module_sizes( + module, + dtype=torch_dtype.get(module_name, torch_dtype.get("default", torch.float32)) + if isinstance(torch_dtype, dict) + else torch_dtype, + )[""] + for module_name, module in init_empty_modules.items() + if isinstance(module, torch.nn.Module) + } + module_sizes = dict(sorted(module_sizes.items(), key=lambda item: item[1], reverse=True)) + + # Obtain maximum memory available per device (GPUs only). + max_memory = get_max_memory(max_memory) + max_memory = dict(sorted(max_memory.items(), key=lambda item: item[1], reverse=True)) + max_memory = {k: v for k, v in max_memory.items() if k != "cpu"} + + # Obtain a dictionary mapping the model-level components to the available + # devices based on the maximum memory and the model sizes. + final_device_map = None + if len(max_memory) > 0: + device_id_component_mapping = _assign_components_to_devices( + module_sizes, max_memory, device_mapping_strategy=device_map + ) + + # Obtain the final device map, e.g., `{"unet": 0, "text_encoder": 1, "vae": 1, ...}` + final_device_map = {} + for device_id, components in device_id_component_mapping.items(): + for component in components: + final_device_map[component] = device_id + + return final_device_map + + +def load_sub_model( + library_name: str, + class_name: str, + importable_classes: list[Any], + pipelines: Any, + is_pipeline_module: bool, + pipeline_class: Any, + torch_dtype: torch.dtype, + provider: Any, + sess_options: Any, + device_map: dict[str, torch.device] | str | None, + max_memory: dict[int | str, int | str] | None, + offload_folder: str | os.PathLike | None, + offload_state_dict: bool, + model_variants: dict[str, str], + name: str, + from_flax: bool, + variant: str, + low_cpu_mem_usage: bool, + cached_folder: str | os.PathLike, + use_safetensors: bool, + dduf_entries: dict[str, DDUFEntry] | None, + provider_options: Any, + disable_mmap: bool, + quantization_config: Any | None = None, +): + """Helper method to load the module `name` from `library_name` and `class_name`""" + from ..quantizers import PipelineQuantizationConfig + + # retrieve class candidates + + class_obj, class_candidates = get_class_obj_and_candidates( + library_name, + class_name, + importable_classes, + pipelines, + is_pipeline_module, + component_name=name, + cache_dir=cached_folder, + ) + + load_method_name = None + # retrieve load method name + for class_name, class_candidate in class_candidates.items(): + if class_candidate is not None and issubclass(class_obj, class_candidate): + load_method_name = importable_classes[class_name][1] + + # if load method name is None, then we have a dummy module -> raise Error + if load_method_name is None: + none_module = class_obj.__module__ + is_dummy_path = none_module.startswith(DUMMY_MODULES_FOLDER) or none_module.startswith( + TRANSFORMERS_DUMMY_MODULES_FOLDER + ) + if is_dummy_path and "dummy" in none_module: + # call class_obj for nice error message of missing requirements + class_obj() + + raise ValueError( + f"The component {class_obj} of {pipeline_class} cannot be loaded as it does not seem to have" + f" any of the loading methods defined in {ALL_IMPORTABLE_CLASSES}." + ) + + load_method = _get_load_method(class_obj, load_method_name, is_dduf=dduf_entries is not None) + + # add kwargs to loading method + diffusers_module = importlib.import_module(__name__.split(".")[0]) + loading_kwargs = {} + + is_diffusers_model = issubclass(class_obj, diffusers_module.ModelMixin) + + if is_transformers_available(): + transformers_version = version.parse(version.parse(transformers.__version__).base_version) + else: + transformers_version = "N/A" + + is_transformers_model = ( + is_transformers_available() + and issubclass(class_obj, PreTrainedModel) + and transformers_version >= version.parse("4.20.0") + ) + + # For transformers models >= 4.56.0, use 'dtype' instead of 'torch_dtype' to avoid deprecation warnings + if issubclass(class_obj, torch.nn.Module): + if is_transformers_model and transformers_version >= version.parse("4.56.0"): + loading_kwargs["dtype"] = torch_dtype + else: + loading_kwargs["torch_dtype"] = torch_dtype + if issubclass(class_obj, diffusers_module.OnnxRuntimeModel): + loading_kwargs["provider"] = provider + loading_kwargs["sess_options"] = sess_options + loading_kwargs["provider_options"] = provider_options + + # When loading a transformers model, if the device_map is None, the weights will be initialized as opposed to diffusers. + # To make default loading faster we set the `low_cpu_mem_usage=low_cpu_mem_usage` flag which is `True` by default. + # This makes sure that the weights won't be initialized which significantly speeds up loading. + if is_diffusers_model or is_transformers_model: + loading_kwargs["device_map"] = device_map + loading_kwargs["max_memory"] = max_memory + loading_kwargs["offload_folder"] = offload_folder + loading_kwargs["offload_state_dict"] = offload_state_dict + loading_kwargs["variant"] = model_variants.pop(name, None) + loading_kwargs["use_safetensors"] = use_safetensors + + if from_flax: + loading_kwargs["from_flax"] = True + + # the following can be deleted once the minimum required `transformers` version + # is higher than 4.27 + if ( + is_transformers_model + and loading_kwargs["variant"] is not None + and transformers_version < version.parse("4.27.0") + ): + raise ImportError( + f"When passing `variant='{variant}'`, please make sure to upgrade your `transformers` version to at least 4.27.0.dev0" + ) + elif is_transformers_model and loading_kwargs["variant"] is None: + loading_kwargs.pop("variant") + + # if `from_flax` and model is transformer model, can currently not load with `low_cpu_mem_usage` + if not (from_flax and is_transformers_model): + loading_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage + else: + loading_kwargs["low_cpu_mem_usage"] = False + + if is_diffusers_model: + loading_kwargs["disable_mmap"] = disable_mmap + + if is_transformers_model and is_transformers_version(">=", "4.57.0"): + loading_kwargs.pop("offload_state_dict") + + if ( + quantization_config is not None + and isinstance(quantization_config, PipelineQuantizationConfig) + and issubclass(class_obj, torch.nn.Module) + ): + model_quant_config = quantization_config._resolve_quant_config( + is_diffusers=is_diffusers_model, module_name=name + ) + if model_quant_config is not None: + loading_kwargs["quantization_config"] = model_quant_config + + # check if the module is in a subdirectory + if dduf_entries: + loading_kwargs["dduf_entries"] = dduf_entries + loaded_sub_model = load_method(name, **loading_kwargs) + elif os.path.isdir(os.path.join(cached_folder, name)): + loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs) + else: + # else load from the root directory + loaded_sub_model = load_method(cached_folder, **loading_kwargs) + + if isinstance(loaded_sub_model, torch.nn.Module) and isinstance(device_map, dict): + # remove hooks + remove_hook_from_module(loaded_sub_model, recurse=True) + needs_offloading_to_cpu = device_map[""] == "cpu" + skip_keys = None + if hasattr(loaded_sub_model, "_skip_keys") and loaded_sub_model._skip_keys is not None: + skip_keys = loaded_sub_model._skip_keys + + if needs_offloading_to_cpu: + dispatch_model( + loaded_sub_model, + state_dict=loaded_sub_model.state_dict(), + device_map=device_map, + force_hooks=True, + main_device=0, + skip_keys=skip_keys, + ) + else: + dispatch_model(loaded_sub_model, device_map=device_map, force_hooks=True, skip_keys=skip_keys) + + return loaded_sub_model + + +def _get_load_method(class_obj: object, load_method_name: str, is_dduf: bool) -> Callable: + """ + Return the method to load the sub model. + + In practice, this method will return the `"from_pretrained"` (or `load_method_name`) method of the class object + except if loading from a DDUF checkpoint. In that case, transformers models and tokenizers have a specific loading + method that we need to use. + """ + if is_dduf: + if issubclass(class_obj, PreTrainedTokenizerBase): + return lambda *args, **kwargs: _load_tokenizer_from_dduf(class_obj, *args, **kwargs) + if issubclass(class_obj, PreTrainedModel): + return lambda *args, **kwargs: _load_transformers_model_from_dduf(class_obj, *args, **kwargs) + return getattr(class_obj, load_method_name) + + +def _fetch_class_library_tuple(module): + # import it here to avoid circular import + diffusers_module = importlib.import_module(__name__.split(".")[0]) + pipelines = getattr(diffusers_module, "pipelines") + + # register the config from the original module, not the dynamo compiled one + not_compiled_module = _unwrap_model(module) + library = not_compiled_module.__module__.split(".")[0] + + # check if the module is a pipeline module + module_path_items = not_compiled_module.__module__.split(".") + pipeline_dir = module_path_items[-2] if len(module_path_items) > 2 else None + + path = not_compiled_module.__module__.split(".") + is_pipeline_module = pipeline_dir in path and hasattr(pipelines, pipeline_dir) + + # if library is not in LOADABLE_CLASSES, then it is a custom module. + # Or if it's a pipeline module, then the module is inside the pipeline + # folder so we set the library to module name. + if is_pipeline_module: + library = pipeline_dir + elif library not in LOADABLE_CLASSES: + library = not_compiled_module.__module__ + + # retrieve class_name + if isinstance(not_compiled_module, type): + class_name = not_compiled_module.__name__ + else: + class_name = not_compiled_module.__class__.__name__ + + return (library, class_name) + + +def _identify_model_variants(folder: str, variant: str, config: dict) -> dict: + model_variants = {} + if variant is not None: + for sub_folder in os.listdir(folder): + folder_path = os.path.join(folder, sub_folder) + is_folder = os.path.isdir(folder_path) and sub_folder in config + variant_exists = is_folder and any(p.split(".")[1].startswith(variant) for p in os.listdir(folder_path)) + if variant_exists: + model_variants[sub_folder] = variant + return model_variants + + +def _resolve_custom_pipeline_and_cls(folder, config, custom_pipeline): + custom_class_name = None + if os.path.isfile(os.path.join(folder, f"{custom_pipeline}.py")): + custom_pipeline = os.path.join(folder, f"{custom_pipeline}.py") + elif isinstance(config["_class_name"], (list, tuple)) and os.path.isfile( + os.path.join(folder, f"{config['_class_name'][0]}.py") + ): + custom_pipeline = os.path.join(folder, f"{config['_class_name'][0]}.py") + custom_class_name = config["_class_name"][1] + + return custom_pipeline, custom_class_name + + +def _maybe_raise_warning_for_inpainting(pipeline_class, pretrained_model_name_or_path: str, config: dict): + if pipeline_class.__name__ == "StableDiffusionInpaintPipeline" and version.parse( + version.parse(config["_diffusers_version"]).base_version + ) <= version.parse("0.5.1"): + from diffusers import StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy + + pipeline_class = StableDiffusionInpaintPipelineLegacy + + deprecation_message = ( + "You are using a legacy checkpoint for inpainting with Stable Diffusion, therefore we are loading the" + f" {StableDiffusionInpaintPipelineLegacy} class instead of {StableDiffusionInpaintPipeline}. For" + " better inpainting results, we strongly suggest using Stable Diffusion's official inpainting" + " checkpoint: https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting instead or adapting your" + f" checkpoint {pretrained_model_name_or_path} to the format of" + " https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting. Note that we do not actively maintain" + " the {StableDiffusionInpaintPipelineLegacy} class and will likely remove it in version 1.0.0." + ) + deprecate("StableDiffusionInpaintPipelineLegacy", "1.0.0", deprecation_message, standard_warn=False) + + +def _update_init_kwargs_with_connected_pipeline( + init_kwargs: dict, passed_pipe_kwargs: dict, passed_class_objs: dict, folder: str, **pipeline_loading_kwargs +) -> dict: + from .pipeline_utils import DiffusionPipeline + + modelcard = ModelCard.load(os.path.join(folder, "README.md")) + connected_pipes = {prefix: getattr(modelcard.data, prefix, [None])[0] for prefix in CONNECTED_PIPES_KEYS} + + # We don't scheduler argument to match the existing logic: + # https://github.com/huggingface/diffusers/blob/867e0c919e1aa7ef8b03c8eb1460f4f875a683ae/src/diffusers/pipelines/pipeline_utils.py#L906C13-L925C14 + pipeline_loading_kwargs_cp = pipeline_loading_kwargs.copy() + if pipeline_loading_kwargs_cp is not None and len(pipeline_loading_kwargs_cp) >= 1: + for k in pipeline_loading_kwargs: + if "scheduler" in k: + _ = pipeline_loading_kwargs_cp.pop(k) + + def get_connected_passed_kwargs(prefix): + connected_passed_class_obj = { + k.replace(f"{prefix}_", ""): w for k, w in passed_class_objs.items() if k.split("_")[0] == prefix + } + connected_passed_pipe_kwargs = { + k.replace(f"{prefix}_", ""): w for k, w in passed_pipe_kwargs.items() if k.split("_")[0] == prefix + } + + connected_passed_kwargs = {**connected_passed_class_obj, **connected_passed_pipe_kwargs} + return connected_passed_kwargs + + connected_pipes = { + prefix: DiffusionPipeline.from_pretrained( + repo_id, **pipeline_loading_kwargs_cp, **get_connected_passed_kwargs(prefix) + ) + for prefix, repo_id in connected_pipes.items() + if repo_id is not None + } + + for prefix, connected_pipe in connected_pipes.items(): + # add connected pipes to `init_kwargs` with _, e.g. "prior_text_encoder" + init_kwargs.update( + {"_".join([prefix, name]): component for name, component in connected_pipe.components.items()} + ) + + return init_kwargs + + +def _get_custom_components_and_folders( + pretrained_model_name: str, + config_dict: dict[str, Any], + filenames: list[str] | None = None, + variant_filenames: list[str] | None = None, + variant: str | None = None, +): + config_dict = config_dict.copy() + + # retrieve all folder_names that contain relevant files + folder_names = [k for k, v in config_dict.items() if isinstance(v, list) and k != "_class_name"] + + diffusers_module = importlib.import_module(__name__.split(".")[0]) + pipelines = getattr(diffusers_module, "pipelines") + + # optionally create a custom component <> custom file mapping + custom_components = {} + for component in folder_names: + module_candidate = config_dict[component][0] + + if module_candidate is None or not isinstance(module_candidate, str): + continue + + # We compute candidate file path on the Hub. Do not use `os.path.join`. + candidate_file = f"{component}/{module_candidate}.py" + + if candidate_file in filenames: + custom_components[component] = module_candidate + elif module_candidate not in LOADABLE_CLASSES and not hasattr(pipelines, module_candidate): + raise ValueError( + f"{candidate_file} as defined in `model_index.json` does not exist in {pretrained_model_name} and is not a module in 'diffusers/pipelines'." + ) + + return custom_components, folder_names + + +def _get_ignore_patterns( + passed_components, + model_folder_names: list[str], + model_filenames: list[str], + use_safetensors: bool, + from_flax: bool, + allow_pickle: bool, + use_onnx: bool, + is_onnx: bool, + variant: str | None = None, +) -> list[str]: + if ( + use_safetensors + and not allow_pickle + and not is_safetensors_compatible( + model_filenames, passed_components=passed_components, folder_names=model_folder_names, variant=variant + ) + ): + raise EnvironmentError( + f"Could not find the necessary `safetensors` weights in {model_filenames} (variant={variant})" + ) + + if from_flax: + ignore_patterns = ["*.bin", "*.safetensors", "*.onnx", "*.pb"] + + elif use_safetensors and is_safetensors_compatible( + model_filenames, passed_components=passed_components, folder_names=model_folder_names, variant=variant + ): + ignore_patterns = ["*.bin", "*.msgpack"] + + use_onnx = use_onnx if use_onnx is not None else is_onnx + if not use_onnx: + ignore_patterns += ["*.onnx", "*.pb"] + + else: + ignore_patterns = ["*.safetensors", "*.msgpack"] + + use_onnx = use_onnx if use_onnx is not None else is_onnx + if not use_onnx: + ignore_patterns += ["*.onnx", "*.pb"] + + return ignore_patterns + + +def _download_dduf_file( + pretrained_model_name: str, + dduf_file: str, + pipeline_class_name: str, + cache_dir: str, + proxies: str, + local_files_only: bool, + token: str, + revision: str, +): + model_info_call_error = None + if not local_files_only: + try: + info = model_info(pretrained_model_name, token=token, revision=revision) + except (HfHubHTTPError, OfflineModeIsEnabled, requests.ConnectionError, httpx.NetworkError) as e: + logger.warning(f"Couldn't connect to the Hub: {e}.\nWill try to load from local cache.") + local_files_only = True + model_info_call_error = e # save error to reraise it if model is not cached locally + + if ( + not local_files_only + and dduf_file is not None + and dduf_file not in (sibling.rfilename for sibling in info.siblings) + ): + raise ValueError(f"Requested {dduf_file} file is not available in {pretrained_model_name}.") + + try: + user_agent = {"pipeline_class": pipeline_class_name, "dduf": True} + cached_folder = snapshot_download( + pretrained_model_name, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + allow_patterns=[dduf_file], + user_agent=user_agent, + ) + return cached_folder + except FileNotFoundError: + # Means we tried to load pipeline with `local_files_only=True` but the files have not been found in local cache. + # This can happen in two cases: + # 1. If the user passed `local_files_only=True` => we raise the error directly + # 2. If we forced `local_files_only=True` when `model_info` failed => we raise the initial error + if model_info_call_error is None: + # 1. user passed `local_files_only=True` + raise + else: + # 2. we forced `local_files_only=True` when `model_info` failed + raise EnvironmentError( + f"Cannot load model {pretrained_model_name}: model is not cached locally and an error occurred" + " while trying to fetch metadata from the Hub. Please check out the root cause in the stacktrace" + " above." + ) from model_info_call_error + + +def _maybe_raise_error_for_incorrect_transformers(config_dict): + has_transformers_component = False + for k in config_dict: + if isinstance(config_dict[k], list): + has_transformers_component = config_dict[k][0] == "transformers" + if has_transformers_component: + break + if has_transformers_component and not is_transformers_version(">", "4.47.1"): + raise ValueError("Please upgrade your `transformers` installation to the latest version to use DDUF.") + + +def _maybe_warn_for_wrong_component_in_quant_config(pipe_init_dict, quant_config): + if quant_config is None: + return + + actual_pipe_components = set(pipe_init_dict.keys()) + missing = "" + quant_components = None + if getattr(quant_config, "components_to_quantize", None) is not None: + quant_components = set(quant_config.components_to_quantize) + elif getattr(quant_config, "quant_mapping", None) is not None and isinstance(quant_config.quant_mapping, dict): + quant_components = set(quant_config.quant_mapping.keys()) + + if quant_components and not quant_components.issubset(actual_pipe_components): + missing = quant_components - actual_pipe_components + + if missing: + logger.warning( + f"The following components in the quantization config {missing} will be ignored " + "as they do not belong to the underlying pipeline. Acceptable values for the pipeline " + f"components are: {', '.join(actual_pipe_components)}." + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d675f1de04a7f1ee03bdca13f93bc6cc779de98d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/pipeline_utils.py @@ -0,0 +1,2385 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import fnmatch +import importlib +import inspect +import os +import re +import sys +import types +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable, Dict, List, Union, get_args, get_origin, get_type_hints + +import httpx +import numpy as np +import PIL.Image +import requests +import torch +from huggingface_hub import ( + DDUFEntry, + ModelCard, + create_repo, + hf_hub_download, + model_info, + read_dduf_file, + snapshot_download, +) +from huggingface_hub.utils import HfHubHTTPError, OfflineModeIsEnabled, validate_hf_hub_args +from packaging import version +from tqdm.auto import tqdm +from typing_extensions import Self + +from .. import __version__ +from ..configuration_utils import ConfigMixin +from ..models import AutoencoderKL +from ..models.attention_processor import FusedAttnProcessor2_0 +from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, ModelMixin +from ..quantizers import PipelineQuantizationConfig +from ..quantizers.bitsandbytes.utils import _check_bnb_status +from ..schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME +from ..utils import ( + CONFIG_NAME, + DEPRECATED_REVISION_ARGS, + BaseOutput, + PushToHubMixin, + _get_detailed_type, + _is_valid_type, + deprecate, + is_accelerate_available, + is_accelerate_version, + is_bitsandbytes_version, + is_hpu_available, + is_torch_npu_available, + is_torch_version, + is_transformers_version, + logging, + numpy_to_pil, +) +from ..utils.distributed_utils import is_torch_dist_rank_zero +from ..utils.hub_utils import _check_legacy_sharding_variant_format, load_or_create_model_card, populate_model_card +from ..utils.torch_utils import empty_device_cache, get_device, is_compiled_module + + +if is_torch_npu_available(): + import torch_npu # noqa: F401 + +from .pipeline_loading_utils import ( + ALL_IMPORTABLE_CLASSES, + CONNECTED_PIPES_KEYS, + CUSTOM_PIPELINE_FILE_NAME, + LOADABLE_CLASSES, + _download_dduf_file, + _fetch_class_library_tuple, + _get_custom_components_and_folders, + _get_custom_pipeline_class, + _get_final_device_map, + _get_ignore_patterns, + _get_pipeline_class, + _identify_model_variants, + _maybe_raise_error_for_incorrect_transformers, + _maybe_raise_warning_for_inpainting, + _maybe_warn_for_wrong_component_in_quant_config, + _resolve_custom_pipeline_and_cls, + _unwrap_model, + _update_init_kwargs_with_connected_pipeline, + filter_model_files, + load_sub_model, + maybe_raise_or_warn, + variant_compatible_siblings, + warn_deprecated_model_variant, +) + + +if is_accelerate_available(): + import accelerate + + +LIBRARIES = [] +for library in LOADABLE_CLASSES: + LIBRARIES.append(library) + +SUPPORTED_DEVICE_MAP = ["balanced"] + [get_device(), "cpu"] + +logger = logging.get_logger(__name__) + + +@dataclass +class ImagePipelineOutput(BaseOutput): + """ + Output class for image pipelines. + + Args: + images (`List[PIL.Image.Image]` or `np.ndarray`) + List of denoised PIL images of length `batch_size` or NumPy array of shape `(batch_size, height, width, + num_channels)`. + """ + + images: list[PIL.Image.Image] | np.ndarray + + +@dataclass +class AudioPipelineOutput(BaseOutput): + """ + Output class for audio pipelines. + + Args: + audios (`np.ndarray`) + List of denoised audio samples of a NumPy array of shape `(batch_size, num_channels, sample_rate)`. + """ + + audios: np.ndarray + + +class DeprecatedPipelineMixin: + """ + A mixin that can be used to mark a pipeline as deprecated. + + Pipelines inheriting from this mixin will raise a warning when instantiated, indicating that they are deprecated + and won't receive updates past the specified version. Tests will be skipped for pipelines that inherit from this + mixin. + + Example usage: + ```python + class MyDeprecatedPipeline(DeprecatedPipelineMixin, DiffusionPipeline): + _last_supported_version = "0.20.0" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + ``` + """ + + # Override this in the inheriting class to specify the last version that will support this pipeline + _last_supported_version = None + + def __init__(self, *args, **kwargs): + # Get the class name for the warning message + class_name = self.__class__.__name__ + + # Get the last supported version or use the current version if not specified + version_info = getattr(self.__class__, "_last_supported_version", __version__) + + # Raise a warning that this pipeline is deprecated + logger.warning( + f"The {class_name} has been deprecated and will not receive bug fixes or feature updates after Diffusers version {version_info}. " + ) + + # Call the parent class's __init__ method + super().__init__(*args, **kwargs) + + +class DiffusionPipeline(ConfigMixin, PushToHubMixin): + r""" + Base class for all pipelines. + + [`DiffusionPipeline`] stores all components (models, schedulers, and processors) for diffusion pipelines and + provides methods for loading, downloading and saving models. It also includes methods to: + + - move all PyTorch modules to the device of your choice + - enable/disable the progress bar for the denoising iteration + + Class attributes: + + - **config_name** (`str`) -- The configuration filename that stores the class and module names of all the + diffusion pipeline's components. + - **_optional_components** (`List[str]`) -- List of all optional components that don't have to be passed to the + pipeline to function (should be overridden by subclasses). + """ + + config_name = "model_index.json" + model_cpu_offload_seq = None + hf_device_map = None + _optional_components = [] + _exclude_from_cpu_offload = [] + _load_connected_pipes = False + _is_onnx = False + + def register_modules(self, **kwargs): + for name, module in kwargs.items(): + # retrieve library + if module is None or isinstance(module, (tuple, list)) and module[0] is None: + register_dict = {name: (None, None)} + else: + library, class_name = _fetch_class_library_tuple(module) + register_dict = {name: (library, class_name)} + + # save model index config + self.register_to_config(**register_dict) + + # set models + setattr(self, name, module) + + def __setattr__(self, name: str, value: Any): + if name in self.__dict__ and hasattr(self.config, name): + # We need to overwrite the config if name exists in config + if isinstance(getattr(self.config, name), (tuple, list)): + if value is not None and self.config[name][0] is not None: + class_library_tuple = _fetch_class_library_tuple(value) + else: + class_library_tuple = (None, None) + + self.register_to_config(**{name: class_library_tuple}) + else: + self.register_to_config(**{name: value}) + + super().__setattr__(name, value) + + def save_pretrained( + self, + save_directory: str | os.PathLike, + safe_serialization: bool = True, + variant: str | None = None, + max_shard_size: int | str | None = None, + push_to_hub: bool = False, + **kwargs, + ): + """ + Save all saveable variables of the pipeline to a directory. A pipeline variable can be saved and loaded if its + class implements both a save and loading method. The pipeline is easily reloaded using the + [`~DiffusionPipeline.from_pretrained`] class method. + + Arguments: + save_directory (`str` or `os.PathLike`): + Directory to save a pipeline to. Will be created if it doesn't exist. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`. + variant (`str`, *optional*): + If specified, weights are saved in the format `pytorch_model..bin`. + max_shard_size (`int` or `str`, defaults to `None`): + The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size + lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5GB"`). + If expressed as an integer, the unit is bytes. Note that this limit will be decreased after a certain + period of time (starting from Oct 2024) to allow users to upgrade to the latest version of `diffusers`. + This is to establish a common default size for this argument across different libraries in the Hugging + Face ecosystem (`transformers`, and `accelerate`, for example). + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + + kwargs (`Dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + model_index_dict = dict(self.config) + model_index_dict.pop("_class_name", None) + model_index_dict.pop("_diffusers_version", None) + model_index_dict.pop("_module", None) + model_index_dict.pop("_name_or_path", None) + + if push_to_hub: + commit_message = kwargs.pop("commit_message", None) + private = kwargs.pop("private", None) + create_pr = kwargs.pop("create_pr", False) + token = kwargs.pop("token", None) + repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1]) + repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id + + expected_modules, optional_kwargs = self._get_signature_keys(self) + + def is_saveable_module(name, value): + if name not in expected_modules: + return False + if name in self._optional_components and value[0] is None: + return False + return True + + model_index_dict = {k: v for k, v in model_index_dict.items() if is_saveable_module(k, v)} + for pipeline_component_name in model_index_dict.keys(): + sub_model = getattr(self, pipeline_component_name) + model_cls = sub_model.__class__ + + # Dynamo wraps the original model in a private class. + # I didn't find a public API to get the original class. + if is_compiled_module(sub_model): + sub_model = _unwrap_model(sub_model) + model_cls = sub_model.__class__ + + save_method_name = None + # search for the model's base class in LOADABLE_CLASSES + for library_name, library_classes in LOADABLE_CLASSES.items(): + if library_name in sys.modules: + library = importlib.import_module(library_name) + else: + logger.info( + f"{library_name} is not installed. Cannot save {pipeline_component_name} as {library_classes} from {library_name}" + ) + + for base_class, save_load_methods in library_classes.items(): + class_candidate = getattr(library, base_class, None) + if class_candidate is not None and issubclass(model_cls, class_candidate): + # if we found a suitable base class in LOADABLE_CLASSES then grab its save method + save_method_name = save_load_methods[0] + break + if save_method_name is not None: + break + + if save_method_name is None: + logger.warning( + f"self.{pipeline_component_name}={sub_model} of type {type(sub_model)} cannot be saved." + ) + # make sure that unsaveable components are not tried to be loaded afterward + self.register_to_config(**{pipeline_component_name: (None, None)}) + continue + + save_method = getattr(sub_model, save_method_name) + + # Call the save method with the argument safe_serialization only if it's supported + save_method_signature = inspect.signature(save_method) + save_method_accept_safe = "safe_serialization" in save_method_signature.parameters + save_method_accept_variant = "variant" in save_method_signature.parameters + save_method_accept_max_shard_size = "max_shard_size" in save_method_signature.parameters + save_method_accept_peft_format = "save_peft_format" in save_method_signature.parameters + + save_kwargs = {} + if save_method_accept_safe: + save_kwargs["safe_serialization"] = safe_serialization + if save_method_accept_variant: + save_kwargs["variant"] = variant + if save_method_accept_max_shard_size and max_shard_size is not None: + # max_shard_size is expected to not be None in ModelMixin + save_kwargs["max_shard_size"] = max_shard_size + if save_method_accept_peft_format: + # Set save_peft_format=False for transformers>=5.0.0 compatibility + # In transformers 5.0.0+, the default save_peft_format=True adds "base_model.model" prefix + # to adapter keys, but from_pretrained expects keys without this prefix + save_kwargs["save_peft_format"] = False + + save_method(os.path.join(save_directory, pipeline_component_name), **save_kwargs) + + # finally save the config + self.save_config(save_directory) + + if push_to_hub: + # Create a new empty model card and eventually tag it + model_card = load_or_create_model_card(repo_id, token=token, is_pipeline=True) + model_card = populate_model_card(model_card) + model_card.save(os.path.join(save_directory, "README.md")) + + self._upload_folder( + save_directory, + repo_id, + token=token, + commit_message=commit_message, + create_pr=create_pr, + ) + + def to(self, *args, **kwargs) -> Self: + r""" + Performs Pipeline dtype and/or device conversion. A torch.dtype and torch.device are inferred from the + arguments of `self.to(*args, **kwargs).` + + > [!TIP] > If the pipeline already has the correct torch.dtype and torch.device, then it is returned as is. + Otherwise, > the returned pipeline is a copy of self with the desired torch.dtype and torch.device. + + + Here are the ways to call `to`: + + - `to(dtype, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) + - `to(device, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the specified + [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) + - `to(device=None, dtype=None, silence_dtype_warnings=False) → DiffusionPipeline` to return a pipeline with the + specified [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) and + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) + + Arguments: + dtype (`torch.dtype`, *optional*): + Returns a pipeline with the specified + [`dtype`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.dtype) + device (`torch.Device`, *optional*): + Returns a pipeline with the specified + [`device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.device) + silence_dtype_warnings (`str`, *optional*, defaults to `False`): + Whether to omit warnings if the target `dtype` is not compatible with the target `device`. + + Returns: + [`DiffusionPipeline`]: The pipeline converted to specified `dtype` and/or `dtype`. + """ + dtype = kwargs.pop("dtype", None) + device = kwargs.pop("device", None) + silence_dtype_warnings = kwargs.pop("silence_dtype_warnings", False) + + dtype_arg = None + device_arg = None + if len(args) == 1: + if isinstance(args[0], torch.dtype): + dtype_arg = args[0] + else: + device_arg = torch.device(args[0]) if args[0] is not None else None + elif len(args) == 2: + if isinstance(args[0], torch.dtype): + raise ValueError( + "When passing two arguments, make sure the first corresponds to `device` and the second to `dtype`." + ) + device_arg = torch.device(args[0]) if args[0] is not None else None + dtype_arg = args[1] + elif len(args) > 2: + raise ValueError("Please make sure to pass at most two arguments (`device` and `dtype`) `.to(...)`") + + if dtype is not None and dtype_arg is not None: + raise ValueError( + "You have passed `dtype` both as an argument and as a keyword argument. Please only pass one of the two." + ) + + dtype = dtype or dtype_arg + + if device is not None and device_arg is not None: + raise ValueError( + "You have passed `device` both as an argument and as a keyword argument. Please only pass one of the two." + ) + + device = device or device_arg + device_type = torch.device(device).type if device is not None else None + pipeline_has_bnb = any(any((_check_bnb_status(module))) for _, module in self.components.items()) + + # throw warning if pipeline is in "offloaded"-mode but user tries to manually set to GPU. + def module_is_sequentially_offloaded(module): + if not is_accelerate_available() or is_accelerate_version("<", "0.14.0"): + return False + + _, _, is_loaded_in_8bit_bnb = _check_bnb_status(module) + + # https://github.com/huggingface/accelerate/pull/3907 + if is_loaded_in_8bit_bnb and ( + is_bitsandbytes_version("<", "0.48.0") or is_accelerate_version("<", "1.13.0.dev0") + ): + return False + + return hasattr(module, "_hf_hook") and ( + isinstance(module._hf_hook, accelerate.hooks.AlignDevicesHook) + or hasattr(module._hf_hook, "hooks") + and isinstance(module._hf_hook.hooks[0], accelerate.hooks.AlignDevicesHook) + ) + + def module_is_offloaded(module): + if not is_accelerate_available() or is_accelerate_version("<", "0.17.0.dev0"): + return False + + return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload) + + # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer + pipeline_is_sequentially_offloaded = any( + module_is_sequentially_offloaded(module) for _, module in self.components.items() + ) + is_pipeline_device_mapped = self._is_pipeline_device_mapped() + if is_pipeline_device_mapped: + raise ValueError( + "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline." + ) + + if device_type in ["cuda", "xpu"]: + if pipeline_is_sequentially_offloaded and not pipeline_has_bnb: + raise ValueError( + "It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading." + ) + # PR: https://github.com/huggingface/accelerate/pull/3223/ + elif pipeline_has_bnb and is_accelerate_version("<", "1.1.0.dev0"): + raise ValueError( + "You are trying to call `.to('cuda')` on a pipeline that has models quantized with `bitsandbytes`. Your current `accelerate` installation does not support it. Please upgrade the installation." + ) + + # Display a warning in this case (the operation succeeds but the benefits are lost) + pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items()) + if pipeline_is_offloaded and device_type in ["cuda", "xpu"]: + logger.warning( + f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading." + ) + + # Enable generic support for Intel Gaudi accelerator using GPU/HPU migration + if device_type == "hpu" and kwargs.pop("hpu_migration", True) and is_hpu_available(): + os.environ["PT_HPU_GPU_MIGRATION"] = "1" + logger.debug("Environment variable set: PT_HPU_GPU_MIGRATION=1") + + import habana_frameworks.torch # noqa: F401 + + # HPU hardware check + if not (hasattr(torch, "hpu") and torch.hpu.is_available()): + raise ValueError("You are trying to call `.to('hpu')` but HPU device is unavailable.") + + os.environ["PT_HPU_MAX_COMPOUND_OP_SIZE"] = "1" + logger.debug("Environment variable set: PT_HPU_MAX_COMPOUND_OP_SIZE=1") + + if dtype in (torch.bfloat16, None) and kwargs.pop("sdp_on_bf16", True): + if hasattr(torch._C, "_set_math_sdp_allow_fp16_bf16_reduction"): + torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) + logger.warning( + "Enabled SDP with BF16 precision on HPU. To disable, please use `.to('hpu', sdp_on_bf16=False)`" + ) + + module_names, _ = self._get_signature_keys(self) + modules = [getattr(self, n, None) for n in module_names] + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + is_offloaded = pipeline_is_offloaded or pipeline_is_sequentially_offloaded + for module in modules: + _, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb = _check_bnb_status(module) + is_group_offloaded = self._maybe_raise_error_if_group_offload_active(module=module) + + if (is_loaded_in_4bit_bnb or is_loaded_in_8bit_bnb) and dtype is not None: + logger.warning( + f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` {'4bit' if is_loaded_in_4bit_bnb else '8bit'} and conversion to {dtype} is not supported. Module is still in {'4bit' if is_loaded_in_4bit_bnb else '8bit'} precision." + ) + + if is_loaded_in_8bit_bnb and device is not None and is_bitsandbytes_version("<", "0.48.0"): + logger.warning( + f"The module '{module.__class__.__name__}' has been loaded in `bitsandbytes` 8bit and moving it to {device} via `.to()` is not supported. Module is still on {module.device}." + "You need to upgrade bitsandbytes to at least 0.48.0" + ) + + # Note: we also handle this at the ModelMixin level. The reason for doing it here too is that modeling + # components can be from outside diffusers too, but still have group offloading enabled. + if ( + self._maybe_raise_error_if_group_offload_active(raise_error=False, module=module) + and device is not None + ): + logger.warning( + f"The module '{module.__class__.__name__}' is group offloaded and moving it to {device} via `.to()` is not supported." + ) + + # This can happen for `transformer` models. CPU placement was added in + # https://github.com/huggingface/transformers/pull/33122. So, we guard this accordingly. + if is_loaded_in_4bit_bnb and device is not None and is_transformers_version(">", "4.44.0"): + module.to(device=device) + # added here https://github.com/huggingface/transformers/pull/43258 + if ( + is_loaded_in_8bit_bnb + and device is not None + and is_transformers_version(">", "4.58.0") + and is_bitsandbytes_version(">=", "0.48.0") + ): + module.to(device=device) + elif not is_loaded_in_4bit_bnb and not is_loaded_in_8bit_bnb and not is_group_offloaded: + module.to(device, dtype) + + if ( + module.dtype == torch.float16 + and str(device) in ["cpu"] + and not silence_dtype_warnings + and not is_offloaded + ): + logger.warning( + "Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It" + " is not recommended to move them to `cpu` as running them will fail. Please make" + " sure to use an accelerator to run the pipeline in inference, due to the lack of" + " support for`float16` operations on this device in PyTorch. Please, remove the" + " `torch_dtype=torch.float16` argument, or use another device for inference." + ) + return self + + @property + def device(self) -> torch.device: + r""" + Returns: + `torch.device`: The torch device on which the pipeline is located. + """ + module_names, _ = self._get_signature_keys(self) + modules = [getattr(self, n, None) for n in module_names] + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + for module in modules: + return module.device + + return torch.device("cpu") + + @property + def dtype(self) -> torch.dtype: + r""" + Returns: + `torch.dtype`: The torch dtype on which the pipeline is located. + """ + module_names, _ = self._get_signature_keys(self) + modules = [getattr(self, n, None) for n in module_names] + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + for module in modules: + return module.dtype + + return torch.float32 + + @classmethod + @validate_hf_hub_args + def from_pretrained(cls, pretrained_model_name_or_path: str | os.PathLike, **kwargs) -> Self: + r""" + Instantiate a PyTorch diffusion pipeline from pretrained pipeline weights. + + The pipeline is set in evaluation mode (`model.eval()`) by default. + + If you get the error message below, you need to finetune the weights for your downstream task: + + ``` + Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match: + - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated + You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. + ``` + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *repo id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline + hosted on the Hub. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights + saved using + [`~DiffusionPipeline.save_pretrained`]. + - A path to a *directory* (for example `./my_pipeline_directory/`) containing a dduf file + torch_dtype (`torch.dtype` or `dict[str, Union[str, torch.dtype]]`, *optional*): + Override the default `torch.dtype` and load the model with another dtype. To load submodels with + different dtype pass a `dict` (for example `{'transformer': torch.bfloat16, 'vae': torch.float16}`). + Set the default dtype for unspecified components with `default` (for example `{'transformer': + torch.bfloat16, 'default': torch.float16}`). If a component is not specified and no default is set, + `torch.float32` is used. + custom_pipeline (`str`, *optional*): + + > [!WARNING] > 🧪 This is an experimental feature and may change in the future. + + Can be either: + + - A string, the *repo id* (for example `hf-internal-testing/diffusers-dummy-pipeline`) of a custom + pipeline hosted on the Hub. The repository must contain a file called pipeline.py that defines + the custom pipeline. + - A string, the *file name* of a community pipeline hosted on GitHub under + [Community](https://github.com/huggingface/diffusers/tree/main/examples/community). Valid file + names must match the file name and not the pipeline script (`clip_guided_stable_diffusion` + instead of `clip_guided_stable_diffusion.py`). Community pipelines are always loaded from the + current main branch of GitHub. + - A path to a directory (`./my_pipeline_directory/`) containing a custom pipeline. The directory + must contain a file called `pipeline.py` that defines the custom pipeline. + + For more information on how to load and create custom pipelines, please have a look at [Loading and + Adding Custom + Pipelines](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview) + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + cache_dir (`Union[str, os.PathLike]`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + + proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + custom_revision (`str`, *optional*): + The specific model version to use. It can be a branch name, a tag name, or a commit id similar to + `revision` when loading a custom pipeline from the Hub. Defaults to the latest stable 🤗 Diffusers + version. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + device_map (`str`, *optional*): + Strategy that dictates how the different components of a pipeline should be placed on available + devices. Currently, only "balanced" `device_map` is supported. Check out + [this](https://huggingface.co/docs/diffusers/main/en/tutorials/inference_with_big_models#device-placement) + to know more. + max_memory (`Dict`, *optional*): + A dictionary device identifier for the maximum memory. Will default to the maximum memory available for + each GPU and the available CPU RAM if unset. + offload_folder (`str` or `os.PathLike`, *optional*): + The path to offload weights if device_map contains the value `"disk"`. + offload_state_dict (`bool`, *optional*): + If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if + the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True` + when there is some disk offload. + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading only loading the pretrained weights and not initializing the weights. This also + tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. + Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this + argument to `True` will raise an error. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the safetensors weights are downloaded if they're available **and** if the + safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors + weights. If set to `False`, safetensors weights are not loaded. + use_onnx (`bool`, *optional*, defaults to `None`): + If set to `True`, ONNX weights will always be downloaded if present. If set to `False`, ONNX weights + will never be downloaded. By default `use_onnx` defaults to the `_is_onnx` class attribute which is + `False` for non-ONNX pipelines and `True` for ONNX pipelines. ONNX weights include both files ending + with `.onnx` and `.pb`. + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to overwrite load and saveable variables (the pipeline components of the specific pipeline + class). The overwritten components are passed directly to the pipelines `__init__` method. See example + below for more information. + variant (`str`, *optional*): + Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + dduf_file(`str`, *optional*): + Load weights from the specified dduf file. + disable_mmap ('bool', *optional*, defaults to 'False'): + Whether to disable mmap when loading a Safetensors model. This option can perform better when the model + is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well. + + > [!TIP] > To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in + with `hf > auth login`. + + Examples: + + ```py + >>> from diffusers import DiffusionPipeline + + >>> # Download pipeline from huggingface.co and cache. + >>> pipeline = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") + + >>> # Download pipeline that requires an authorization token + >>> # For more information on access tokens, please refer to this section + >>> # of the documentation](https://huggingface.co/docs/hub/security-tokens) + >>> pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + + >>> # Use a different scheduler + >>> from diffusers import LMSDiscreteScheduler + + >>> scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config) + >>> pipeline.scheduler = scheduler + ``` + """ + # Copy the kwargs to re-use during loading connected pipeline. + kwargs_copied = kwargs.copy() + + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + from_flax = kwargs.pop("from_flax", False) + torch_dtype = kwargs.pop("torch_dtype", None) + custom_pipeline = kwargs.pop("custom_pipeline", None) + custom_revision = kwargs.pop("custom_revision", None) + provider = kwargs.pop("provider", None) + sess_options = kwargs.pop("sess_options", None) + provider_options = kwargs.pop("provider_options", None) + device_map = kwargs.pop("device_map", None) + max_memory = kwargs.pop("max_memory", None) + offload_folder = kwargs.pop("offload_folder", None) + offload_state_dict = kwargs.pop("offload_state_dict", None) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + variant = kwargs.pop("variant", None) + dduf_file = kwargs.pop("dduf_file", None) + use_safetensors = kwargs.pop("use_safetensors", None) + use_onnx = kwargs.pop("use_onnx", None) + load_connected_pipeline = kwargs.pop("load_connected_pipeline", False) + quantization_config = kwargs.pop("quantization_config", None) + disable_mmap = kwargs.pop("disable_mmap", False) + + if torch_dtype is not None and not isinstance(torch_dtype, dict) and not isinstance(torch_dtype, torch.dtype): + torch_dtype = torch.float32 + logger.warning( + f"Passed `torch_dtype` {torch_dtype} is not a `torch.dtype`. Defaulting to `torch.float32`." + ) + + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warning( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if quantization_config is not None and not isinstance(quantization_config, PipelineQuantizationConfig): + raise ValueError("`quantization_config` must be an instance of `PipelineQuantizationConfig`.") + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + if device_map is not None and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `device_map=None`." + ) + + if device_map is not None and not is_accelerate_available(): + raise NotImplementedError( + "Using `device_map` requires the `accelerate` library. Please install it using: `pip install accelerate`." + ) + + if device_map is not None and not isinstance(device_map, str): + raise ValueError("`device_map` must be a string.") + + if device_map is not None and device_map not in SUPPORTED_DEVICE_MAP: + raise NotImplementedError( + f"{device_map} not supported. Supported strategies are: {', '.join(SUPPORTED_DEVICE_MAP)}" + ) + + if device_map is not None and device_map in SUPPORTED_DEVICE_MAP: + if is_accelerate_version("<", "0.28.0"): + raise NotImplementedError("Device placement requires `accelerate` version `0.28.0` or later.") + + if low_cpu_mem_usage is False and device_map is not None: + raise ValueError( + f"You cannot set `low_cpu_mem_usage` to False while using device_map={device_map} for loading and" + " dispatching. Please make sure to set `low_cpu_mem_usage=True`." + ) + + if dduf_file: + if custom_pipeline: + raise NotImplementedError("Custom pipelines are not supported with DDUF at the moment.") + if load_connected_pipeline: + raise NotImplementedError("Connected pipelines are not supported with DDUF at the moment.") + + # 1. Download the checkpoints and configs + # use snapshot download here to get it working from from_pretrained + if not os.path.isdir(pretrained_model_name_or_path): + if pretrained_model_name_or_path.count("/") > 1: + raise ValueError( + f'The provided pretrained_model_name_or_path "{pretrained_model_name_or_path}"' + " is neither a valid local path nor a valid repo id. Please check the parameter." + ) + cached_folder = cls.download( + pretrained_model_name_or_path, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + from_flax=from_flax, + use_safetensors=use_safetensors, + use_onnx=use_onnx, + custom_pipeline=custom_pipeline, + custom_revision=custom_revision, + variant=variant, + dduf_file=dduf_file, + load_connected_pipeline=load_connected_pipeline, + **kwargs, + ) + else: + cached_folder = pretrained_model_name_or_path + + # The variant filenames can have the legacy sharding checkpoint format that we check and throw + # a warning if detected. + if variant is not None and _check_legacy_sharding_variant_format(folder=cached_folder, variant=variant): + warn_msg = ( + f"Warning: The repository contains sharded checkpoints for variant '{variant}' maybe in a deprecated format. " + "Please check your files carefully:\n\n" + "- Correct format example: diffusion_pytorch_model.fp16-00003-of-00003.safetensors\n" + "- Deprecated format example: diffusion_pytorch_model-00001-of-00002.fp16.safetensors\n\n" + "If you find any files in the deprecated format:\n" + "1. Remove all existing checkpoint files for this variant.\n" + "2. Re-obtain the correct files by running `save_pretrained()`.\n\n" + "This will ensure you're using the most up-to-date and compatible checkpoint format." + ) + logger.warning(warn_msg) + + dduf_entries = None + if dduf_file: + dduf_file_path = os.path.join(cached_folder, dduf_file) + dduf_entries = read_dduf_file(dduf_file_path) + # The reader contains already all the files needed, no need to check it again + cached_folder = "" + + config_dict = cls.load_config(cached_folder, dduf_entries=dduf_entries) + + if dduf_file: + _maybe_raise_error_for_incorrect_transformers(config_dict) + + # pop out "_ignore_files" as it is only needed for download + config_dict.pop("_ignore_files", None) + + # 2. Define which model components should load variants + # We retrieve the information by matching whether variant model checkpoints exist in the subfolders. + # Example: `diffusion_pytorch_model.safetensors` -> `diffusion_pytorch_model.fp16.safetensors` + # with variant being `"fp16"`. + model_variants = _identify_model_variants(folder=cached_folder, variant=variant, config=config_dict) + if len(model_variants) == 0 and variant is not None: + error_message = f"You are trying to load the model files of the `variant={variant}`, but no such modeling files are available." + raise ValueError(error_message) + + # 3. Load the pipeline class, if using custom module then load it from the hub + # if we load from explicit class, let's use it + custom_pipeline, custom_class_name = _resolve_custom_pipeline_and_cls( + folder=cached_folder, config=config_dict, custom_pipeline=custom_pipeline + ) + pipeline_class = _get_pipeline_class( + cls, + config=config_dict, + load_connected_pipeline=load_connected_pipeline, + custom_pipeline=custom_pipeline, + class_name=custom_class_name, + cache_dir=cache_dir, + revision=custom_revision, + ) + + if device_map is not None and pipeline_class._load_connected_pipes: + raise NotImplementedError("`device_map` is not yet supported for connected pipelines.") + + # DEPRECATED: To be removed in 1.0.0 + # we are deprecating the `StableDiffusionInpaintPipelineLegacy` pipeline which gets loaded + # when a user requests for a `StableDiffusionInpaintPipeline` with `diffusers` version being <= 0.5.1. + _maybe_raise_warning_for_inpainting( + pipeline_class=pipeline_class, + pretrained_model_name_or_path=pretrained_model_name_or_path, + config=config_dict, + ) + + # 4. Define expected modules given pipeline signature + # and define non-None initialized modules (=`init_kwargs`) + + # some modules can be passed directly to the init + # in this case they are already instantiated in `kwargs` + # extract them here + expected_modules, optional_kwargs = cls._get_signature_keys(pipeline_class) + expected_types = pipeline_class._get_signature_types() + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + init_dict, unused_kwargs, _ = pipeline_class.extract_init_dict(config_dict, **kwargs) + + # define init kwargs and make sure that optional component modules are filtered out + init_kwargs = { + k: init_dict.pop(k) + for k in optional_kwargs + if k in init_dict and k not in pipeline_class._optional_components + } + init_kwargs = {**init_kwargs, **passed_pipe_kwargs} + + # remove `null` components + def load_module(name, value): + if value[0] is None: + return False + if name in passed_class_obj and passed_class_obj[name] is None: + return False + return True + + init_dict = {k: v for k, v in init_dict.items() if load_module(k, v)} + + # Special case: safety_checker must be loaded separately when using `from_flax` + if from_flax and "safety_checker" in init_dict and "safety_checker" not in passed_class_obj: + raise NotImplementedError( + "The safety checker cannot be automatically loaded when loading weights `from_flax`." + " Please, pass `safety_checker=None` to `from_pretrained`, and load the safety checker" + " separately if you need it." + ) + + # 5. Throw nice warnings / errors for fast accelerate loading + if len(unused_kwargs) > 0: + logger.warning( + f"Keyword arguments {unused_kwargs} are not expected by {pipeline_class.__name__} and will be ignored." + ) + + # import it here to avoid circular import + from diffusers import pipelines + + # 6. device map delegation + final_device_map = None + if device_map is not None: + final_device_map = _get_final_device_map( + device_map=device_map, + pipeline_class=pipeline_class, + passed_class_obj=passed_class_obj, + init_dict=init_dict, + library=library, + max_memory=max_memory, + torch_dtype=torch_dtype, + cached_folder=cached_folder, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + ) + + # 7. Load each module in the pipeline + current_device_map = None + _maybe_warn_for_wrong_component_in_quant_config(init_dict, quantization_config) + logging_tqdm_kwargs = {"desc": "Loading pipeline components..."} + if not is_torch_dist_rank_zero(): + logging_tqdm_kwargs["disable"] = True + + for name, (library_name, class_name) in logging.tqdm(init_dict.items(), **logging_tqdm_kwargs): + # 7.1 device_map shenanigans + if final_device_map is not None: + if isinstance(final_device_map, dict) and len(final_device_map) > 0: + component_device = final_device_map.get(name, None) + if component_device is not None: + current_device_map = {"": component_device} + else: + current_device_map = None + elif isinstance(final_device_map, str): + current_device_map = final_device_map + + # 7.2 - now that JAX/Flax is an official framework of the library, we might load from Flax names + class_name = class_name[4:] if class_name.startswith("Flax") else class_name + + # 7.3 Define all importable classes + is_pipeline_module = hasattr(pipelines, library_name) + importable_classes = ALL_IMPORTABLE_CLASSES + loaded_sub_model = None + + # 7.4 Use passed sub model or load class_name from library_name + if name in passed_class_obj: + # if the model is in a pipeline module, then we load it from the pipeline + # check that passed_class_obj has correct parent class + maybe_raise_or_warn( + library_name, library, class_name, importable_classes, passed_class_obj, name, is_pipeline_module + ) + + loaded_sub_model = passed_class_obj[name] + else: + # load sub model + sub_model_dtype = ( + torch_dtype.get(name, torch_dtype.get("default", torch.float32)) + if isinstance(torch_dtype, dict) + else torch_dtype + ) + loaded_sub_model = load_sub_model( + library_name=library_name, + class_name=class_name, + importable_classes=importable_classes, + pipelines=pipelines, + is_pipeline_module=is_pipeline_module, + pipeline_class=pipeline_class, + torch_dtype=sub_model_dtype, + provider=provider, + sess_options=sess_options, + device_map=current_device_map, + max_memory=max_memory, + offload_folder=offload_folder, + offload_state_dict=offload_state_dict, + model_variants=model_variants, + name=name, + from_flax=from_flax, + variant=variant, + low_cpu_mem_usage=low_cpu_mem_usage, + cached_folder=cached_folder, + use_safetensors=use_safetensors, + dduf_entries=dduf_entries, + provider_options=provider_options, + disable_mmap=disable_mmap, + quantization_config=quantization_config, + ) + logger.info( + f"Loaded {name} as {class_name} from `{name}` subfolder of {pretrained_model_name_or_path}." + ) + + init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...) + + # 8. Handle connected pipelines. + if pipeline_class._load_connected_pipes and os.path.isfile(os.path.join(cached_folder, "README.md")): + init_kwargs = _update_init_kwargs_with_connected_pipeline( + init_kwargs=init_kwargs, + passed_pipe_kwargs=passed_pipe_kwargs, + passed_class_objs=passed_class_obj, + folder=cached_folder, + **kwargs_copied, + ) + + # 9. Potentially add passed objects if expected + missing_modules = set(expected_modules) - set(init_kwargs.keys()) + passed_modules = list(passed_class_obj.keys()) + optional_modules = pipeline_class._optional_components + if len(missing_modules) > 0 and missing_modules <= set(passed_modules + optional_modules): + for module in missing_modules: + init_kwargs[module] = passed_class_obj.get(module, None) + elif len(missing_modules) > 0: + passed_modules = set(list(init_kwargs.keys()) + list(passed_class_obj.keys())) - set(optional_kwargs) + raise ValueError( + f"Pipeline {pipeline_class} expected {expected_modules}, but only {passed_modules} were passed." + ) + + # 10. Type checking init arguments + for kw, arg in init_kwargs.items(): + # Too complex to validate with type annotation alone + if "scheduler" in kw: + continue + # Many tokenizer annotations don't include its "Fast" variant, so skip this + # e.g T5Tokenizer but not T5TokenizerFast + elif "tokenizer" in kw: + continue + elif ( + arg is not None # Skip if None + and not expected_types[kw] == (inspect.Signature.empty,) # Skip if no type annotations + and not _is_valid_type(arg, expected_types[kw]) # Check type + ): + logger.warning(f"Expected types for {kw}: {expected_types[kw]}, got {_get_detailed_type(arg)}.") + + # 11. Instantiate the pipeline + model = pipeline_class(**init_kwargs) + + # 12. Save where the model was instantiated from + model.register_to_config(_name_or_path=pretrained_model_name_or_path) + if device_map is not None: + setattr(model, "hf_device_map", final_device_map) + if quantization_config is not None: + setattr(model, "quantization_config", quantization_config) + return model + + @property + def name_or_path(self) -> str: + return getattr(self.config, "_name_or_path", None) + + @property + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + [`~DiffusionPipeline.enable_sequential_cpu_offload`] the execution device can only be inferred from + Accelerate's module hooks. + """ + from ..hooks.group_offloading import _get_group_onload_device + + # When apply group offloading at the leaf_level, we're in the same situation as accelerate's sequential + # offloading. We need to return the onload device of the group offloading hooks so that the intermediates + # required for computation (latents, prompt embeddings, etc.) can be created on the correct device. + for name, model in self.components.items(): + if not isinstance(model, torch.nn.Module): + continue + try: + return _get_group_onload_device(model) + except ValueError: + pass + + for name, model in self.components.items(): + if not isinstance(model, torch.nn.Module) or name in self._exclude_from_cpu_offload: + continue + + if not hasattr(model, "_hf_hook"): + return self.device + for module in model.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + + def remove_all_hooks(self): + r""" + Removes all hooks that were added when using `enable_sequential_cpu_offload` or `enable_model_cpu_offload`. + """ + for _, model in self.components.items(): + if isinstance(model, torch.nn.Module) and hasattr(model, "_hf_hook"): + accelerate.hooks.remove_hook_from_module(model, recurse=True) + self._all_hooks = [] + + def enable_model_cpu_offload(self, gpu_id: int | None = None, device: torch.device | str = None): + r""" + Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared + to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the accelerator when its + `forward` method is called, and the model remains in accelerator until the next model runs. Memory savings are + lower than with `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution + of the `unet`. + + Arguments: + gpu_id (`int`, *optional*): + The ID of the accelerator that shall be used in inference. If not specified, it will default to 0. + device (`torch.Device` or `str`, *optional*, defaults to None): + The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will + automatically detect the available accelerator and use. + """ + self._maybe_raise_error_if_group_offload_active(raise_error=True) + + is_pipeline_device_mapped = self._is_pipeline_device_mapped() + if is_pipeline_device_mapped: + raise ValueError( + "It seems like you have activated a device mapping strategy on the pipeline so calling `enable_model_cpu_offload() isn't allowed. You can call `reset_device_map()` first and then call `enable_model_cpu_offload()`." + ) + + if self.model_cpu_offload_seq is None: + raise ValueError( + "Model CPU offload cannot be enabled because no `model_cpu_offload_seq` class attribute is set." + ) + + if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"): + from accelerate import cpu_offload_with_hook + else: + raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.") + + self.remove_all_hooks() + + if device is None: + device = get_device() + if device == "cpu": + raise RuntimeError("`enable_model_cpu_offload` requires accelerator, but not found") + + torch_device = torch.device(device) + device_index = torch_device.index + + if gpu_id is not None and device_index is not None: + raise ValueError( + f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}" + f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}" + ) + + # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0 + self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0) + + device_type = torch_device.type + device = torch.device(f"{device_type}:{self._offload_gpu_id}") + self._offload_device = device + + self.to("cpu", silence_dtype_warnings=True) + empty_device_cache(device.type) + + all_model_components = {k: v for k, v in self.components.items() if isinstance(v, torch.nn.Module)} + + self._all_hooks = [] + hook = None + for model_str in self.model_cpu_offload_seq.split("->"): + model = all_model_components.pop(model_str, None) + + if not isinstance(model, torch.nn.Module): + continue + + # This is because the model would already be placed on a CUDA device. + _, _, is_loaded_in_8bit_bnb = _check_bnb_status(model) + if is_loaded_in_8bit_bnb and ( + is_transformers_version("<", "4.58.0") or is_bitsandbytes_version("<", "0.48.0") + ): + logger.info( + f"Skipping the hook placement for the {model.__class__.__name__} as it is loaded in `bitsandbytes` 8bit." + ) + continue + + _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook) + self._all_hooks.append(hook) + + # CPU offload models that are not in the seq chain unless they are explicitly excluded + # these models will stay on CPU until maybe_free_model_hooks is called + # some models cannot be in the seq chain because they are iteratively called, such as controlnet + for name, model in all_model_components.items(): + if not isinstance(model, torch.nn.Module): + continue + + if name in self._exclude_from_cpu_offload: + model.to(device) + else: + _, hook = cpu_offload_with_hook(model, device) + self._all_hooks.append(hook) + + def maybe_free_model_hooks(self): + r""" + Method that performs the following: + - Offloads all components. + - Removes all model hooks that were added when using `enable_model_cpu_offload`, and then applies them again. + In case the model has not been offloaded, this function is a no-op. + - Resets stateful diffusers hooks of denoiser components if they were added with + [`~hooks.HookRegistry.register_hook`]. + + Make sure to add this function to the end of the `__call__` function of your pipeline so that it functions + correctly when applying `enable_model_cpu_offload`. + """ + for component in self.components.values(): + if hasattr(component, "_reset_stateful_cache"): + component._reset_stateful_cache() + + if not hasattr(self, "_all_hooks") or len(self._all_hooks) == 0: + # `enable_model_cpu_offload` has not be called, so silently do nothing + return + + # make sure the model is in the same state as before calling it + self.enable_model_cpu_offload(device=getattr(self, "_offload_device", "cuda")) + + def enable_sequential_cpu_offload(self, gpu_id: int | None = None, device: torch.device | str = None): + r""" + Offloads all models to CPU using 🤗 Accelerate, significantly reducing memory usage. When called, the state + dicts of all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are saved to CPU + and then moved to `torch.device('meta')` and loaded to accelerator only when their specific submodule has its + `forward` method called. Offloading happens on a submodule basis. Memory savings are higher than with + `enable_model_cpu_offload`, but performance is lower. + + Arguments: + gpu_id (`int`, *optional*): + The ID of the accelerator that shall be used in inference. If not specified, it will default to 0. + device (`torch.Device` or `str`, *optional*, defaults to None): + The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will + automatically detect the available accelerator and use. + """ + self._maybe_raise_error_if_group_offload_active(raise_error=True) + + if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"): + from accelerate import cpu_offload + else: + raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher") + self.remove_all_hooks() + + is_pipeline_device_mapped = self._is_pipeline_device_mapped() + if is_pipeline_device_mapped: + raise ValueError( + "It seems like you have activated a device mapping strategy on the pipeline so calling `enable_sequential_cpu_offload() isn't allowed. You can call `reset_device_map()` first and then call `enable_sequential_cpu_offload()`." + ) + + if device is None: + device = get_device() + if device == "cpu": + raise RuntimeError("`enable_sequential_cpu_offload` requires accelerator, but not found") + + torch_device = torch.device(device) + device_index = torch_device.index + + if gpu_id is not None and device_index is not None: + raise ValueError( + f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}" + f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}" + ) + + # _offload_gpu_id should be set to passed gpu_id (or id in passed `device`) or default to previously set id or default to 0 + self._offload_gpu_id = gpu_id or torch_device.index or getattr(self, "_offload_gpu_id", 0) + + device_type = torch_device.type + device = torch.device(f"{device_type}:{self._offload_gpu_id}") + self._offload_device = device + + if self.device.type != "cpu": + orig_device_type = self.device.type + self.to("cpu", silence_dtype_warnings=True) + empty_device_cache(orig_device_type) + + for name, model in self.components.items(): + if not isinstance(model, torch.nn.Module): + continue + + if name in self._exclude_from_cpu_offload: + model.to(device) + else: + # make sure to offload buffers if not all high level weights + # are of type nn.Module + offload_buffers = len(model._parameters) > 0 + cpu_offload(model, device, offload_buffers=offload_buffers) + + def enable_group_offload( + self, + onload_device: torch.device, + offload_device: torch.device = torch.device("cpu"), + offload_type: str = "block_level", + num_blocks_per_group: int | None = None, + non_blocking: bool = False, + use_stream: bool = False, + record_stream: bool = False, + low_cpu_mem_usage=False, + offload_to_disk_path: str | None = None, + exclude_modules: str | list[str] | None = None, + ) -> None: + r""" + Applies group offloading to the internal layers of a torch.nn.Module. To understand what group offloading is, + and where it is beneficial, we need to first provide some context on how other supported offloading methods + work. + + Typically, offloading is done at two levels: + - Module-level: In Diffusers, this can be enabled using the `ModelMixin::enable_model_cpu_offload()` method. It + works by offloading each component of a pipeline to the CPU for storage, and onloading to the accelerator + device when needed for computation. This method is more memory-efficient than keeping all components on the + accelerator, but the memory requirements are still quite high. For this method to work, one needs memory + equivalent to size of the model in runtime dtype + size of largest intermediate activation tensors to be able + to complete the forward pass. + - Leaf-level: In Diffusers, this can be enabled using the `ModelMixin::enable_sequential_cpu_offload()` method. + It + works by offloading the lowest leaf-level parameters of the computation graph to the CPU for storage, and + onloading only the leafs to the accelerator device for computation. This uses the lowest amount of accelerator + memory, but can be slower due to the excessive number of device synchronizations. + + Group offloading is a middle ground between the two methods. It works by offloading groups of internal layers, + (either `torch.nn.ModuleList` or `torch.nn.Sequential`). This method uses lower memory than module-level + offloading. It is also faster than leaf-level/sequential offloading, as the number of device synchronizations + is reduced. + + Another supported feature (for CUDA devices with support for asynchronous data transfer streams) is the ability + to overlap data transfer and computation to reduce the overall execution time compared to sequential + offloading. This is enabled using layer prefetching with streams, i.e., the layer that is to be executed next + starts onloading to the accelerator device while the current layer is being executed - this increases the + memory requirements slightly. Note that this implementation also supports leaf-level offloading but can be made + much faster when using streams. + + Args: + onload_device (`torch.device`): + The device to which the group of modules are onloaded. + offload_device (`torch.device`, defaults to `torch.device("cpu")`): + The device to which the group of modules are offloaded. This should typically be the CPU. Default is + CPU. + offload_type (`str` or `GroupOffloadingType`, defaults to "block_level"): + The type of offloading to be applied. Can be one of "block_level" or "leaf_level". Default is + "block_level". + offload_to_disk_path (`str`, *optional*, defaults to `None`): + The path to the directory where parameters will be offloaded. Setting this option can be useful in + limited RAM environment settings where a reasonable speed-memory trade-off is desired. + num_blocks_per_group (`int`, *optional*): + The number of blocks per group when using offload_type="block_level". This is required when using + offload_type="block_level". + non_blocking (`bool`, defaults to `False`): + If True, offloading and onloading is done with non-blocking data transfer. + use_stream (`bool`, defaults to `False`): + If True, offloading and onloading is done asynchronously using a CUDA stream. This can be useful for + overlapping computation and data transfer. + record_stream (`bool`, defaults to `False`): When enabled with `use_stream`, it marks the current tensor + as having been used by this stream. It is faster at the expense of slightly more memory usage. Refer to + the [PyTorch official docs](https://pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html) + more details. + low_cpu_mem_usage (`bool`, defaults to `False`): + If True, the CPU memory usage is minimized by pinning tensors on-the-fly instead of pre-pinning them. + This option only matters when using streamed CPU offloading (i.e. `use_stream=True`). This can be + useful when the CPU memory is a bottleneck but may counteract the benefits of using streams. + exclude_modules (`Union[str, List[str]]`, defaults to `None`): List of modules to exclude from offloading. + + Example: + ```python + >>> from diffusers import DiffusionPipeline + >>> import torch + + >>> pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16) + + >>> pipe.enable_group_offload( + ... onload_device=torch.device("cuda"), + ... offload_device=torch.device("cpu"), + ... offload_type="leaf_level", + ... use_stream=True, + ... ) + >>> image = pipe("a beautiful sunset").images[0] + ``` + """ + from ..hooks import apply_group_offloading + + if isinstance(exclude_modules, str): + exclude_modules = [exclude_modules] + elif exclude_modules is None: + exclude_modules = [] + + unknown = set(exclude_modules) - self.components.keys() + if unknown: + logger.info( + f"The following modules are not present in pipeline: {', '.join(unknown)}. Ignore if this is expected." + ) + + group_offload_kwargs = { + "onload_device": onload_device, + "offload_device": offload_device, + "offload_type": offload_type, + "num_blocks_per_group": num_blocks_per_group, + "non_blocking": non_blocking, + "use_stream": use_stream, + "record_stream": record_stream, + "low_cpu_mem_usage": low_cpu_mem_usage, + "offload_to_disk_path": offload_to_disk_path, + } + for name, component in self.components.items(): + if name not in exclude_modules and isinstance(component, torch.nn.Module): + if hasattr(component, "enable_group_offload"): + component.enable_group_offload(**group_offload_kwargs) + else: + apply_group_offloading(module=component, **group_offload_kwargs) + + if exclude_modules: + for module_name in exclude_modules: + module = getattr(self, module_name, None) + if module is not None and isinstance(module, torch.nn.Module): + module.to(onload_device) + logger.debug(f"Placed `{module_name}` on {onload_device} device as it was in `exclude_modules`.") + + def reset_device_map(self): + r""" + Resets the device maps (if any) to None. + """ + if self.hf_device_map is None: + return + else: + self.remove_all_hooks() + for name, component in self.components.items(): + if isinstance(component, torch.nn.Module): + component.to("cpu") + self.hf_device_map = None + + @classmethod + @validate_hf_hub_args + def download(cls, pretrained_model_name, **kwargs) -> str | os.PathLike: + r""" + Download and cache a PyTorch diffusion pipeline from pretrained pipeline weights. + + Parameters: + pretrained_model_name (`str` or `os.PathLike`, *optional*): + A string, the *repository id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained pipeline + hosted on the Hub. + custom_pipeline (`str`, *optional*): + Can be either: + + - A string, the *repository id* (for example `CompVis/ldm-text2im-large-256`) of a pretrained + pipeline hosted on the Hub. The repository must contain a file called `pipeline.py` that defines + the custom pipeline. + + - A string, the *file name* of a community pipeline hosted on GitHub under + [Community](https://github.com/huggingface/diffusers/tree/main/examples/community). Valid file + names must match the file name and not the pipeline script (`clip_guided_stable_diffusion` + instead of `clip_guided_stable_diffusion.py`). Community pipelines are always loaded from the + current `main` branch of GitHub. + + - A path to a *directory* (`./my_pipeline_directory/`) containing a custom pipeline. The directory + must contain a file called `pipeline.py` that defines the custom pipeline. + + > [!WARNING] > 🧪 This is an experimental feature and may change in the future. + + For more information on how to load and create custom pipelines, take a look at [How to contribute a + community pipeline](https://huggingface.co/docs/diffusers/main/en/using-diffusers/contribute_pipeline). + + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only (`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + custom_revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id similar to + `revision` when loading a custom pipeline from the Hub. It can be a 🤗 Diffusers version when loading a + custom pipeline from GitHub, otherwise it defaults to `"main"` when loading from the Hub. + mirror (`str`, *optional*): + Mirror source to resolve accessibility issues if you're downloading a model in China. We do not + guarantee the timeliness or safety of the source, and you should refer to the mirror site for more + information. + variant (`str`, *optional*): + Load weights from a specified variant filename such as `"fp16"` or `"ema"`. This is ignored when + loading `from_flax`. + dduf_file(`str`, *optional*): + Load weights from the specified DDUF file. + use_safetensors (`bool`, *optional*, defaults to `None`): + If set to `None`, the safetensors weights are downloaded if they're available **and** if the + safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors + weights. If set to `False`, safetensors weights are not loaded. + use_onnx (`bool`, *optional*, defaults to `False`): + If set to `True`, ONNX weights will always be downloaded if present. If set to `False`, ONNX weights + will never be downloaded. By default `use_onnx` defaults to the `_is_onnx` class attribute which is + `False` for non-ONNX pipelines and `True` for ONNX pipelines. ONNX weights include both files ending + with `.onnx` and `.pb`. + trust_remote_code (`bool`, *optional*, defaults to `False`): + Whether or not to allow for custom pipelines and components defined on the Hub in their own files. This + option should only be set to `True` for repositories you trust and in which you have read the code, as + it will execute code present on the Hub on your local machine. + + Returns: + `os.PathLike`: + A path to the downloaded pipeline. + + > [!TIP] > To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in + with `hf > auth login + + """ + cache_dir = kwargs.pop("cache_dir", None) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", None) + token = kwargs.pop("token", None) + revision = kwargs.pop("revision", None) + from_flax = kwargs.pop("from_flax", False) + custom_pipeline = kwargs.pop("custom_pipeline", None) + custom_revision = kwargs.pop("custom_revision", None) + variant = kwargs.pop("variant", None) + use_safetensors = kwargs.pop("use_safetensors", None) + use_onnx = kwargs.pop("use_onnx", None) + load_connected_pipeline = kwargs.pop("load_connected_pipeline", False) + trust_remote_code = kwargs.pop("trust_remote_code", False) + dduf_file: dict[str, DDUFEntry] | None = kwargs.pop("dduf_file", None) + + if dduf_file: + if custom_pipeline: + raise NotImplementedError("Custom pipelines are not supported with DDUF at the moment.") + if load_connected_pipeline: + raise NotImplementedError("Connected pipelines are not supported with DDUF at the moment.") + return _download_dduf_file( + pretrained_model_name=pretrained_model_name, + dduf_file=dduf_file, + pipeline_class_name=cls.__name__, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + ) + + allow_pickle = True if (use_safetensors is None or use_safetensors is False) else False + use_safetensors = use_safetensors if use_safetensors is not None else True + + allow_patterns = None + ignore_patterns = None + + model_info_call_error: Exception | None = None + if not local_files_only: + try: + info = model_info(pretrained_model_name, token=token, revision=revision) + except (HfHubHTTPError, OfflineModeIsEnabled, requests.ConnectionError, httpx.NetworkError) as e: + logger.warning(f"Couldn't connect to the Hub: {e}.\nWill try to load from local cache.") + local_files_only = True + model_info_call_error = e # save error to reraise it if model is not cached locally + + if not local_files_only: + config_file = hf_hub_download( + pretrained_model_name, + cls.config_name, + cache_dir=cache_dir, + revision=revision, + proxies=proxies, + force_download=force_download, + token=token, + ) + config_dict = cls._dict_from_json_file(config_file) + ignore_filenames = config_dict.pop("_ignore_files", []) + + filenames = {sibling.rfilename for sibling in info.siblings} + if variant is not None and _check_legacy_sharding_variant_format(filenames=filenames, variant=variant): + warn_msg = ( + f"Warning: The repository contains sharded checkpoints for variant '{variant}' maybe in a deprecated format. " + "Please check your files carefully:\n\n" + "- Correct format example: diffusion_pytorch_model.fp16-00003-of-00003.safetensors\n" + "- Deprecated format example: diffusion_pytorch_model-00001-of-00002.fp16.safetensors\n\n" + "If you find any files in the deprecated format:\n" + "1. Remove all existing checkpoint files for this variant.\n" + "2. Re-obtain the correct files by running `save_pretrained()`.\n\n" + "This will ensure you're using the most up-to-date and compatible checkpoint format." + ) + logger.warning(warn_msg) + + filenames = set(filenames) - set(ignore_filenames) + if revision in DEPRECATED_REVISION_ARGS and version.parse( + version.parse(__version__).base_version + ) >= version.parse("0.22.0"): + warn_deprecated_model_variant(pretrained_model_name, token, variant, revision, filenames) + + custom_components, folder_names = _get_custom_components_and_folders( + pretrained_model_name, config_dict, filenames, variant + ) + custom_class_name = None + if custom_pipeline is None and isinstance(config_dict["_class_name"], (list, tuple)): + custom_pipeline = config_dict["_class_name"][0] + custom_class_name = config_dict["_class_name"][1] + + load_pipe_from_hub = custom_pipeline is not None and f"{custom_pipeline}.py" in filenames + load_components_from_hub = len(custom_components) > 0 + + if load_pipe_from_hub and not trust_remote_code: + raise ValueError( + f"The repository for {pretrained_model_name} contains custom code in {custom_pipeline}.py which must be executed to correctly " + f"load the model. You can inspect the repository content at https://hf.co/{pretrained_model_name}/blob/main/{custom_pipeline}.py.\n" + f"Please pass the argument `trust_remote_code=True` to allow custom code to be run." + ) + + if load_components_from_hub and not trust_remote_code: + raise ValueError( + f"The repository for {pretrained_model_name} contains custom code in {'.py, '.join([os.path.join(k, v) for k, v in custom_components.items()])} which must be executed to correctly " + f"load the model. You can inspect the repository content at {', '.join([f'https://hf.co/{pretrained_model_name}/{k}/{v}.py' for k, v in custom_components.items()])}.\n" + f"Please pass the argument `trust_remote_code=True` to allow custom code to be run." + ) + + # retrieve passed components that should not be downloaded + pipeline_class = _get_pipeline_class( + cls, + config_dict, + load_connected_pipeline=load_connected_pipeline, + custom_pipeline=custom_pipeline, + repo_id=pretrained_model_name if load_pipe_from_hub else None, + hub_revision=revision, + class_name=custom_class_name, + cache_dir=cache_dir, + revision=custom_revision, + ) + expected_components, _ = cls._get_signature_keys(pipeline_class) + passed_components = [k for k in expected_components if k in kwargs] + + # retrieve the names of the folders containing model weights + model_folder_names = { + os.path.split(f)[0] for f in filter_model_files(filenames) if os.path.split(f)[0] in folder_names + } + # retrieve all patterns that should not be downloaded and error out when needed + ignore_patterns = _get_ignore_patterns( + passed_components, + model_folder_names, + filenames, + use_safetensors, + from_flax, + allow_pickle, + use_onnx, + pipeline_class._is_onnx, + variant, + ) + + model_filenames, variant_filenames = variant_compatible_siblings( + filenames, variant=variant, ignore_patterns=ignore_patterns + ) + + # all filenames compatible with variant will be added + allow_patterns = list(model_filenames) + + # allow all patterns from non-model folders + # this enables downloading schedulers, tokenizers, ... + allow_patterns += [f"{k}/*" for k in folder_names if k not in model_folder_names] + # add custom component files + allow_patterns += [f"{k}/{f}.py" for k, f in custom_components.items()] + # add custom pipeline file + allow_patterns += [f"{custom_pipeline}.py"] if f"{custom_pipeline}.py" in filenames else [] + # also allow downloading config.json files with the model + allow_patterns += [os.path.join(k, "config.json") for k in model_folder_names] + allow_patterns += [ + SCHEDULER_CONFIG_NAME, + CONFIG_NAME, + cls.config_name, + CUSTOM_PIPELINE_FILE_NAME, + ] + + # Don't download any objects that are passed + allow_patterns = [ + p for p in allow_patterns if not (len(p.split("/")) == 2 and p.split("/")[0] in passed_components) + ] + + if pipeline_class._load_connected_pipes: + allow_patterns.append("README.md") + + # Don't download index files of forbidden patterns either + ignore_patterns = ignore_patterns + [f"{i}.index.*json" for i in ignore_patterns] + re_ignore_pattern = [re.compile(fnmatch.translate(p)) for p in ignore_patterns] + re_allow_pattern = [re.compile(fnmatch.translate(p)) for p in allow_patterns] + + expected_files = [f for f in filenames if not any(p.match(f) for p in re_ignore_pattern)] + expected_files = [f for f in expected_files if any(p.match(f) for p in re_allow_pattern)] + + snapshot_folder = Path(config_file).parent + pipeline_is_cached = all((snapshot_folder / f).is_file() for f in expected_files) + + if pipeline_is_cached and not force_download: + # if the pipeline is cached, we can directly return it + # else call snapshot_download + return snapshot_folder + + user_agent = {"pipeline_class": cls.__name__} + if custom_pipeline is not None and not custom_pipeline.endswith(".py"): + user_agent["custom_pipeline"] = custom_pipeline + + # download all allow_patterns - ignore_patterns + try: + cached_folder = snapshot_download( + pretrained_model_name, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + user_agent=user_agent, + ) + + cls_name = cls.load_config(os.path.join(cached_folder, "model_index.json")).get("_class_name", None) + cls_name = cls_name[4:] if isinstance(cls_name, str) and cls_name.startswith("Flax") else cls_name + + diffusers_module = importlib.import_module(__name__.split(".")[0]) + pipeline_class = getattr(diffusers_module, cls_name, None) if isinstance(cls_name, str) else None + + if pipeline_class is not None and pipeline_class._load_connected_pipes: + modelcard = ModelCard.load(os.path.join(cached_folder, "README.md")) + connected_pipes = sum([getattr(modelcard.data, k, []) for k in CONNECTED_PIPES_KEYS], []) + for connected_pipe_repo_id in connected_pipes: + download_kwargs = { + "cache_dir": cache_dir, + "force_download": force_download, + "proxies": proxies, + "local_files_only": local_files_only, + "token": token, + "variant": variant, + "use_safetensors": use_safetensors, + } + DiffusionPipeline.download(connected_pipe_repo_id, **download_kwargs) + + return cached_folder + + except FileNotFoundError: + # Means we tried to load pipeline with `local_files_only=True` but the files have not been found in local cache. + # This can happen in two cases: + # 1. If the user passed `local_files_only=True` => we raise the error directly + # 2. If we forced `local_files_only=True` when `model_info` failed => we raise the initial error + if model_info_call_error is None: + # 1. user passed `local_files_only=True` + raise + else: + # 2. we forced `local_files_only=True` when `model_info` failed + raise EnvironmentError( + f"Cannot load model {pretrained_model_name}: model is not cached locally and an error occurred" + " while trying to fetch metadata from the Hub. Please check out the root cause in the stacktrace" + " above." + ) from model_info_call_error + + @classmethod + def _get_signature_keys(cls, obj): + parameters = inspect.signature(obj.__init__).parameters + required_parameters = {k: v for k, v in parameters.items() if v.default == inspect._empty} + optional_parameters = set({k for k, v in parameters.items() if v.default != inspect._empty}) + expected_modules = set(required_parameters.keys()) - {"self"} + + optional_names = list(optional_parameters) + for name in optional_names: + if name in cls._optional_components: + expected_modules.add(name) + optional_parameters.remove(name) + + return sorted(expected_modules), sorted(optional_parameters) + + @classmethod + def _get_signature_types(cls): + signature_types = {} + # Use get_type_hints to properly resolve string annotations (from __future__ import annotations) + try: + type_hints = get_type_hints(cls.__init__) + except Exception: + # Fallback to direct annotation access if get_type_hints fails + type_hints = {} + for k, v in inspect.signature(cls.__init__).parameters.items(): + if v.annotation != inspect.Parameter.empty: + type_hints[k] = v.annotation + + # Get all parameters from the signature to ensure we don't miss any + all_params = inspect.signature(cls.__init__).parameters + + for param_name, param in all_params.items(): + # Skip 'self' parameter + if param_name == "self": + continue + + # If we have type hints, use them + if param_name in type_hints: + annotation = type_hints[param_name] + if inspect.isclass(annotation): + signature_types[param_name] = (annotation,) + elif get_origin(annotation) == Union: + signature_types[param_name] = get_args(annotation) + elif isinstance(annotation, types.UnionType): + # Handle PEP 604 union syntax (X | Y) introduced in Python 3.10+ + signature_types[param_name] = get_args(annotation) + elif get_origin(annotation) in [List, Dict, list, dict]: + signature_types[param_name] = (annotation,) + else: + logger.warning(f"cannot get type annotation for Parameter {param_name} of {cls}.") + # Still add it with empty signature so it's in expected_types + signature_types[param_name] = (inspect.Signature.empty,) + else: + # No type annotation found - add with empty signature + signature_types[param_name] = (inspect.Signature.empty,) + + return signature_types + + @property + def parameters(self) -> dict[str, Any]: + r""" + The `self.parameters` property can be useful to run different pipelines with the same weights and + configurations without reallocating additional memory. + + Returns (`dict`): + A dictionary containing all the optional parameters needed to initialize the pipeline. + + Examples: + + ```py + >>> from diffusers import ( + ... StableDiffusionPipeline, + ... StableDiffusionImg2ImgPipeline, + ... StableDiffusionInpaintPipeline, + ... ) + + >>> text2img = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> img2img = StableDiffusionImg2ImgPipeline(**text2img.components, **text2img.parameters) + >>> inpaint = StableDiffusionInpaintPipeline(**text2img.components, **text2img.parameters) + ``` + """ + expected_modules, optional_parameters = self._get_signature_keys(self) + pipeline_parameters = { + k: self.config[k] for k in self.config.keys() if not k.startswith("_") and k in optional_parameters + } + + return pipeline_parameters + + @property + def components(self) -> dict[str, Any]: + r""" + The `self.components` property can be useful to run different pipelines with the same weights and + configurations without reallocating additional memory. + + Returns (`dict`): + A dictionary containing all the modules needed to initialize the pipeline. + + Examples: + + ```py + >>> from diffusers import ( + ... StableDiffusionPipeline, + ... StableDiffusionImg2ImgPipeline, + ... StableDiffusionInpaintPipeline, + ... ) + + >>> text2img = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> img2img = StableDiffusionImg2ImgPipeline(**text2img.components) + >>> inpaint = StableDiffusionInpaintPipeline(**text2img.components) + ``` + """ + expected_modules, optional_parameters = self._get_signature_keys(self) + components = { + k: getattr(self, k) for k in self.config.keys() if not k.startswith("_") and k not in optional_parameters + } + + actual = sorted(set(components.keys())) + expected = sorted(expected_modules) + if actual != expected: + raise ValueError( + f"{self} has been incorrectly initialized or {self.__class__} is incorrectly implemented. Expected" + f" {expected} to be defined, but {actual} are defined." + ) + + return components + + @staticmethod + def numpy_to_pil(images): + """ + Convert a NumPy image or a batch of images to a PIL image. + """ + return numpy_to_pil(images) + + @torch.compiler.disable + def progress_bar(self, iterable=None, total=None): + if not hasattr(self, "_progress_bar_config"): + self._progress_bar_config = {} + elif not isinstance(self._progress_bar_config, dict): + raise ValueError( + f"`self._progress_bar_config` should be of type `dict`, but is {type(self._progress_bar_config)}." + ) + + progress_bar_config = dict(self._progress_bar_config) + if "disable" not in progress_bar_config: + progress_bar_config["disable"] = not is_torch_dist_rank_zero() + + if iterable is not None: + return tqdm(iterable, **progress_bar_config) + elif total is not None: + return tqdm(total=total, **progress_bar_config) + else: + raise ValueError("Either `total` or `iterable` has to be defined.") + + def set_progress_bar_config(self, **kwargs): + self._progress_bar_config = kwargs + + def enable_xformers_memory_efficient_attention(self, attention_op: Callable | None = None): + r""" + Enable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/). When this + option is enabled, you should observe lower GPU memory usage and a potential speed up during inference. Speed + up during training is not guaranteed. + + > [!WARNING] > ⚠️ When memory efficient attention and sliced attention are both enabled, memory efficient + attention takes > precedent. + + Parameters: + attention_op (`Callable`, *optional*): + Override the default `None` operator for use as `op` argument to the + [`memory_efficient_attention()`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention) + function of xFormers. + + Examples: + + ```py + >>> import torch + >>> from diffusers import DiffusionPipeline + >>> from xformers.ops import MemoryEfficientAttentionFlashAttentionOp + + >>> pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16) + >>> pipe = pipe.to("cuda") + >>> pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp) + >>> # Workaround for not accepting attention shape using VAE for Flash Attention + >>> pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None) + ``` + """ + self.set_use_memory_efficient_attention_xformers(True, attention_op) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/). + """ + self.set_use_memory_efficient_attention_xformers(False) + + def set_use_memory_efficient_attention_xformers(self, valid: bool, attention_op: Callable | None = None) -> None: + # Recursively walk through all the children. + # Any children which exposes the set_use_memory_efficient_attention_xformers method + # gets the message + def fn_recursive_set_mem_eff(module: torch.nn.Module): + if hasattr(module, "set_use_memory_efficient_attention_xformers"): + module.set_use_memory_efficient_attention_xformers(valid, attention_op) + + for child in module.children(): + fn_recursive_set_mem_eff(child) + + module_names, _ = self._get_signature_keys(self) + modules = [getattr(self, n, None) for n in module_names] + modules = [m for m in modules if isinstance(m, torch.nn.Module)] + + for module in modules: + fn_recursive_set_mem_eff(module) + + def enable_attention_slicing(self, slice_size: str | int = "auto"): + r""" + Enable sliced attention computation. When this option is enabled, the attention module splits the input tensor + in slices to compute attention in several steps. For more than one attention head, the computation is performed + sequentially over each head. This is useful to save some memory in exchange for a small speed decrease. + + > [!WARNING] > ⚠️ Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) + from PyTorch > 2.0 or xFormers. These attention computations are already very memory efficient so you won't + need to enable > this function. If you enable attention slicing with SDPA or xFormers, it can lead to serious + slow downs! + + Args: + slice_size (`str` or `int`, *optional*, defaults to `"auto"`): + When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If + `"max"`, maximum amount of memory will be saved by running only one slice at a time. If a number is + provided, uses as many slices as `attention_head_dim // slice_size`. In this case, `attention_head_dim` + must be a multiple of `slice_size`. + + Examples: + + ```py + >>> import torch + >>> from diffusers import StableDiffusionPipeline + + >>> pipe = StableDiffusionPipeline.from_pretrained( + ... "stable-diffusion-v1-5/stable-diffusion-v1-5", + ... torch_dtype=torch.float16, + ... use_safetensors=True, + ... ) + + >>> prompt = "a photo of an astronaut riding a horse on mars" + >>> pipe.enable_attention_slicing() + >>> image = pipe(prompt).images[0] + ``` + """ + self.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + r""" + Disable sliced attention computation. If `enable_attention_slicing` was previously called, attention is + computed in one step. + """ + # set slice_size = `None` to disable `attention slicing` + self.enable_attention_slicing(None) + + def set_attention_slice(self, slice_size: int | None): + module_names, _ = self._get_signature_keys(self) + modules = [getattr(self, n, None) for n in module_names] + modules = [m for m in modules if isinstance(m, torch.nn.Module) and hasattr(m, "set_attention_slice")] + + for module in modules: + module.set_attention_slice(slice_size) + + @classmethod + def from_pipe(cls, pipeline, **kwargs): + r""" + Create a new pipeline from a given pipeline. This method is useful to create a new pipeline from the existing + pipeline components without reallocating additional memory. + + Arguments: + pipeline (`DiffusionPipeline`): + The pipeline from which to create a new pipeline. + + Returns: + `DiffusionPipeline`: + A new pipeline with the same weights and configurations as `pipeline`. + + Examples: + + ```py + >>> from diffusers import StableDiffusionPipeline, StableDiffusionSAGPipeline + + >>> pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5") + >>> new_pipe = StableDiffusionSAGPipeline.from_pipe(pipe) + ``` + """ + + original_config = dict(pipeline.config) + torch_dtype = kwargs.pop("torch_dtype", torch.float32) + + # derive the pipeline class to instantiate + custom_pipeline = kwargs.pop("custom_pipeline", None) + custom_revision = kwargs.pop("custom_revision", None) + + if custom_pipeline is not None: + pipeline_class = _get_custom_pipeline_class(custom_pipeline, revision=custom_revision) + else: + pipeline_class = cls + + expected_modules, optional_kwargs = cls._get_signature_keys(pipeline_class) + # true_optional_modules are optional components with default value in signature so it is ok not to pass them to `__init__` + # e.g. `image_encoder` for StableDiffusionPipeline + parameters = inspect.signature(cls.__init__).parameters + true_optional_modules = set( + {k for k, v in parameters.items() if v.default != inspect._empty and k in expected_modules} + ) + + # get the class of each component based on its type hint + # e.g. {"unet": UNet2DConditionModel, "text_encoder": CLIPTextMode} + component_types = pipeline_class._get_signature_types() + + pretrained_model_name_or_path = original_config.pop("_name_or_path", None) + # allow users pass modules in `kwargs` to override the original pipeline's components + passed_class_obj = {k: kwargs.pop(k) for k in expected_modules if k in kwargs} + + original_class_obj = {} + for name, component in pipeline.components.items(): + if name in expected_modules and name not in passed_class_obj: + # for model components, we will not switch over if the class does not matches the type hint in the new pipeline's signature + if ( + not isinstance(component, ModelMixin) + or type(component) in component_types[name] + or (component is None and name in cls._optional_components) + ): + original_class_obj[name] = component + else: + logger.warning( + f"component {name} is not switched over to new pipeline because type does not match the expected." + f" {name} is {type(component)} while the new pipeline expect {component_types[name]}." + f" please pass the component of the correct type to the new pipeline. `from_pipe(..., {name}={name})`" + ) + + # allow users pass optional kwargs to override the original pipelines config attribute + passed_pipe_kwargs = {k: kwargs.pop(k) for k in optional_kwargs if k in kwargs} + original_pipe_kwargs = { + k: original_config[k] + for k in original_config.keys() + if k in optional_kwargs and k not in passed_pipe_kwargs + } + + # config attribute that were not expected by pipeline is stored as its private attribute + # (i.e. when the original pipeline was also instantiated with `from_pipe` from another pipeline that has this config) + # in this case, we will pass them as optional arguments if they can be accepted by the new pipeline + additional_pipe_kwargs = [ + k[1:] + for k in original_config.keys() + if k.startswith("_") and k[1:] in optional_kwargs and k[1:] not in passed_pipe_kwargs + ] + for k in additional_pipe_kwargs: + original_pipe_kwargs[k] = original_config.pop(f"_{k}") + + pipeline_kwargs = { + **passed_class_obj, + **original_class_obj, + **passed_pipe_kwargs, + **original_pipe_kwargs, + **kwargs, + } + + # store unused config as private attribute in the new pipeline + unused_original_config = { + f"{'' if k.startswith('_') else '_'}{k}": v for k, v in original_config.items() if k not in pipeline_kwargs + } + + optional_components = ( + pipeline._optional_components + if hasattr(pipeline, "_optional_components") and pipeline._optional_components + else [] + ) + missing_modules = ( + set(expected_modules) - set(optional_components) - set(pipeline_kwargs.keys()) - set(true_optional_modules) + ) + + if len(missing_modules) > 0: + raise ValueError( + f"Pipeline {pipeline_class} expected {expected_modules}, but only {set(list(passed_class_obj.keys()) + list(original_class_obj.keys()))} were passed" + ) + + new_pipeline = pipeline_class(**pipeline_kwargs) + if pretrained_model_name_or_path is not None: + new_pipeline.register_to_config(_name_or_path=pretrained_model_name_or_path) + new_pipeline.register_to_config(**unused_original_config) + + if torch_dtype is not None: + new_pipeline.to(dtype=torch_dtype) + + return new_pipeline + + def _maybe_raise_error_if_group_offload_active( + self, raise_error: bool = False, module: torch.nn.Module | None = None + ) -> bool: + from ..hooks.group_offloading import _is_group_offload_enabled + + components = self.components.values() if module is None else [module] + components = [component for component in components if isinstance(component, torch.nn.Module)] + for component in components: + if _is_group_offload_enabled(component): + if raise_error: + raise ValueError( + "You are trying to apply model/sequential CPU offloading to a pipeline that contains components " + "with group offloading enabled. This is not supported. Please disable group offloading for " + "components of the pipeline to use other offloading methods." + ) + return True + return False + + def _is_pipeline_device_mapped(self): + # We support passing `device_map="cuda"`, for example. This is helpful, in case + # users want to pass `device_map="cpu"` when initializing a pipeline. This explicit declaration is desirable + # in limited VRAM environments because quantized models often initialize directly on the accelerator. + device_map = self.hf_device_map + is_device_type_map = False + if isinstance(device_map, str): + try: + torch.device(device_map) + is_device_type_map = True + except RuntimeError: + pass + + return not is_device_type_map and isinstance(device_map, dict) and len(device_map) > 1 + + +class StableDiffusionMixin: + r""" + Helper for DiffusionPipeline with vae and unet.(mainly for LDM such as stable diffusion) + """ + + def enable_vae_slicing(self): + r""" + Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to + compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. + """ + depr_message = f"Calling `enable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`." + deprecate( + "enable_vae_slicing", + "0.40.0", + depr_message, + ) + self.vae.enable_slicing() + + def disable_vae_slicing(self): + r""" + Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to + computing decoding in one step. + """ + depr_message = f"Calling `disable_vae_slicing()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`." + deprecate( + "disable_vae_slicing", + "0.40.0", + depr_message, + ) + self.vae.disable_slicing() + + def enable_vae_tiling(self): + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + """ + depr_message = f"Calling `enable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`." + deprecate( + "enable_vae_tiling", + "0.40.0", + depr_message, + ) + self.vae.enable_tiling() + + def disable_vae_tiling(self): + r""" + Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to + computing decoding in one step. + """ + depr_message = f"Calling `disable_vae_tiling()` on a `{self.__class__.__name__}` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`." + deprecate( + "disable_vae_tiling", + "0.40.0", + depr_message, + ) + self.vae.disable_tiling() + + def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): + r"""Enables the FreeU mechanism as in https://huggingface.co/papers/2309.11497. + + The suffixes after the scaling factors represent the stages where they are being applied. + + Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values + that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. + + Args: + s1 (`float`): + Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to + mitigate "oversmoothing effect" in the enhanced denoising process. + s2 (`float`): + Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to + mitigate "oversmoothing effect" in the enhanced denoising process. + b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. + b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. + """ + if not hasattr(self, "unet"): + raise ValueError("The pipeline must have `unet` for using FreeU.") + self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) + + def disable_freeu(self): + """Disables the FreeU mechanism if enabled.""" + self.unet.disable_freeu() + + def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): + """ + Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value) + are fused. For cross-attention modules, key and value projection matrices are fused. + + > [!WARNING] > This API is 🧪 experimental. + + Args: + unet (`bool`, defaults to `True`): To apply fusion on the UNet. + vae (`bool`, defaults to `True`): To apply fusion on the VAE. + """ + self.fusing_unet = False + self.fusing_vae = False + + if unet: + self.fusing_unet = True + self.unet.fuse_qkv_projections() + self.unet.set_attn_processor(FusedAttnProcessor2_0()) + + if vae: + if not isinstance(self.vae, AutoencoderKL): + raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") + + self.fusing_vae = True + self.vae.fuse_qkv_projections() + self.vae.set_attn_processor(FusedAttnProcessor2_0()) + + def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): + """Disable QKV projection fusion if enabled. + + > [!WARNING] > This API is 🧪 experimental. + + Args: + unet (`bool`, defaults to `True`): To apply fusion on the UNet. + vae (`bool`, defaults to `True`): To apply fusion on the VAE. + + """ + if unet: + if not self.fusing_unet: + logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") + else: + self.unet.unfuse_qkv_projections() + self.fusing_unet = False + + if vae: + if not self.fusing_vae: + logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") + else: + self.vae.unfuse_qkv_projections() + self.fusing_vae = False diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/transformers_loading_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/transformers_loading_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..02f8ee612601ff42887fbe0cc19778665c9660aa --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/pipelines/transformers_loading_utils.py @@ -0,0 +1,123 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib +import os +import tempfile +from typing import TYPE_CHECKING + +from huggingface_hub import DDUFEntry +from tqdm import tqdm + +from ..utils import is_safetensors_available, is_transformers_available, is_transformers_version + + +if TYPE_CHECKING: + from transformers import PreTrainedModel, PreTrainedTokenizer + +if is_transformers_available(): + from transformers import PreTrainedModel, PreTrainedTokenizer + +if is_safetensors_available(): + import safetensors.torch + + +def _load_tokenizer_from_dduf( + cls: "PreTrainedTokenizer", name: str, dduf_entries: dict[str, DDUFEntry], **kwargs +) -> "PreTrainedTokenizer": + """ + Load a tokenizer from a DDUF archive. + + In practice, `transformers` do not provide a way to load a tokenizer from a DDUF archive. This function is a + workaround by extracting the tokenizer files from the DDUF archive and loading the tokenizer from the extracted + files. There is an extra cost of extracting the files, but of limited impact as the tokenizer files are usually + small-ish. + """ + with tempfile.TemporaryDirectory() as tmp_dir: + for entry_name, entry in dduf_entries.items(): + if entry_name.startswith(name + "/"): + tmp_entry_path = os.path.join(tmp_dir, *entry_name.split("/")) + # need to create intermediary directory if they don't exist + os.makedirs(os.path.dirname(tmp_entry_path), exist_ok=True) + with open(tmp_entry_path, "wb") as f: + with entry.as_mmap() as mm: + f.write(mm) + return cls.from_pretrained(os.path.dirname(tmp_entry_path), **kwargs) + + +def _load_transformers_model_from_dduf( + cls: "PreTrainedModel", name: str, dduf_entries: dict[str, DDUFEntry], **kwargs +) -> "PreTrainedModel": + """ + Load a transformers model from a DDUF archive. + + In practice, `transformers` do not provide a way to load a model from a DDUF archive. This function is a workaround + by instantiating a model from the config file and loading the weights from the DDUF archive directly. + """ + config_file = dduf_entries.get(f"{name}/config.json") + if config_file is None: + raise EnvironmentError( + f"Could not find a config.json file for component {name} in DDUF file (contains {dduf_entries.keys()})." + ) + generation_config = dduf_entries.get(f"{name}/generation_config.json", None) + + weight_files = [ + entry + for entry_name, entry in dduf_entries.items() + if entry_name.startswith(f"{name}/") and entry_name.endswith(".safetensors") + ] + if not weight_files: + raise EnvironmentError( + f"Could not find any weight file for component {name} in DDUF file (contains {dduf_entries.keys()})." + ) + if not is_safetensors_available(): + raise EnvironmentError( + "Safetensors is not available, cannot load model from DDUF. Please `pip install safetensors`." + ) + if is_transformers_version("<", "4.47.0"): + raise ImportError( + "You need to install `transformers>4.47.0` in order to load a transformers model from a DDUF file. " + "You can install it with: `pip install --upgrade transformers`" + ) + + with tempfile.TemporaryDirectory() as tmp_dir: + from transformers import AutoConfig, GenerationConfig + + tmp_config_file = os.path.join(tmp_dir, "config.json") + with open(tmp_config_file, "w") as f: + f.write(config_file.read_text()) + config = AutoConfig.from_pretrained(tmp_config_file) + if generation_config is not None: + tmp_generation_config_file = os.path.join(tmp_dir, "generation_config.json") + with open(tmp_generation_config_file, "w") as f: + f.write(generation_config.read_text()) + generation_config = GenerationConfig.from_pretrained(tmp_generation_config_file) + state_dict = {} + with contextlib.ExitStack() as stack: + for entry in tqdm(weight_files, desc="Loading state_dict"): # Loop over safetensors files + # Memory-map the safetensors file + mmap = stack.enter_context(entry.as_mmap()) + # Load tensors from the memory-mapped file + tensors = safetensors.torch.load(mmap) + # Update the state dictionary with tensors + state_dict.update(tensors) + # `from_pretrained` sets the model to eval mode by default, which is the + # correct behavior for inference. Do not call `model.train()` here. + return cls.from_pretrained( + pretrained_model_name_or_path=None, + config=config, + generation_config=generation_config, + state_dict=state_dict, + **kwargs, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3ca867c12908e9604b27fd384d924476ac8ba8d9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .auto import DiffusersAutoQuantizer +from .base import DiffusersQuantizer +from .pipe_quant_config import PipelineQuantizationConfig diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e8a62e9d53d0eda6acf1e03aed56cc2b0c1b23a8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/auto.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/auto.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0182958c34b2df3147841e50dd8f45c87c1a433e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/auto.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/base.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8d0e7a42e672a982c0d20e607bd45da8d61ffba Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/base.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/pipe_quant_config.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/pipe_quant_config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48965583acab8af7e24ba2ea4112b4d0ae8e825a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/pipe_quant_config.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/quantization_config.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/quantization_config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b70cb85757a7556c49b93bffd0c436386faa0b63 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/__pycache__/quantization_config.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/auto.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/auto.py new file mode 100644 index 0000000000000000000000000000000000000000..6cd24c459c9d5487eb270148beaed29be55a9697 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/auto.py @@ -0,0 +1,149 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from +https://github.com/huggingface/transformers/blob/c409cd81777fb27aadc043ed3d8339dbc020fb3b/src/transformers/quantizers/auto.py +""" + +import warnings + +from .bitsandbytes import BnB4BitDiffusersQuantizer, BnB8BitDiffusersQuantizer +from .gguf import GGUFQuantizer +from .modelopt import NVIDIAModelOptQuantizer +from .quantization_config import ( + BitsAndBytesConfig, + GGUFQuantizationConfig, + NVIDIAModelOptConfig, + QuantizationConfigMixin, + QuantizationMethod, + QuantoConfig, + TorchAoConfig, +) +from .quanto import QuantoQuantizer +from .torchao import TorchAoHfQuantizer + + +AUTO_QUANTIZER_MAPPING = { + "bitsandbytes_4bit": BnB4BitDiffusersQuantizer, + "bitsandbytes_8bit": BnB8BitDiffusersQuantizer, + "gguf": GGUFQuantizer, + "quanto": QuantoQuantizer, + "torchao": TorchAoHfQuantizer, + "modelopt": NVIDIAModelOptQuantizer, +} + +AUTO_QUANTIZATION_CONFIG_MAPPING = { + "bitsandbytes_4bit": BitsAndBytesConfig, + "bitsandbytes_8bit": BitsAndBytesConfig, + "gguf": GGUFQuantizationConfig, + "quanto": QuantoConfig, + "torchao": TorchAoConfig, + "modelopt": NVIDIAModelOptConfig, +} + + +class DiffusersAutoQuantizer: + """ + The auto diffusers quantizer class that takes care of automatically instantiating to the correct + `DiffusersQuantizer` given the `QuantizationConfig`. + """ + + @classmethod + def from_dict(cls, quantization_config_dict: dict): + quant_method = quantization_config_dict.get("quant_method", None) + # We need a special care for bnb models to make sure everything is BC .. + if quantization_config_dict.get("load_in_8bit", False) or quantization_config_dict.get("load_in_4bit", False): + suffix = "_4bit" if quantization_config_dict.get("load_in_4bit", False) else "_8bit" + quant_method = QuantizationMethod.BITS_AND_BYTES + suffix + elif quant_method is None: + raise ValueError( + "The model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantized" + ) + + if quant_method not in AUTO_QUANTIZATION_CONFIG_MAPPING.keys(): + raise ValueError( + f"Unknown quantization type, got {quant_method} - supported types are:" + f" {list(AUTO_QUANTIZER_MAPPING.keys())}" + ) + + target_cls = AUTO_QUANTIZATION_CONFIG_MAPPING[quant_method] + return target_cls.from_dict(quantization_config_dict) + + @classmethod + def from_config(cls, quantization_config: QuantizationConfigMixin | dict, **kwargs): + # Convert it to a QuantizationConfig if the q_config is a dict + if isinstance(quantization_config, dict): + quantization_config = cls.from_dict(quantization_config) + + quant_method = quantization_config.quant_method + + # Again, we need a special care for bnb as we have a single quantization config + # class for both 4-bit and 8-bit quantization + if quant_method == QuantizationMethod.BITS_AND_BYTES: + if quantization_config.load_in_8bit: + quant_method += "_8bit" + else: + quant_method += "_4bit" + + if quant_method not in AUTO_QUANTIZER_MAPPING.keys(): + raise ValueError( + f"Unknown quantization type, got {quant_method} - supported types are:" + f" {list(AUTO_QUANTIZER_MAPPING.keys())}" + ) + + target_cls = AUTO_QUANTIZER_MAPPING[quant_method] + return target_cls(quantization_config, **kwargs) + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): + model_config = cls.load_config(pretrained_model_name_or_path, **kwargs) + if getattr(model_config, "quantization_config", None) is None: + raise ValueError( + f"Did not found a `quantization_config` in {pretrained_model_name_or_path}. Make sure that the model is correctly quantized." + ) + quantization_config_dict = model_config.quantization_config + quantization_config = cls.from_dict(quantization_config_dict) + # Update with potential kwargs that are passed through from_pretrained. + quantization_config.update(kwargs) + + return cls.from_config(quantization_config) + + @classmethod + def merge_quantization_configs( + cls, + quantization_config: dict | QuantizationConfigMixin, + quantization_config_from_args: QuantizationConfigMixin | None, + ): + """ + handles situations where both quantization_config from args and quantization_config from model config are + present. + """ + if quantization_config_from_args is not None: + warning_msg = ( + "You passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading" + " already has a `quantization_config` attribute. The `quantization_config` from the model will be used." + ) + else: + warning_msg = "" + + if isinstance(quantization_config, dict): + quantization_config = cls.from_dict(quantization_config) + + if isinstance(quantization_config, NVIDIAModelOptConfig): + quantization_config.check_model_patching() + + if warning_msg != "": + warnings.warn(warning_msg) + + return quantization_config diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/base.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..b0988284b64843168b80f0778879239e54e6d336 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/base.py @@ -0,0 +1,245 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Adapted from +https://github.com/huggingface/transformers/blob/52cb4034ada381fe1ffe8d428a1076e5411a8026/src/transformers/quantizers/base.py +""" + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any + +from ..utils import is_torch_available +from .quantization_config import QuantizationConfigMixin + + +if TYPE_CHECKING: + from ..models.modeling_utils import ModelMixin + +if is_torch_available(): + import torch + + +class DiffusersQuantizer(ABC): + """ + Abstract class of the HuggingFace quantizer. Supports for now quantizing HF diffusers models for inference and/or + quantization. This class is used only for diffusers.models.modeling_utils.ModelMixin.from_pretrained and cannot be + easily used outside the scope of that method yet. + + Attributes + quantization_config (`diffusers.quantizers.quantization_config.QuantizationConfigMixin`): + The quantization config that defines the quantization parameters of your model that you want to quantize. + modules_to_not_convert (`list[str]`, *optional*): + The list of module names to not convert when quantizing the model. + required_packages (`list[str]`, *optional*): + The list of required pip packages to install prior to using the quantizer + requires_calibration (`bool`): + Whether the quantization method requires to calibrate the model before using it. + """ + + requires_calibration = False + required_packages = None + + def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs): + self.quantization_config = quantization_config + + # -- Handle extra kwargs below -- + self.modules_to_not_convert = kwargs.pop("modules_to_not_convert", []) + self.pre_quantized = kwargs.pop("pre_quantized", True) + + if not self.pre_quantized and self.requires_calibration: + raise ValueError( + f"The quantization method {quantization_config.quant_method} does require the model to be pre-quantized." + f" You explicitly passed `pre_quantized=False` meaning your model weights are not quantized. Make sure to " + f"pass `pre_quantized=True` while knowing what you are doing." + ) + + def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype": + """ + Some quantization methods require to explicitly set the dtype of the model to a target dtype. You need to + override this method in case you want to make sure that behavior is preserved + + Args: + torch_dtype (`torch.dtype`): + The input dtype that is passed in `from_pretrained` + """ + return torch_dtype + + def update_device_map(self, device_map: dict[str, Any] | None) -> dict[str, Any] | None: + """ + Override this method if you want to pass a override the existing device map with a new one. E.g. for + bitsandbytes, since `accelerate` is a hard requirement, if no device_map is passed, the device_map is set to + `"auto"`` + + Args: + device_map (`dict | str`, *optional*): + The device_map that is passed through the `from_pretrained` method. + """ + return device_map + + def adjust_target_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype": + """ + Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained` to compute the + device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype` to `torch.int8` + and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`. + + Args: + torch_dtype (`torch.dtype`, *optional*): + The torch_dtype that is used to compute the device_map. + """ + return torch_dtype + + def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]: + """ + Override this method if you want to adjust the `missing_keys`. + + Args: + missing_keys (`list[str]`, *optional*): + The list of missing keys in the checkpoint compared to the state dict of the model + """ + return missing_keys + + def get_special_dtypes_update(self, model, torch_dtype: "torch.dtype") -> dict[str, "torch.dtype"]: + """ + returns dtypes for modules that are not quantized - used for the computation of the device_map in case one + passes a str as a device_map. The method will use the `modules_to_not_convert` that is modified in + `_process_model_before_weight_loading`. `diffusers` models don't have any `modules_to_not_convert` attributes + yet but this can change soon in the future. + + Args: + model (`~diffusers.models.modeling_utils.ModelMixin`): + The model to quantize + torch_dtype (`torch.dtype`): + The dtype passed in `from_pretrained` method. + """ + + return { + name: torch_dtype + for name, _ in model.named_parameters() + if any(m in name for m in self.modules_to_not_convert) + } + + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + """adjust max_memory argument for infer_auto_device_map() if extra memory is needed for quantization""" + return max_memory + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ) -> bool: + """ + checks if a loaded state_dict component is part of quantized param + some validation; only defined for + quantization methods that require to create a new parameters for quantization. + """ + return False + + def create_quantized_param(self, *args, **kwargs) -> "torch.nn.Parameter": + """ + takes needed components from state_dict and creates quantized param. + """ + return + + def check_quantized_param_shape(self, *args, **kwargs): + """ + checks if the quantized param has expected shape. + """ + return True + + def validate_environment(self, *args, **kwargs): + """ + This method is used to potentially check for potential conflicts with arguments that are passed in + `from_pretrained`. You need to define it for all future quantizers that are integrated with diffusers. If no + explicit check are needed, simply return nothing. + """ + return + + def preprocess_model(self, model: "ModelMixin", **kwargs): + """ + Setting model attributes and/or converting model before weights loading. At this point the model should be + initialized on the meta device so you can freely manipulate the skeleton of the model in order to replace + modules in-place. Make sure to override the abstract method `_process_model_before_weight_loading`. + + Args: + model (`~diffusers.models.modeling_utils.ModelMixin`): + The model to quantize + kwargs (`dict`, *optional*): + The keyword arguments that are passed along `_process_model_before_weight_loading`. + """ + model.is_quantized = True + model.quantization_method = self.quantization_config.quant_method + return self._process_model_before_weight_loading(model, **kwargs) + + def postprocess_model(self, model: "ModelMixin", **kwargs): + """ + Post-process the model post weights loading. Make sure to override the abstract method + `_process_model_after_weight_loading`. + + Args: + model (`~diffusers.models.modeling_utils.ModelMixin`): + The model to quantize + kwargs (`dict`, *optional*): + The keyword arguments that are passed along `_process_model_after_weight_loading`. + """ + return self._process_model_after_weight_loading(model, **kwargs) + + def dequantize(self, model): + """ + Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance. Note + not all quantization schemes support this. + """ + model = self._dequantize(model) + + # Delete quantizer and quantization config + del model.hf_quantizer + + return model + + def get_cuda_warm_up_factor(self): + """ + The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up cuda. + A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means + we allocate half the memory of the weights residing in the empty model, etc... + """ + # By default we return 4, i.e. half the model size (this corresponds to the case where the model is not + # really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual + # weight loading) + return 4 + + def _dequantize(self, model): + raise NotImplementedError( + f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub." + ) + + @abstractmethod + def _process_model_before_weight_loading(self, model, **kwargs): ... + + @abstractmethod + def _process_model_after_weight_loading(self, model, **kwargs): ... + + @property + @abstractmethod + def is_serializable(self): ... + + @property + @abstractmethod + def is_trainable(self): ... + + @property + def is_compileable(self) -> bool: + """Flag indicating whether the quantized model can be compiled""" + return False diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9e745bc810faad76608b00ab9c493369466d0970 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__init__.py @@ -0,0 +1,2 @@ +from .bnb_quantizer import BnB4BitDiffusersQuantizer, BnB8BitDiffusersQuantizer +from .utils import dequantize_and_replace, dequantize_bnb_weight, replace_with_bnb_linear diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2c41bb90942c9a8a6567b36b401c82ce2011dad Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/bnb_quantizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/bnb_quantizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b15e28a005ed852b0ccde6bbe8afde5a521f943 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/bnb_quantizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0809bc9cb6c5becc2546700cc3fff2d2e928267c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/bnb_quantizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/bnb_quantizer.py new file mode 100644 index 0000000000000000000000000000000000000000..16ff0d83b8c4311c72003c97c939bf42262610c6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/bnb_quantizer.py @@ -0,0 +1,577 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from +https://github.com/huggingface/transformers/blob/c409cd81777fb27aadc043ed3d8339dbc020fb3b/src/transformers/quantizers/quantizer_bnb_4bit.py +""" + +from typing import TYPE_CHECKING, Any + +from ...utils import get_module_from_name +from ..base import DiffusersQuantizer + + +if TYPE_CHECKING: + from ...models.modeling_utils import ModelMixin + +from ...utils import ( + is_accelerate_available, + is_accelerate_version, + is_bitsandbytes_available, + is_bitsandbytes_version, + is_torch_available, + logging, +) + + +if is_torch_available(): + import torch + +logger = logging.get_logger(__name__) + + +class BnB4BitDiffusersQuantizer(DiffusersQuantizer): + """ + 4-bit quantization from bitsandbytes.py quantization method: + before loading: converts transformer layers into Linear4bit during loading: load 16bit weight and pass to the + layer object after: quantizes individual weights in Linear4bit into 4bit at the first .cuda() call saving: + from state dict, as usual; saves weights and `quant_state` components + loading: + need to locate `quant_state` components and pass to Param4bit constructor + """ + + use_keep_in_fp32_modules = True + requires_calibration = False + + def __init__(self, quantization_config, **kwargs): + super().__init__(quantization_config, **kwargs) + + if self.quantization_config.llm_int8_skip_modules is not None: + self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules + + def validate_environment(self, *args, **kwargs): + if not (torch.cuda.is_available() or torch.xpu.is_available()): + raise RuntimeError("No GPU found. A GPU is needed for quantization.") + if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"): + raise ImportError( + "Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>=0.26.0'`" + ) + if not is_bitsandbytes_available() or is_bitsandbytes_version("<", "0.43.3"): + raise ImportError( + "Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`" + ) + + if kwargs.get("from_flax", False): + raise ValueError( + "Converting into 4-bit weights from flax weights is currently not supported, please make" + " sure the weights are in PyTorch format." + ) + + device_map = kwargs.get("device_map", None) + if ( + device_map is not None + and isinstance(device_map, dict) + and not self.quantization_config.llm_int8_enable_fp32_cpu_offload + ): + device_map_without_no_convert = { + key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert + } + if "cpu" in device_map_without_no_convert.values() or "disk" in device_map_without_no_convert.values(): + raise ValueError( + "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the " + "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules " + "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to " + "`from_pretrained`. Check " + "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu " + "for more details. " + ) + + def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype": + if target_dtype != torch.int8: + from accelerate.utils import CustomDtype + + logger.info("target_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantization") + return CustomDtype.INT4 + else: + raise ValueError(f"Wrong `target_dtype` ({target_dtype}) provided.") + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ) -> bool: + import bitsandbytes as bnb + + module, tensor_name = get_module_from_name(model, param_name) + if isinstance(module._parameters.get(tensor_name, None), bnb.nn.Params4bit): + # Add here check for loaded components' dtypes once serialization is implemented + return True + elif isinstance(module, bnb.nn.Linear4bit) and tensor_name == "bias": + # bias could be loaded by regular set_module_tensor_to_device() from accelerate, + # but it would wrongly use uninitialized weight there. + return True + else: + return False + + def create_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + target_device: "torch.device", + state_dict: dict[str, Any], + unexpected_keys: list[str] | None = None, + **kwargs, + ): + import bitsandbytes as bnb + + module, tensor_name = get_module_from_name(model, param_name) + + if tensor_name not in module._parameters: + raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.") + + old_value = getattr(module, tensor_name) + + if tensor_name == "bias": + if param_value is None: + new_value = old_value.to(target_device) + else: + new_value = param_value.to(target_device) + + new_value = torch.nn.Parameter(new_value, requires_grad=old_value.requires_grad) + module._parameters[tensor_name] = new_value + return + + if not isinstance(module._parameters[tensor_name], bnb.nn.Params4bit): + raise ValueError("this function only loads `Linear4bit components`") + if ( + old_value.device == torch.device("meta") + and target_device not in ["meta", torch.device("meta")] + and param_value is None + ): + raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {target_device}.") + + # construct `new_value` for the module._parameters[tensor_name]: + if self.pre_quantized: + # 4bit loading. Collecting components for restoring quantized weight + # This can be expanded to make a universal call for any quantized weight loading + + if not self.is_serializable: + raise ValueError( + "Detected int4 weights but the version of bitsandbytes is not compatible with int4 serialization. " + "Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`." + ) + + if (param_name + ".quant_state.bitsandbytes__fp4" not in state_dict) and ( + param_name + ".quant_state.bitsandbytes__nf4" not in state_dict + ): + raise ValueError( + f"Supplied state dict for {param_name} does not contain `bitsandbytes__*` and possibly other `quantized_stats` components." + ) + + quantized_stats = {} + for k, v in state_dict.items(): + # `startswith` to counter for edge cases where `param_name` + # substring can be present in multiple places in the `state_dict` + if param_name + "." in k and k.startswith(param_name): + quantized_stats[k] = v + if unexpected_keys is not None and k in unexpected_keys: + unexpected_keys.remove(k) + + new_value = bnb.nn.Params4bit.from_prequantized( + data=param_value, + quantized_stats=quantized_stats, + requires_grad=False, + device=target_device, + ) + else: + new_value = param_value.to("cpu") + kwargs = old_value.__dict__ + new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(target_device) + + module._parameters[tensor_name] = new_value + + def check_quantized_param_shape(self, param_name, current_param, loaded_param): + current_param_shape = current_param.shape + loaded_param_shape = loaded_param.shape + + n = current_param_shape.numel() + inferred_shape = (n,) if "bias" in param_name else ((n + 1) // 2, 1) + if loaded_param_shape != inferred_shape: + raise ValueError( + f"Expected the flattened shape of the current param ({param_name}) to be {loaded_param_shape} but is {inferred_shape}." + ) + else: + return True + + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + # need more space for buffers that are created during quantization + max_memory = {key: val * 0.90 for key, val in max_memory.items()} + return max_memory + + def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype": + if torch_dtype is None: + # We force the `dtype` to be float16, this is a requirement from `bitsandbytes` + logger.info( + "Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to " + "requirements of `bitsandbytes` to enable model loading in 8-bit or 4-bit. " + "Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass" + " torch_dtype=torch.float16 to remove this warning.", + torch_dtype, + ) + torch_dtype = torch.float16 + return torch_dtype + + def update_device_map(self, device_map): + if device_map is None: + if torch.xpu.is_available(): + current_device = f"xpu:{torch.xpu.current_device()}" + else: + current_device = f"cuda:{torch.cuda.current_device()}" + device_map = {"": current_device} + logger.info( + "The device_map was not initialized. " + "Setting device_map to {" + ": {current_device}}. " + "If you want to use the model for inference, please set device_map ='auto' " + ) + return device_map + + def _process_model_before_weight_loading( + self, + model: "ModelMixin", + device_map, + keep_in_fp32_modules: list[str] = [], + **kwargs, + ): + from .utils import replace_with_bnb_linear + + load_in_8bit_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload + + # We may keep some modules such as the `proj_out` in their original dtype for numerical stability reasons + self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules + + if not isinstance(self.modules_to_not_convert, list): + self.modules_to_not_convert = [self.modules_to_not_convert] + + self.modules_to_not_convert.extend(keep_in_fp32_modules) + + # Extend `self.modules_to_not_convert` to keys that are supposed to be offloaded to `cpu` or `disk` + if isinstance(device_map, dict) and len(device_map.keys()) > 1: + keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]] + + if len(keys_on_cpu) > 0 and not load_in_8bit_fp32_cpu_offload: + raise ValueError( + "If you want to offload some keys to `cpu` or `disk`, you need to set " + "`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be " + " converted to 8-bit but kept in 32-bit." + ) + self.modules_to_not_convert.extend(keys_on_cpu) + + # Purge `None`. + # Unlike `transformers`, we don't know if we should always keep certain modules in FP32 + # in case of diffusion transformer models. For language models and others alike, `lm_head` + # and tied modules are usually kept in FP32. + self.modules_to_not_convert = [module for module in self.modules_to_not_convert if module is not None] + + model = replace_with_bnb_linear( + model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config + ) + model.config.quantization_config = self.quantization_config + model.is_loaded_in_4bit = True + + def _process_model_after_weight_loading(self, model: "ModelMixin", **kwargs): + model.is_4bit_serializable = self.is_serializable + return model + + @property + def is_serializable(self): + # Because we're mandating `bitsandbytes` 0.43.3. + return True + + @property + def is_trainable(self) -> bool: + # Because we're mandating `bitsandbytes` 0.43.3. + return True + + def _dequantize(self, model): + from .utils import dequantize_and_replace + + is_model_on_cpu = model.device.type == "cpu" + if is_model_on_cpu: + logger.info( + "Model was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to GPU. After dequantization, will move the model back to CPU again to preserve the previous device." + ) + if torch.xpu.is_available(): + model.to(torch.xpu.current_device()) + else: + model.to(torch.cuda.current_device()) + + model = dequantize_and_replace( + model, self.modules_to_not_convert, quantization_config=self.quantization_config + ) + if is_model_on_cpu: + model.to("cpu") + return model + + +class BnB8BitDiffusersQuantizer(DiffusersQuantizer): + """ + 8-bit quantization from bitsandbytes quantization method: + before loading: converts transformer layers into Linear8bitLt during loading: load 16bit weight and pass to the + layer object after: quantizes individual weights in Linear8bitLt into 8bit at fitst .cuda() call + saving: + from state dict, as usual; saves weights and 'SCB' component + loading: + need to locate SCB component and pass to the Linear8bitLt object + """ + + use_keep_in_fp32_modules = True + requires_calibration = False + + def __init__(self, quantization_config, **kwargs): + super().__init__(quantization_config, **kwargs) + + if self.quantization_config.llm_int8_skip_modules is not None: + self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules + + def validate_environment(self, *args, **kwargs): + if not (torch.cuda.is_available() or torch.xpu.is_available()): + raise RuntimeError("No GPU found. A GPU is needed for quantization.") + if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"): + raise ImportError( + "Using `bitsandbytes` 8-bit quantization requires Accelerate: `pip install 'accelerate>=0.26.0'`" + ) + if not is_bitsandbytes_available() or is_bitsandbytes_version("<", "0.43.3"): + raise ImportError( + "Using `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`" + ) + + if kwargs.get("from_flax", False): + raise ValueError( + "Converting into 8-bit weights from flax weights is currently not supported, please make" + " sure the weights are in PyTorch format." + ) + + device_map = kwargs.get("device_map", None) + if ( + device_map is not None + and isinstance(device_map, dict) + and not self.quantization_config.llm_int8_enable_fp32_cpu_offload + ): + device_map_without_no_convert = { + key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert + } + if "cpu" in device_map_without_no_convert.values() or "disk" in device_map_without_no_convert.values(): + raise ValueError( + "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the " + "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules " + "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to " + "`from_pretrained`. Check " + "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu " + "for more details. " + ) + + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.adjust_max_memory + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + # need more space for buffers that are created during quantization + max_memory = {key: val * 0.90 for key, val in max_memory.items()} + return max_memory + + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.update_torch_dtype + def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype": + if torch_dtype is None: + # We force the `dtype` to be float16, this is a requirement from `bitsandbytes` + logger.info( + "Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to " + "requirements of `bitsandbytes` to enable model loading in 8-bit or 4-bit. " + "Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass" + " torch_dtype=torch.float16 to remove this warning.", + torch_dtype, + ) + torch_dtype = torch.float16 + return torch_dtype + + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.update_device_map + def update_device_map(self, device_map): + if device_map is None: + if torch.xpu.is_available(): + current_device = f"xpu:{torch.xpu.current_device()}" + else: + current_device = f"cuda:{torch.cuda.current_device()}" + device_map = {"": current_device} + logger.info( + "The device_map was not initialized. " + "Setting device_map to {" + ": {current_device}}. " + "If you want to use the model for inference, please set device_map ='auto' " + ) + return device_map + + def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype": + if target_dtype != torch.int8: + logger.info("target_dtype {target_dtype} is replaced by `torch.int8` for 8-bit BnB quantization") + return torch.int8 + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ): + import bitsandbytes as bnb + + module, tensor_name = get_module_from_name(model, param_name) + if isinstance(module._parameters.get(tensor_name, None), bnb.nn.Int8Params): + if self.pre_quantized: + if param_name.replace("weight", "SCB") not in state_dict.keys(): + raise ValueError("Missing quantization component `SCB`") + if param_value.dtype != torch.int8: + raise ValueError( + f"Incompatible dtype `{param_value.dtype}` when loading 8-bit prequantized weight. Expected `torch.int8`." + ) + return True + return False + + def create_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + target_device: "torch.device", + state_dict: dict[str, Any], + unexpected_keys: list[str] | None = None, + **kwargs, + ): + import bitsandbytes as bnb + + fp16_statistics_key = param_name.replace("weight", "SCB") + fp16_weights_format_key = param_name.replace("weight", "weight_format") + + fp16_statistics = state_dict.get(fp16_statistics_key, None) + fp16_weights_format = state_dict.get(fp16_weights_format_key, None) + + module, tensor_name = get_module_from_name(model, param_name) + if tensor_name not in module._parameters: + raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.") + + old_value = getattr(module, tensor_name) + + if not isinstance(module._parameters[tensor_name], bnb.nn.Int8Params): + raise ValueError(f"Parameter `{tensor_name}` should only be a `bnb.nn.Int8Params` instance.") + if ( + old_value.device == torch.device("meta") + and target_device not in ["meta", torch.device("meta")] + and param_value is None + ): + raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {target_device}.") + + new_value = param_value.to("cpu") + if self.pre_quantized and not self.is_serializable: + raise ValueError( + "Detected int8 weights but the version of bitsandbytes is not compatible with int8 serialization. " + "Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`." + ) + + kwargs = old_value.__dict__ + new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(target_device) + + module._parameters[tensor_name] = new_value + if fp16_statistics is not None: + setattr(module.weight, "SCB", fp16_statistics.to(target_device)) + if unexpected_keys is not None: + unexpected_keys.remove(fp16_statistics_key) + + # We just need to pop the `weight_format` keys from the state dict to remove unneeded + # messages. The correct format is correctly retrieved during the first forward pass. + if fp16_weights_format is not None and unexpected_keys is not None: + unexpected_keys.remove(fp16_weights_format_key) + + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer._process_model_after_weight_loading with 4bit->8bit + def _process_model_after_weight_loading(self, model: "ModelMixin", **kwargs): + model.is_8bit_serializable = self.is_serializable + return model + + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer._process_model_before_weight_loading with 4bit->8bit + def _process_model_before_weight_loading( + self, + model: "ModelMixin", + device_map, + keep_in_fp32_modules: list[str] = [], + **kwargs, + ): + from .utils import replace_with_bnb_linear + + load_in_8bit_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload + + # We may keep some modules such as the `proj_out` in their original dtype for numerical stability reasons + self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules + + if not isinstance(self.modules_to_not_convert, list): + self.modules_to_not_convert = [self.modules_to_not_convert] + + self.modules_to_not_convert.extend(keep_in_fp32_modules) + + # Extend `self.modules_to_not_convert` to keys that are supposed to be offloaded to `cpu` or `disk` + if isinstance(device_map, dict) and len(device_map.keys()) > 1: + keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]] + + if len(keys_on_cpu) > 0 and not load_in_8bit_fp32_cpu_offload: + raise ValueError( + "If you want to offload some keys to `cpu` or `disk`, you need to set " + "`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be " + " converted to 8-bit but kept in 32-bit." + ) + self.modules_to_not_convert.extend(keys_on_cpu) + + # Purge `None`. + # Unlike `transformers`, we don't know if we should always keep certain modules in FP32 + # in case of diffusion transformer models. For language models and others alike, `lm_head` + # and tied modules are usually kept in FP32. + self.modules_to_not_convert = [module for module in self.modules_to_not_convert if module is not None] + + model = replace_with_bnb_linear( + model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config + ) + model.config.quantization_config = self.quantization_config + model.is_loaded_in_8bit = True + + @property + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.is_serializable + def is_serializable(self): + # Because we're mandating `bitsandbytes` 0.43.3. + return True + + @property + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.is_serializable + def is_trainable(self) -> bool: + # Because we're mandating `bitsandbytes` 0.43.3. + return True + + @property + def is_compileable(self) -> bool: + return True + + def _dequantize(self, model): + from .utils import dequantize_and_replace + + model = dequantize_and_replace( + model, self.modules_to_not_convert, quantization_config=self.quantization_config + ) + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..aea4e7dda57f297c2cbf615b92d444ee4b7b1e92 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/bitsandbytes/utils.py @@ -0,0 +1,318 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Adapted from +https://github.com/huggingface/transformers/blob/c409cd81777fb27aadc043ed3d8339dbc020fb3b/src/transformers/integrations/bitsandbytes.py +""" + +import inspect +from inspect import signature + +from ...utils import is_accelerate_available, is_bitsandbytes_available, is_torch_available, logging +from ..quantization_config import QuantizationMethod + + +if is_torch_available(): + import torch + import torch.nn as nn + +if is_bitsandbytes_available(): + import bitsandbytes as bnb + +if is_accelerate_available(): + import accelerate + from accelerate import init_empty_weights + from accelerate.hooks import add_hook_to_module, remove_hook_from_module + +logger = logging.get_logger(__name__) + + +def _replace_with_bnb_linear( + model, + modules_to_not_convert=None, + current_key_name=None, + quantization_config=None, + has_been_replaced=False, +): + """ + Private method that wraps the recursion for module replacement. + + Returns the converted model and a boolean that indicates if the conversion has been successful or not. + """ + for name, module in model.named_children(): + if current_key_name is None: + current_key_name = [] + current_key_name.append(name) + + if isinstance(module, nn.Linear) and name not in modules_to_not_convert: + # Check if the current key is not in the `modules_to_not_convert` + current_key_name_str = ".".join(current_key_name) + if not any( + (key + "." in current_key_name_str) or (key == current_key_name_str) for key in modules_to_not_convert + ): + with init_empty_weights(): + in_features = module.in_features + out_features = module.out_features + + if quantization_config.quantization_method() == "llm_int8": + model._modules[name] = bnb.nn.Linear8bitLt( + in_features, + out_features, + module.bias is not None, + has_fp16_weights=quantization_config.llm_int8_has_fp16_weight, + threshold=quantization_config.llm_int8_threshold, + ) + has_been_replaced = True + else: + if ( + quantization_config.llm_int8_skip_modules is not None + and name in quantization_config.llm_int8_skip_modules + ): + pass + else: + extra_kwargs = ( + {"quant_storage": quantization_config.bnb_4bit_quant_storage} + if "quant_storage" in list(signature(bnb.nn.Linear4bit).parameters) + else {} + ) + model._modules[name] = bnb.nn.Linear4bit( + in_features, + out_features, + module.bias is not None, + quantization_config.bnb_4bit_compute_dtype, + compress_statistics=quantization_config.bnb_4bit_use_double_quant, + quant_type=quantization_config.bnb_4bit_quant_type, + **extra_kwargs, + ) + has_been_replaced = True + # Store the module class in case we need to transpose the weight later + model._modules[name].source_cls = type(module) + # Force requires grad to False to avoid unexpected errors + model._modules[name].requires_grad_(False) + if len(list(module.children())) > 0: + _, has_been_replaced = _replace_with_bnb_linear( + module, + modules_to_not_convert, + current_key_name, + quantization_config, + has_been_replaced=has_been_replaced, + ) + # Remove the last key for recursion + current_key_name.pop(-1) + return model, has_been_replaced + + +def replace_with_bnb_linear(model, modules_to_not_convert=None, current_key_name=None, quantization_config=None): + """ + Helper function to replace the `nn.Linear` layers within `model` with either `bnb.nn.Linear8bit` or + `bnb.nn.Linear4bit` using the `bitsandbytes` library. + + References: + * `bnb.nn.Linear8bit`: [LLM.int8(): 8-bit Matrix Multiplication for Transformers at + Scale](https://huggingface.co/papers/2208.07339) + * `bnb.nn.Linear4bit`: [QLoRA: Efficient Finetuning of Quantized + LLMs](https://huggingface.co/papers/2305.14314) + + Parameters: + model (`torch.nn.Module`): + Input model or `torch.nn.Module` as the function is run recursively. + modules_to_not_convert (`list[`str`]`, *optional*, defaults to `[]`): + Names of the modules to not convert in `Linear8bitLt`. In practice we keep the `modules_to_not_convert` in + full precision for numerical stability reasons. + current_key_name (`list[`str`]`, *optional*): + An array to track the current key of the recursion. This is used to check whether the current key (part of + it) is not in the list of modules to not convert (for instances modules that are offloaded to `cpu` or + `disk`). + quantization_config ('transformers.utils.quantization_config.BitsAndBytesConfig'): + To configure and manage settings related to quantization, a technique used to compress neural network + models by reducing the precision of the weights and activations, thus making models more efficient in terms + of both storage and computation. + """ + model, _ = _replace_with_bnb_linear(model, modules_to_not_convert, current_key_name, quantization_config) + + has_been_replaced = any( + isinstance(replaced_module, (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt)) + for _, replaced_module in model.named_modules() + ) + if not has_been_replaced: + logger.warning( + "You are loading your model in 8bit or 4bit but no linear modules were found in your model." + " Please double check your model architecture, or submit an issue on github if you think this is" + " a bug." + ) + + return model + + +# Adapted from PEFT: https://github.com/huggingface/peft/blob/6d458b300fc2ed82e19f796b53af4c97d03ea604/src/peft/utils/integrations.py#L81 +def dequantize_bnb_weight(weight: "torch.nn.Parameter", state=None, dtype: "torch.dtype" = None): + """ + Helper function to dequantize 4bit or 8bit bnb weights. + + If the weight is not a bnb quantized weight, it will be returned as is. + """ + if not isinstance(weight, torch.nn.Parameter): + raise TypeError(f"Input weight should be of type nn.Parameter, got {type(weight)} instead") + + cls_name = weight.__class__.__name__ + if cls_name not in ("Params4bit", "Int8Params"): + return weight + + if cls_name == "Params4bit": + output_tensor = bnb.functional.dequantize_4bit(weight.data, weight.quant_state) + msg = f"The model is going to be dequantized in {output_tensor.dtype} - if you want to upcast it to another dtype, make sure to pass the desired dtype when quantizing the model through `bnb_4bit_quant_type` argument of `BitsAndBytesConfig`" + if dtype: + msg = f"The model is going to be first dequantized in {output_tensor.dtype} and type-casted to {dtype}" + output_tensor = output_tensor.to(dtype) + logger.warning_once(msg) + return output_tensor + + if state.SCB is None: + state.SCB = weight.SCB + + if hasattr(bnb.functional, "int8_vectorwise_dequant"): + # Use bitsandbytes API if available (requires v0.45.0+) + dequantized = bnb.functional.int8_vectorwise_dequant(weight.data, state.SCB) + else: + # Multiply by (scale/127) to dequantize. + dequantized = weight.data * state.SCB.view(-1, 1) * 7.874015718698502e-3 + + if dtype: + dequantized = dequantized.to(dtype) + return dequantized + + +def _create_accelerate_new_hook(old_hook): + r""" + Creates a new hook based on the old hook. Use it only if you know what you are doing ! This method is a copy of: + https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245 with + some changes + """ + old_hook_cls = getattr(accelerate.hooks, old_hook.__class__.__name__) + old_hook_attr = old_hook.__dict__ + filtered_old_hook_attr = {} + old_hook_init_signature = inspect.signature(old_hook_cls.__init__) + for k in old_hook_attr.keys(): + if k in old_hook_init_signature.parameters: + filtered_old_hook_attr[k] = old_hook_attr[k] + new_hook = old_hook_cls(**filtered_old_hook_attr) + return new_hook + + +def _dequantize_and_replace( + model, + dtype, + modules_to_not_convert=None, + current_key_name=None, + quantization_config=None, + has_been_replaced=False, +): + """ + Converts a quantized model into its dequantized original version. The newly converted model will have some + performance drop compared to the original model before quantization - use it only for specific usecases such as + QLoRA adapters merging. + + Returns the converted model and a boolean that indicates if the conversion has been successful or not. + """ + quant_method = quantization_config.quantization_method() + + target_cls = bnb.nn.Linear8bitLt if quant_method == "llm_int8" else bnb.nn.Linear4bit + + for name, module in model.named_children(): + if current_key_name is None: + current_key_name = [] + current_key_name.append(name) + + if isinstance(module, target_cls) and name not in modules_to_not_convert: + # Check if the current key is not in the `modules_to_not_convert` + current_key_name_str = ".".join(current_key_name) + + if not any( + (key + "." in current_key_name_str) or (key == current_key_name_str) for key in modules_to_not_convert + ): + bias = getattr(module, "bias", None) + + device = module.weight.device + with init_empty_weights(): + new_module = torch.nn.Linear(module.in_features, module.out_features, bias=bias is not None) + + if quant_method == "llm_int8": + state = module.state + else: + state = None + + new_module.weight = torch.nn.Parameter(dequantize_bnb_weight(module.weight, state, dtype)) + + if bias is not None: + new_module.bias = bias + + # Create a new hook and attach it in case we use accelerate + if hasattr(module, "_hf_hook"): + old_hook = module._hf_hook + new_hook = _create_accelerate_new_hook(old_hook) + + remove_hook_from_module(module) + add_hook_to_module(new_module, new_hook) + + new_module.to(device) + model._modules[name] = new_module + has_been_replaced = True + if len(list(module.children())) > 0: + _, has_been_replaced = _dequantize_and_replace( + module, + dtype=dtype, + modules_to_not_convert=modules_to_not_convert, + current_key_name=current_key_name, + quantization_config=quantization_config, + has_been_replaced=has_been_replaced, + ) + # Remove the last key for recursion + current_key_name.pop(-1) + return model, has_been_replaced + + +def dequantize_and_replace( + model, + modules_to_not_convert=None, + quantization_config=None, +): + model, _ = _dequantize_and_replace( + model, + dtype=model.dtype, + modules_to_not_convert=modules_to_not_convert, + quantization_config=quantization_config, + ) + has_been_replaced = any( + isinstance(replaced_module, torch.nn.Linear) for _, replaced_module in model.named_modules() + ) + if not has_been_replaced: + logger.warning( + "Some linear modules were not dequantized. This could lead to unexpected behaviour. Please check your model." + ) + + return model + + +def _check_bnb_status(module) -> bool | bool: + is_loaded_in_4bit_bnb = ( + hasattr(module, "is_loaded_in_4bit") + and module.is_loaded_in_4bit + and getattr(module, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES + ) + is_loaded_in_8bit_bnb = ( + hasattr(module, "is_loaded_in_8bit") + and module.is_loaded_in_8bit + and getattr(module, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES + ) + return is_loaded_in_4bit_bnb or is_loaded_in_8bit_bnb, is_loaded_in_4bit_bnb, is_loaded_in_8bit_bnb diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b3d9082ac803c8da97ba7bb37021445307b96f7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__init__.py @@ -0,0 +1 @@ +from .gguf_quantizer import GGUFQuantizer diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a661e40514fffd37ff9c2facac26dd58c1bc133 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/gguf_quantizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/gguf_quantizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ccf4ad1338d6527ece25c43f2f94d873a34812cf Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/gguf_quantizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a63246c07253cf8ef568df24bf099ce069d74551 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/gguf_quantizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/gguf_quantizer.py new file mode 100644 index 0000000000000000000000000000000000000000..15f39dd9605eddb44d0b169cbc492e84afdc8bca --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/gguf_quantizer.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from ..base import DiffusersQuantizer + + +if TYPE_CHECKING: + from ...models.modeling_utils import ModelMixin + + +from ...utils import ( + get_module_from_name, + is_accelerate_available, + is_accelerate_version, + is_gguf_available, + is_gguf_version, + is_torch_available, + logging, +) + + +if is_torch_available() and is_gguf_available(): + import torch + + from .utils import ( + GGML_QUANT_SIZES, + GGUFParameter, + _dequantize_gguf_and_restore_linear, + _quant_shape_from_byte_shape, + _replace_with_gguf_linear, + ) + + +logger = logging.get_logger(__name__) + + +class GGUFQuantizer(DiffusersQuantizer): + use_keep_in_fp32_modules = True + + def __init__(self, quantization_config, **kwargs): + super().__init__(quantization_config, **kwargs) + + self.compute_dtype = quantization_config.compute_dtype + self.pre_quantized = quantization_config.pre_quantized + self.modules_to_not_convert = quantization_config.modules_to_not_convert or [] + + if not isinstance(self.modules_to_not_convert, list): + self.modules_to_not_convert = [self.modules_to_not_convert] + + def validate_environment(self, *args, **kwargs): + if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"): + raise ImportError( + "Loading GGUF Parameters requires `accelerate` installed in your environment: `pip install 'accelerate>=0.26.0'`" + ) + if not is_gguf_available() or is_gguf_version("<", "0.10.0"): + raise ImportError( + "To load GGUF format files you must have `gguf` installed in your environment: `pip install gguf>=0.10.0`" + ) + + # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.adjust_max_memory + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + # need more space for buffers that are created during quantization + max_memory = {key: val * 0.90 for key, val in max_memory.items()} + return max_memory + + def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype": + if target_dtype != torch.uint8: + logger.info(f"target_dtype {target_dtype} is replaced by `torch.uint8` for GGUF quantization") + return torch.uint8 + + def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype": + if torch_dtype is None: + torch_dtype = self.compute_dtype + return torch_dtype + + def check_quantized_param_shape(self, param_name, current_param, loaded_param): + loaded_param_shape = loaded_param.shape + current_param_shape = current_param.shape + quant_type = loaded_param.quant_type + + block_size, type_size = GGML_QUANT_SIZES[quant_type] + + inferred_shape = _quant_shape_from_byte_shape(loaded_param_shape, type_size, block_size) + if inferred_shape != current_param_shape: + raise ValueError( + f"{param_name} has an expected quantized shape of: {inferred_shape}, but received shape: {loaded_param_shape}" + ) + + return True + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "GGUFParameter" | "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ) -> bool: + if isinstance(param_value, GGUFParameter): + return True + + return False + + def create_quantized_param( + self, + model: "ModelMixin", + param_value: "GGUFParameter" | "torch.Tensor", + param_name: str, + target_device: "torch.device", + state_dict: dict[str, Any] | None = None, + unexpected_keys: list[str] | None = None, + **kwargs, + ): + module, tensor_name = get_module_from_name(model, param_name) + if tensor_name not in module._parameters and tensor_name not in module._buffers: + raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.") + + if tensor_name in module._parameters: + module._parameters[tensor_name] = param_value.to(target_device) + if tensor_name in module._buffers: + module._buffers[tensor_name] = param_value.to(target_device) + + def _process_model_before_weight_loading( + self, + model: "ModelMixin", + device_map, + keep_in_fp32_modules: list[str] = [], + **kwargs, + ): + state_dict = kwargs.get("state_dict", None) + + self.modules_to_not_convert.extend(keep_in_fp32_modules) + self.modules_to_not_convert = [module for module in self.modules_to_not_convert if module is not None] + + _replace_with_gguf_linear( + model, self.compute_dtype, state_dict, modules_to_not_convert=self.modules_to_not_convert + ) + + def _process_model_after_weight_loading(self, model: "ModelMixin", **kwargs): + return model + + @property + def is_serializable(self): + return False + + @property + def is_trainable(self) -> bool: + return False + + @property + def is_compileable(self) -> bool: + return True + + def _dequantize(self, model): + is_model_on_cpu = model.device.type == "cpu" + if is_model_on_cpu: + logger.info( + "Model was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to accelerator. After dequantization, will move the model back to CPU again to preserve the previous device." + ) + device = ( + torch.accelerator.current_accelerator() + if hasattr(torch, "accelerator") + else torch.cuda.current_device() + ) + model.to(device) + + model = _dequantize_gguf_and_restore_linear(model, self.modules_to_not_convert) + if is_model_on_cpu: + model.to("cpu") + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e0ad0e1cce424c840e64b85695735bcc93c9cd86 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/gguf/utils.py @@ -0,0 +1,610 @@ +# Copyright 2025 The HuggingFace Team and City96. All rights reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. + +import inspect +import os +from contextlib import nullcontext + +import gguf +import torch +import torch.nn as nn + +from ...utils import is_accelerate_available, is_kernels_available + + +if is_accelerate_available(): + import accelerate + from accelerate import init_empty_weights + from accelerate.hooks import add_hook_to_module, remove_hook_from_module + + +can_use_cuda_kernels = ( + os.getenv("DIFFUSERS_GGUF_CUDA_KERNELS", "false").lower() in ["1", "true", "yes"] + and torch.cuda.is_available() + and torch.cuda.get_device_capability()[0] >= 7 +) +if can_use_cuda_kernels and is_kernels_available(): + from kernels import get_kernel + + ops = get_kernel("Isotr0py/ggml") +else: + ops = None + +UNQUANTIZED_TYPES = {gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.BF16} +STANDARD_QUANT_TYPES = { + gguf.GGMLQuantizationType.Q4_0, + gguf.GGMLQuantizationType.Q4_1, + gguf.GGMLQuantizationType.Q5_0, + gguf.GGMLQuantizationType.Q5_1, + gguf.GGMLQuantizationType.Q8_0, + gguf.GGMLQuantizationType.Q8_1, +} +KQUANT_TYPES = { + gguf.GGMLQuantizationType.Q2_K, + gguf.GGMLQuantizationType.Q3_K, + gguf.GGMLQuantizationType.Q4_K, + gguf.GGMLQuantizationType.Q5_K, + gguf.GGMLQuantizationType.Q6_K, +} +IMATRIX_QUANT_TYPES = { + gguf.GGMLQuantizationType.IQ1_M, + gguf.GGMLQuantizationType.IQ1_S, + gguf.GGMLQuantizationType.IQ2_XXS, + gguf.GGMLQuantizationType.IQ2_XS, + gguf.GGMLQuantizationType.IQ2_S, + gguf.GGMLQuantizationType.IQ3_XXS, + gguf.GGMLQuantizationType.IQ3_S, + gguf.GGMLQuantizationType.IQ4_XS, + gguf.GGMLQuantizationType.IQ4_NL, +} +# TODO(Isotr0py): Currently, we don't have MMQ kernel for I-Matrix quantization. +# Consolidate DEQUANT_TYPES, MMVQ_QUANT_TYPES and MMQ_QUANT_TYPES after we add +# MMQ kernel for I-Matrix quantization. +DEQUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES +MMVQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES | IMATRIX_QUANT_TYPES +MMQ_QUANT_TYPES = STANDARD_QUANT_TYPES | KQUANT_TYPES + + +def _fused_mul_mat_gguf(x: torch.Tensor, qweight: torch.Tensor, qweight_type: int) -> torch.Tensor: + # there is no need to call any kernel for fp16/bf16 + if qweight_type in UNQUANTIZED_TYPES: + weight = dequantize_gguf_tensor(qweight) + return x @ weight.T + + # TODO(Isotr0py): GGUF's MMQ and MMVQ implementation are designed for + # contiguous batching and inefficient with diffusers' batching, + # so we disabled it now. + + # elif qweight_type in MMVQ_QUANT_TYPES: + # y = ops.ggml_mul_mat_vec_a8(qweight, x, qweight_type, qweight.shape[0]) + # elif qweight_type in MMQ_QUANT_TYPES: + # y = ops.ggml_mul_mat_a8(qweight, x, qweight_type, qweight.shape[0]) + + # If there is no available MMQ kernel, fallback to dequantize + if qweight_type in DEQUANT_TYPES: + block_size, type_size = gguf.GGML_QUANT_SIZES[qweight_type] + shape = (qweight.shape[0], qweight.shape[1] // type_size * block_size) + weight = ops.ggml_dequantize(qweight, qweight_type, *shape) + y = x @ weight.to(x.dtype).T + else: + # Raise an error if the quantization type is not supported. + # Might be useful if llama.cpp adds a new quantization type. + # Wrap to GGMLQuantizationType IntEnum to make sure it's a valid type. + qweight_type = gguf.GGMLQuantizationType(qweight_type) + raise NotImplementedError(f"Unsupported GGUF quantization type: {qweight_type}") + return y.as_tensor() + + +# Copied from diffusers.quantizers.bitsandbytes.utils._create_accelerate_new_hook +def _create_accelerate_new_hook(old_hook): + r""" + Creates a new hook based on the old hook. Use it only if you know what you are doing ! This method is a copy of: + https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245 with + some changes + """ + old_hook_cls = getattr(accelerate.hooks, old_hook.__class__.__name__) + old_hook_attr = old_hook.__dict__ + filtered_old_hook_attr = {} + old_hook_init_signature = inspect.signature(old_hook_cls.__init__) + for k in old_hook_attr.keys(): + if k in old_hook_init_signature.parameters: + filtered_old_hook_attr[k] = old_hook_attr[k] + new_hook = old_hook_cls(**filtered_old_hook_attr) + return new_hook + + +def _replace_with_gguf_linear(model, compute_dtype, state_dict, prefix="", modules_to_not_convert=[]): + def _should_convert_to_gguf(state_dict, prefix): + weight_key = prefix + "weight" + return weight_key in state_dict and isinstance(state_dict[weight_key], GGUFParameter) + + has_children = list(model.children()) + if not has_children: + return + + for name, module in model.named_children(): + module_prefix = prefix + name + "." + _replace_with_gguf_linear(module, compute_dtype, state_dict, module_prefix, modules_to_not_convert) + + if ( + isinstance(module, nn.Linear) + and _should_convert_to_gguf(state_dict, module_prefix) + and name not in modules_to_not_convert + ): + ctx = init_empty_weights if is_accelerate_available() else nullcontext + with ctx(): + model._modules[name] = GGUFLinear( + module.in_features, + module.out_features, + module.bias is not None, + compute_dtype=compute_dtype, + ) + model._modules[name].source_cls = type(module) + # Force requires_grad to False to avoid unexpected errors + model._modules[name].requires_grad_(False) + + return model + + +def _dequantize_gguf_and_restore_linear(model, modules_to_not_convert=[]): + for name, module in model.named_children(): + if isinstance(module, GGUFLinear) and name not in modules_to_not_convert: + device = module.weight.device + bias = getattr(module, "bias", None) + + ctx = init_empty_weights if is_accelerate_available() else nullcontext + with ctx(): + new_module = nn.Linear( + module.in_features, + module.out_features, + module.bias is not None, + device=device, + ) + new_module.weight = nn.Parameter(dequantize_gguf_tensor(module.weight)) + if bias is not None: + new_module.bias = bias + + # Create a new hook and attach it in case we use accelerate + if hasattr(module, "_hf_hook"): + old_hook = module._hf_hook + new_hook = _create_accelerate_new_hook(old_hook) + + remove_hook_from_module(module) + add_hook_to_module(new_module, new_hook) + + new_module.to(device) + model._modules[name] = new_module + + has_children = list(module.children()) + if has_children: + _dequantize_gguf_and_restore_linear(module, modules_to_not_convert) + + return model + + +# dequantize operations based on torch ports of GGUF dequantize_functions +# from City96 +# more info: https://github.com/city96/ComfyUI-GGUF/blob/main/dequant.py + + +QK_K = 256 +K_SCALE_SIZE = 12 + + +def to_uint32(x): + x = x.view(torch.uint8).to(torch.int32) + return (x[:, 0] | x[:, 1] << 8 | x[:, 2] << 16 | x[:, 3] << 24).unsqueeze(1) + + +def split_block_dims(blocks, *args): + n_max = blocks.shape[1] + dims = list(args) + [n_max - sum(args)] + return torch.split(blocks, dims, dim=1) + + +def get_scale_min(scales): + n_blocks = scales.shape[0] + scales = scales.view(torch.uint8) + scales = scales.reshape((n_blocks, 3, 4)) + + d, m, m_d = torch.split(scales, scales.shape[-2] // 3, dim=-2) + + sc = torch.cat([d & 0x3F, (m_d & 0x0F) | ((d >> 2) & 0x30)], dim=-1) + min = torch.cat([m & 0x3F, (m_d >> 4) | ((m >> 2) & 0x30)], dim=-1) + + return (sc.reshape((n_blocks, 8)), min.reshape((n_blocks, 8))) + + +def dequantize_blocks_Q8_0(blocks, block_size, type_size, dtype=None): + d, x = split_block_dims(blocks, 2) + d = d.view(torch.float16).to(dtype) + x = x.view(torch.int8) + return d * x + + +def dequantize_blocks_Q5_1(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + d, m, qh, qs = split_block_dims(blocks, 2, 2, 4) + d = d.view(torch.float16).to(dtype) + m = m.view(torch.float16).to(dtype) + qh = to_uint32(qh) + + qh = qh.reshape((n_blocks, 1)) >> torch.arange(32, device=d.device, dtype=torch.int32).reshape(1, 32) + ql = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor( + [0, 4], device=d.device, dtype=torch.uint8 + ).reshape(1, 1, 2, 1) + qh = (qh & 1).to(torch.uint8) + ql = (ql & 0x0F).reshape((n_blocks, -1)) + + qs = ql | (qh << 4) + return (d * qs) + m + + +def dequantize_blocks_Q5_0(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + d, qh, qs = split_block_dims(blocks, 2, 4) + d = d.view(torch.float16).to(dtype) + qh = to_uint32(qh) + + qh = qh.reshape(n_blocks, 1) >> torch.arange(32, device=d.device, dtype=torch.int32).reshape(1, 32) + ql = qs.reshape(n_blocks, -1, 1, block_size // 2) >> torch.tensor( + [0, 4], device=d.device, dtype=torch.uint8 + ).reshape(1, 1, 2, 1) + + qh = (qh & 1).to(torch.uint8) + ql = (ql & 0x0F).reshape(n_blocks, -1) + + qs = (ql | (qh << 4)).to(torch.int8) - 16 + return d * qs + + +def dequantize_blocks_Q4_1(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + d, m, qs = split_block_dims(blocks, 2, 2) + d = d.view(torch.float16).to(dtype) + m = m.view(torch.float16).to(dtype) + + qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor( + [0, 4], device=d.device, dtype=torch.uint8 + ).reshape(1, 1, 2, 1) + qs = (qs & 0x0F).reshape(n_blocks, -1) + + return (d * qs) + m + + +def dequantize_blocks_Q4_0(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + d, qs = split_block_dims(blocks, 2) + d = d.view(torch.float16).to(dtype) + + qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor( + [0, 4], device=d.device, dtype=torch.uint8 + ).reshape((1, 1, 2, 1)) + qs = (qs & 0x0F).reshape((n_blocks, -1)).to(torch.int8) - 8 + return d * qs + + +def dequantize_blocks_Q6_K(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + ( + ql, + qh, + scales, + d, + ) = split_block_dims(blocks, QK_K // 2, QK_K // 4, QK_K // 16) + + scales = scales.view(torch.int8).to(dtype) + d = d.view(torch.float16).to(dtype) + d = (d * scales).reshape((n_blocks, QK_K // 16, 1)) + + ql = ql.reshape((n_blocks, -1, 1, 64)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape( + (1, 1, 2, 1) + ) + ql = (ql & 0x0F).reshape((n_blocks, -1, 32)) + qh = qh.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 2, 4, 6], device=d.device, dtype=torch.uint8).reshape( + (1, 1, 4, 1) + ) + qh = (qh & 0x03).reshape((n_blocks, -1, 32)) + q = (ql | (qh << 4)).to(torch.int8) - 32 + q = q.reshape((n_blocks, QK_K // 16, -1)) + + return (d * q).reshape((n_blocks, QK_K)) + + +def dequantize_blocks_Q5_K(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + d, dmin, scales, qh, qs = split_block_dims(blocks, 2, 2, K_SCALE_SIZE, QK_K // 8) + + d = d.view(torch.float16).to(dtype) + dmin = dmin.view(torch.float16).to(dtype) + + sc, m = get_scale_min(scales) + + d = (d * sc).reshape((n_blocks, -1, 1)) + dm = (dmin * m).reshape((n_blocks, -1, 1)) + + ql = qs.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape( + (1, 1, 2, 1) + ) + qh = qh.reshape((n_blocks, -1, 1, 32)) >> torch.arange(0, 8, device=d.device, dtype=torch.uint8).reshape( + (1, 1, 8, 1) + ) + ql = (ql & 0x0F).reshape((n_blocks, -1, 32)) + qh = (qh & 0x01).reshape((n_blocks, -1, 32)) + q = ql | (qh << 4) + + return (d * q - dm).reshape((n_blocks, QK_K)) + + +def dequantize_blocks_Q4_K(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + d, dmin, scales, qs = split_block_dims(blocks, 2, 2, K_SCALE_SIZE) + d = d.view(torch.float16).to(dtype) + dmin = dmin.view(torch.float16).to(dtype) + + sc, m = get_scale_min(scales) + + d = (d * sc).reshape((n_blocks, -1, 1)) + dm = (dmin * m).reshape((n_blocks, -1, 1)) + + qs = qs.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape( + (1, 1, 2, 1) + ) + qs = (qs & 0x0F).reshape((n_blocks, -1, 32)) + + return (d * qs - dm).reshape((n_blocks, QK_K)) + + +def dequantize_blocks_Q3_K(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + hmask, qs, scales, d = split_block_dims(blocks, QK_K // 8, QK_K // 4, 12) + d = d.view(torch.float16).to(dtype) + + lscales, hscales = scales[:, :8], scales[:, 8:] + lscales = lscales.reshape((n_blocks, 1, 8)) >> torch.tensor([0, 4], device=d.device, dtype=torch.uint8).reshape( + (1, 2, 1) + ) + lscales = lscales.reshape((n_blocks, 16)) + hscales = hscales.reshape((n_blocks, 1, 4)) >> torch.tensor( + [0, 2, 4, 6], device=d.device, dtype=torch.uint8 + ).reshape((1, 4, 1)) + hscales = hscales.reshape((n_blocks, 16)) + scales = (lscales & 0x0F) | ((hscales & 0x03) << 4) + scales = scales.to(torch.int8) - 32 + + dl = (d * scales).reshape((n_blocks, 16, 1)) + + ql = qs.reshape((n_blocks, -1, 1, 32)) >> torch.tensor([0, 2, 4, 6], device=d.device, dtype=torch.uint8).reshape( + (1, 1, 4, 1) + ) + qh = hmask.reshape(n_blocks, -1, 1, 32) >> torch.arange(0, 8, device=d.device, dtype=torch.uint8).reshape( + (1, 1, 8, 1) + ) + ql = ql.reshape((n_blocks, 16, QK_K // 16)) & 3 + qh = (qh.reshape((n_blocks, 16, QK_K // 16)) & 1) ^ 1 + q = ql.to(torch.int8) - (qh << 2).to(torch.int8) + + return (dl * q).reshape((n_blocks, QK_K)) + + +def dequantize_blocks_Q2_K(blocks, block_size, type_size, dtype=None): + n_blocks = blocks.shape[0] + + scales, qs, d, dmin = split_block_dims(blocks, QK_K // 16, QK_K // 4, 2) + d = d.view(torch.float16).to(dtype) + dmin = dmin.view(torch.float16).to(dtype) + + # (n_blocks, 16, 1) + dl = (d * (scales & 0xF)).reshape((n_blocks, QK_K // 16, 1)) + ml = (dmin * (scales >> 4)).reshape((n_blocks, QK_K // 16, 1)) + + shift = torch.tensor([0, 2, 4, 6], device=d.device, dtype=torch.uint8).reshape((1, 1, 4, 1)) + + qs = (qs.reshape((n_blocks, -1, 1, 32)) >> shift) & 3 + qs = qs.reshape((n_blocks, QK_K // 16, 16)) + qs = dl * qs - ml + + return qs.reshape((n_blocks, -1)) + + +def dequantize_blocks_BF16(blocks, block_size, type_size, dtype=None): + return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32) + + +# this part from calcuis (gguf.org) +# more info: https://github.com/calcuis/gguf-connector/blob/main/src/gguf_connector/quant2c.py + + +def dequantize_blocks_IQ4_NL(blocks, block_size, type_size, dtype=None): + kvalues = torch.tensor( + [-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113], + dtype=torch.float32, + device=blocks.device, + ) + n_blocks = blocks.shape[0] + d, qs = split_block_dims(blocks, 2) + d = d.view(torch.float16).to(dtype) + qs = qs.reshape((n_blocks, -1, 1, block_size // 2)) >> torch.tensor( + [0, 4], device=blocks.device, dtype=torch.uint8 + ).reshape((1, 1, 2, 1)) + qs = (qs & 15).reshape((n_blocks, -1)).to(torch.int64) + kvalues = kvalues.view(1, 1, 16) + qs = qs.unsqueeze(-1) + qs = torch.gather(kvalues.expand(qs.shape[0], qs.shape[1], 16), 2, qs) + qs = qs.squeeze(-1).to(dtype) + return d * qs + + +def dequantize_blocks_IQ4_XS(blocks, block_size, type_size, dtype=None): + kvalues = torch.tensor( + [-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113], + dtype=torch.float32, + device=blocks.device, + ) + n_blocks = blocks.shape[0] + d, scales_h, scales_l, qs = split_block_dims(blocks, 2, 2, QK_K // 64) + d = d.view(torch.float16).to(dtype) + scales_h = scales_h.view(torch.int16) + scales_l = scales_l.reshape((n_blocks, -1, 1)) >> torch.tensor( + [0, 4], device=blocks.device, dtype=torch.uint8 + ).reshape((1, 1, 2)) + scales_h = scales_h.reshape((n_blocks, 1, -1)) >> torch.tensor( + [2 * i for i in range(QK_K // 32)], device=blocks.device, dtype=torch.uint8 + ).reshape((1, -1, 1)) + scales_l = scales_l.reshape((n_blocks, -1)) & 0x0F + scales_h = scales_h.reshape((n_blocks, -1)) & 0x03 + scales = (scales_l | (scales_h << 4)) - 32 + dl = (d * scales.to(dtype)).reshape((n_blocks, -1, 1)) + shifts_q = torch.tensor([0, 4], device=blocks.device, dtype=torch.uint8).reshape(1, 1, 2, 1) + qs = qs.reshape((n_blocks, -1, 1, 16)) >> shifts_q + qs = (qs & 15).reshape((n_blocks, -1, 32)).to(torch.int64) + kvalues = kvalues.view(1, 1, 1, 16) + qs = qs.unsqueeze(-1) + qs = torch.gather(kvalues.expand(qs.shape[0], qs.shape[1], qs.shape[2], 16), 3, qs) + qs = qs.squeeze(-1).to(dtype) + return (dl * qs).reshape(n_blocks, -1) + + +GGML_QUANT_SIZES = gguf.GGML_QUANT_SIZES +dequantize_functions = { + gguf.GGMLQuantizationType.IQ4_NL: dequantize_blocks_IQ4_NL, + gguf.GGMLQuantizationType.IQ4_XS: dequantize_blocks_IQ4_XS, + gguf.GGMLQuantizationType.BF16: dequantize_blocks_BF16, + gguf.GGMLQuantizationType.Q8_0: dequantize_blocks_Q8_0, + gguf.GGMLQuantizationType.Q5_1: dequantize_blocks_Q5_1, + gguf.GGMLQuantizationType.Q5_0: dequantize_blocks_Q5_0, + gguf.GGMLQuantizationType.Q4_1: dequantize_blocks_Q4_1, + gguf.GGMLQuantizationType.Q4_0: dequantize_blocks_Q4_0, + gguf.GGMLQuantizationType.Q6_K: dequantize_blocks_Q6_K, + gguf.GGMLQuantizationType.Q5_K: dequantize_blocks_Q5_K, + gguf.GGMLQuantizationType.Q4_K: dequantize_blocks_Q4_K, + gguf.GGMLQuantizationType.Q3_K: dequantize_blocks_Q3_K, + gguf.GGMLQuantizationType.Q2_K: dequantize_blocks_Q2_K, +} +SUPPORTED_GGUF_QUANT_TYPES = list(dequantize_functions.keys()) + + +def _quant_shape_from_byte_shape(shape, type_size, block_size): + return (*shape[:-1], shape[-1] // type_size * block_size) + + +def dequantize_gguf_tensor(tensor): + if not hasattr(tensor, "quant_type"): + return tensor + + quant_type = tensor.quant_type + dequant_fn = dequantize_functions[quant_type] + + block_size, type_size = GGML_QUANT_SIZES[quant_type] + + # Conver to plain tensor to avoid unnecessary __torch_function__ overhead. + tensor = tensor.as_tensor() + + tensor = tensor.view(torch.uint8) + shape = _quant_shape_from_byte_shape(tensor.shape, type_size, block_size) + + n_blocks = tensor.numel() // type_size + blocks = tensor.reshape((n_blocks, type_size)) + + dequant = dequant_fn(blocks, block_size, type_size) + dequant = dequant.reshape(shape) + + return dequant + + +class GGUFParameter(torch.nn.Parameter): + def __new__(cls, data, requires_grad=False, quant_type=None): + data = data if data is not None else torch.empty(0) + self = torch.Tensor._make_subclass(cls, data, requires_grad) + self.quant_type = quant_type + block_size, type_size = GGML_QUANT_SIZES[quant_type] + self.quant_shape = _quant_shape_from_byte_shape(self.shape, type_size, block_size) + + return self + + def as_tensor(self): + return torch.Tensor._make_subclass(torch.Tensor, self, self.requires_grad) + + @staticmethod + def _extract_quant_type(args): + # When converting from original format checkpoints we often use splits, cats etc on tensors + # this method ensures that the returned tensor type from those operations remains GGUFParameter + # so that we preserve quant_type information + for arg in args: + if isinstance(arg, list) and isinstance(arg[0], GGUFParameter): + return arg[0].quant_type + if isinstance(arg, GGUFParameter): + return arg.quant_type + return None + + @classmethod + def __torch_function__(cls, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + + result = super().__torch_function__(func, types, args, kwargs) + + if isinstance(result, torch.Tensor): + quant_type = cls._extract_quant_type(args) + return cls(result, quant_type=quant_type) + # Handle tuples and lists + elif type(result) in (list, tuple): + # Preserve the original type (tuple or list) + quant_type = cls._extract_quant_type(args) + wrapped = [cls(x, quant_type=quant_type) if isinstance(x, torch.Tensor) else x for x in result] + return type(result)(wrapped) + else: + return result + + +class GGUFLinear(nn.Linear): + def __init__( + self, + in_features, + out_features, + bias=False, + compute_dtype=None, + device=None, + ) -> None: + super().__init__(in_features, out_features, bias, device) + self.compute_dtype = compute_dtype + self.device = device + + def forward(self, inputs: torch.Tensor): + if ops is not None and self.weight.is_cuda and inputs.is_cuda: + return self.forward_cuda(inputs) + return self.forward_native(inputs) + + def forward_native(self, inputs: torch.Tensor): + weight = dequantize_gguf_tensor(self.weight) + weight = weight.to(self.compute_dtype) + bias = self.bias.to(self.compute_dtype) if self.bias is not None else None + + output = torch.nn.functional.linear(inputs, weight, bias) + return output + + def forward_cuda(self, inputs: torch.Tensor): + quant_type = self.weight.quant_type + output = _fused_mul_mat_gguf(inputs.to(self.compute_dtype), self.weight, quant_type) + if self.bias is not None: + output += self.bias.to(self.compute_dtype) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ae0951cb30d140011f6b3595036f3ca225b423f4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__init__.py @@ -0,0 +1 @@ +from .modelopt_quantizer import NVIDIAModelOptQuantizer diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..93fcd06486302b27d488c615f2a1c69f732f9346 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__pycache__/modelopt_quantizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__pycache__/modelopt_quantizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02b2a9d647728aa91314de1c5ea23d345914ff23 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/__pycache__/modelopt_quantizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/modelopt_quantizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/modelopt_quantizer.py new file mode 100644 index 0000000000000000000000000000000000000000..5c9e198c82b2a1fda338cda60cf80a9adbb71c1e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/modelopt/modelopt_quantizer.py @@ -0,0 +1,190 @@ +from typing import TYPE_CHECKING, Any + +from ...utils import ( + get_module_from_name, + is_accelerate_available, + is_nvidia_modelopt_available, + is_torch_available, + logging, +) +from ..base import DiffusersQuantizer + + +if TYPE_CHECKING: + from ...models.modeling_utils import ModelMixin + + +if is_torch_available(): + import torch + import torch.nn as nn + +if is_accelerate_available(): + from accelerate.utils import set_module_tensor_to_device + + +logger = logging.get_logger(__name__) + + +class NVIDIAModelOptQuantizer(DiffusersQuantizer): + r""" + Diffusers Quantizer for Nvidia-Model Optimizer + """ + + use_keep_in_fp32_modules = True + requires_calibration = False + required_packages = ["nvidia_modelopt"] + + def __init__(self, quantization_config, **kwargs): + super().__init__(quantization_config, **kwargs) + + def validate_environment(self, *args, **kwargs): + if not is_nvidia_modelopt_available(): + raise ImportError( + "Loading an nvidia-modelopt quantized model requires nvidia-modelopt library (`pip install nvidia-modelopt`)" + ) + + self.offload = False + + device_map = kwargs.get("device_map", None) + if isinstance(device_map, dict): + if "cpu" in device_map.values() or "disk" in device_map.values(): + if self.pre_quantized: + raise ValueError( + "You are attempting to perform cpu/disk offload with a pre-quantized modelopt model " + "This is not supported yet. Please remove the CPU or disk device from the `device_map` argument." + ) + else: + self.offload = True + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ): + # ModelOpt imports diffusers internally. This is here to prevent circular imports + from modelopt.torch.quantization.utils import is_quantized + + module, tensor_name = get_module_from_name(model, param_name) + if self.pre_quantized: + return True + elif is_quantized(module) and "weight" in tensor_name: + return True + return False + + def create_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + target_device: "torch.device", + *args, + **kwargs, + ): + """ + Create the quantized parameter by calling .calibrate() after setting it to the module. + """ + # ModelOpt imports diffusers internally. This is here to prevent circular imports + import modelopt.torch.quantization as mtq + + dtype = kwargs.get("dtype", torch.float32) + module, tensor_name = get_module_from_name(model, param_name) + if self.pre_quantized: + module._parameters[tensor_name] = torch.nn.Parameter(param_value.to(device=target_device)) + else: + set_module_tensor_to_device(model, param_name, target_device, param_value, dtype) + mtq.calibrate( + module, self.quantization_config.modelopt_config["algorithm"], self.quantization_config.forward_loop + ) + mtq.compress(module) + module.weight.requires_grad = False + + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + max_memory = {key: val * 0.90 for key, val in max_memory.items()} + return max_memory + + def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype": + if self.quantization_config.quant_type == "FP8": + target_dtype = torch.float8_e4m3fn + return target_dtype + + def update_torch_dtype(self, torch_dtype: "torch.dtype" = None) -> "torch.dtype": + if torch_dtype is None: + logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.") + torch_dtype = torch.float32 + return torch_dtype + + def get_conv_param_names(self, model: "ModelMixin") -> list[str]: + """ + Get parameter names for all convolutional layers in a HuggingFace ModelMixin. Includes Conv1d/2d/3d and + ConvTranspose1d/2d/3d. + """ + conv_types = ( + nn.Conv1d, + nn.Conv2d, + nn.Conv3d, + nn.ConvTranspose1d, + nn.ConvTranspose2d, + nn.ConvTranspose3d, + ) + + conv_param_names = [] + for name, module in model.named_modules(): + if isinstance(module, conv_types): + for param_name, _ in module.named_parameters(recurse=False): + conv_param_names.append(f"{name}.{param_name}") + + return conv_param_names + + def _process_model_before_weight_loading( + self, + model: "ModelMixin", + device_map, + keep_in_fp32_modules: list[str] = [], + **kwargs, + ): + # ModelOpt imports diffusers internally. This is here to prevent circular imports + import modelopt.torch.opt as mto + + if self.pre_quantized: + return + + modules_to_not_convert = self.quantization_config.modules_to_not_convert + + if modules_to_not_convert is None: + modules_to_not_convert = [] + if isinstance(modules_to_not_convert, str): + modules_to_not_convert = [modules_to_not_convert] + modules_to_not_convert.extend(keep_in_fp32_modules) + if self.quantization_config.disable_conv_quantization: + modules_to_not_convert.extend(self.get_conv_param_names(model)) + + for module in modules_to_not_convert: + self.quantization_config.modelopt_config["quant_cfg"]["*" + module + "*"] = {"enable": False} + self.quantization_config.modules_to_not_convert = modules_to_not_convert + mto.apply_mode(model, mode=[("quantize", self.quantization_config.modelopt_config)]) + model.config.quantization_config = self.quantization_config + + def _process_model_after_weight_loading(self, model, **kwargs): + # ModelOpt imports diffusers internally. This is here to prevent circular imports + from modelopt.torch.opt import ModeloptStateManager + + if self.pre_quantized: + return model + + for _, m in model.named_modules(): + if hasattr(m, ModeloptStateManager._state_key) and m is not model: + ModeloptStateManager.remove_state(m) + + return model + + @property + def is_trainable(self): + return True + + @property + def is_serializable(self): + self.quantization_config.check_model_patching(operation="saving") + return True diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/pipe_quant_config.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/pipe_quant_config.py new file mode 100644 index 0000000000000000000000000000000000000000..90be7d7964387651e80e48c1e1c25a4c7f625c66 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/pipe_quant_config.py @@ -0,0 +1,206 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import inspect + +from ..utils import is_transformers_available, logging +from .quantization_config import QuantizationConfigMixin as DiffQuantConfigMixin + + +try: + from transformers.utils.quantization_config import QuantizationConfigMixin as TransformersQuantConfigMixin +except ImportError: + + class TransformersQuantConfigMixin: + pass + + +logger = logging.get_logger(__name__) + + +class PipelineQuantizationConfig: + """ + Configuration class to be used when applying quantization on-the-fly to [`~DiffusionPipeline.from_pretrained`]. + + Args: + quant_backend (`str`): Quantization backend to be used. When using this option, we assume that the backend + is available to both `diffusers` and `transformers`. + quant_kwargs (`dict`): Params to initialize the quantization backend class. + components_to_quantize (`list`): Components of a pipeline to be quantized. + quant_mapping (`dict`): Mapping defining the quantization specs to be used for the pipeline + components. When using this argument, users are not expected to provide `quant_backend`, `quant_kawargs`, + and `components_to_quantize`. + """ + + def __init__( + self, + quant_backend: str = None, + quant_kwargs: dict[str, str | float | int | dict] = None, + components_to_quantize: list[str] | str | None = None, + quant_mapping: dict[str, DiffQuantConfigMixin | "TransformersQuantConfigMixin"] = None, + ): + self.quant_backend = quant_backend + # Initialize kwargs to be {} to set to the defaults. + self.quant_kwargs = quant_kwargs or {} + if components_to_quantize: + if isinstance(components_to_quantize, str): + components_to_quantize = [components_to_quantize] + self.components_to_quantize = components_to_quantize + self.quant_mapping = quant_mapping + self.config_mapping = {} # book-keeping Example: `{module_name: quant_config}` + self.post_init() + + def post_init(self): + quant_mapping = self.quant_mapping + self.is_granular = True if quant_mapping is not None else False + + self._validate_init_args() + + def _validate_init_args(self): + if self.quant_backend and self.quant_mapping: + raise ValueError("Both `quant_backend` and `quant_mapping` cannot be specified at the same time.") + + if not self.quant_mapping and not self.quant_backend: + raise ValueError("Must provide a `quant_backend` when not providing a `quant_mapping`.") + + if not self.quant_kwargs and not self.quant_mapping: + raise ValueError("Both `quant_kwargs` and `quant_mapping` cannot be None.") + + if self.quant_backend is not None: + self._validate_init_kwargs_in_backends() + + if self.quant_mapping is not None: + self._validate_quant_mapping_args() + + def _validate_init_kwargs_in_backends(self): + quant_backend = self.quant_backend + + self._check_backend_availability(quant_backend) + + quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list() + + if quant_config_mapping_transformers is not None: + init_kwargs_transformers = inspect.signature(quant_config_mapping_transformers[quant_backend].__init__) + init_kwargs_transformers = {name for name in init_kwargs_transformers.parameters if name != "self"} + else: + init_kwargs_transformers = None + + init_kwargs_diffusers = inspect.signature(quant_config_mapping_diffusers[quant_backend].__init__) + init_kwargs_diffusers = {name for name in init_kwargs_diffusers.parameters if name != "self"} + + if init_kwargs_transformers != init_kwargs_diffusers: + raise ValueError( + "The signatures of the __init__ methods of the quantization config classes in `diffusers` and `transformers` don't match. " + f"Please provide a `quant_mapping` instead, in the {self.__class__.__name__} class. Refer to [the docs](https://huggingface.co/docs/diffusers/main/en/quantization/overview#pipeline-level-quantization) to learn more about how " + "this mapping would look like." + ) + + def _validate_quant_mapping_args(self): + quant_mapping = self.quant_mapping + transformers_map, diffusers_map = self._get_quant_config_list() + + available_transformers = list(transformers_map.values()) if transformers_map else None + available_diffusers = list(diffusers_map.values()) + + for module_name, config in quant_mapping.items(): + if any(isinstance(config, cfg) for cfg in available_diffusers): + continue + + if available_transformers and any(isinstance(config, cfg) for cfg in available_transformers): + continue + + if available_transformers: + raise ValueError( + f"Provided config for module_name={module_name} could not be found. " + f"Available diffusers configs: {available_diffusers}; " + f"Available transformers configs: {available_transformers}." + ) + else: + raise ValueError( + f"Provided config for module_name={module_name} could not be found. " + f"Available diffusers configs: {available_diffusers}." + ) + + def _check_backend_availability(self, quant_backend: str): + quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list() + + available_backends_transformers = ( + list(quant_config_mapping_transformers.keys()) if quant_config_mapping_transformers else None + ) + available_backends_diffusers = list(quant_config_mapping_diffusers.keys()) + + if ( + available_backends_transformers and quant_backend not in available_backends_transformers + ) or quant_backend not in quant_config_mapping_diffusers: + error_message = f"Provided quant_backend={quant_backend} was not found." + if available_backends_transformers: + error_message += f"\nAvailable ones (transformers): {available_backends_transformers}." + error_message += f"\nAvailable ones (diffusers): {available_backends_diffusers}." + raise ValueError(error_message) + + def _resolve_quant_config(self, is_diffusers: bool = True, module_name: str = None): + quant_config_mapping_transformers, quant_config_mapping_diffusers = self._get_quant_config_list() + + quant_mapping = self.quant_mapping + components_to_quantize = self.components_to_quantize + + # Granular case + if self.is_granular and module_name in quant_mapping: + logger.debug(f"Initializing quantization config class for {module_name}.") + config = quant_mapping[module_name] + self.config_mapping.update({module_name: config}) + return config + + # Global config case + else: + should_quantize = False + # Only quantize the modules requested for. + if components_to_quantize and module_name in components_to_quantize: + should_quantize = True + # No specification for `components_to_quantize` means all modules should be quantized. + elif not self.is_granular and not components_to_quantize: + should_quantize = True + + if should_quantize: + logger.debug(f"Initializing quantization config class for {module_name}.") + mapping_to_use = quant_config_mapping_diffusers if is_diffusers else quant_config_mapping_transformers + quant_config_cls = mapping_to_use[self.quant_backend] + quant_kwargs = self.quant_kwargs + quant_obj = quant_config_cls(**quant_kwargs) + self.config_mapping.update({module_name: quant_obj}) + return quant_obj + + # Fallback: no applicable configuration found. + return None + + def _get_quant_config_list(self): + if is_transformers_available(): + from transformers.quantizers.auto import ( + AUTO_QUANTIZATION_CONFIG_MAPPING as quant_config_mapping_transformers, + ) + else: + quant_config_mapping_transformers = None + + from ..quantizers.auto import AUTO_QUANTIZATION_CONFIG_MAPPING as quant_config_mapping_diffusers + + return quant_config_mapping_transformers, quant_config_mapping_diffusers + + def __repr__(self): + out = "" + config_mapping = dict(sorted(self.config_mapping.copy().items())) + for module_name, config in config_mapping.items(): + out += f"{module_name} {config}" + return out diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quantization_config.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quantization_config.py new file mode 100644 index 0000000000000000000000000000000000000000..138ec7b7e9891ae8099c81bf20b9161c33bc3939 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quantization_config.py @@ -0,0 +1,1050 @@ +#!/usr/bin/env python +# coding=utf-8 + +# Copyright 2023 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Adapted from +https://github.com/huggingface/transformers/blob/52cb4034ada381fe1ffe8d428a1076e5411a8026/src/transformers/utils/quantization_config.py +""" + +from __future__ import annotations + +import copy +import dataclasses +import importlib.metadata +import inspect +import json +import os +import warnings +from dataclasses import dataclass, is_dataclass +from enum import Enum +from functools import partial +from typing import Any, Callable + +from packaging import version + +from ..utils import is_torch_available, is_torchao_available, is_torchao_version, logging + + +if is_torch_available(): + import torch + +logger = logging.get_logger(__name__) + + +class QuantizationMethod(str, Enum): + BITS_AND_BYTES = "bitsandbytes" + GGUF = "gguf" + TORCHAO = "torchao" + QUANTO = "quanto" + MODELOPT = "modelopt" + + +if is_torchao_available(): + from torchao.quantization.quant_primitives import MappingType + + class TorchAoJSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, MappingType): + return obj.name + return super().default(obj) + + +@dataclass +class QuantizationConfigMixin: + """ + Mixin class for quantization config + """ + + quant_method: QuantizationMethod + _exclude_attributes_at_init = [] + + @classmethod + def from_dict(cls, config_dict, return_unused_kwargs=False, **kwargs): + """ + Instantiates a [`QuantizationConfigMixin`] from a Python dictionary of parameters. + + Args: + config_dict (`dict[str, Any]`): + Dictionary that will be used to instantiate the configuration object. + return_unused_kwargs (`bool`, *optional*, defaults to `False`): + Whether or not to return a list of unused keyword arguments. Used for `from_pretrained` method in + `PreTrainedModel`. + kwargs (`dict[str, Any]`): + Additional parameters from which to initialize the configuration object. + + Returns: + [`QuantizationConfigMixin`]: The configuration object instantiated from those parameters. + """ + + config = cls(**config_dict) + + to_remove = [] + for key, value in kwargs.items(): + if hasattr(config, key): + setattr(config, key, value) + to_remove.append(key) + for key in to_remove: + kwargs.pop(key, None) + + if return_unused_kwargs: + return config, kwargs + else: + return config + + def to_json_file(self, json_file_path: str | os.PathLike): + """ + Save this instance to a JSON file. + + Args: + json_file_path (`str` or `os.PathLike`): + Path to the JSON file in which this configuration instance's parameters will be saved. + use_diff (`bool`, *optional*, defaults to `True`): + If set to `True`, only the difference between the config instance and the default + `QuantizationConfig()` is serialized to JSON file. + """ + with open(json_file_path, "w", encoding="utf-8") as writer: + config_dict = self.to_dict() + json_string = json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + writer.write(json_string) + + def to_dict(self) -> dict[str, Any]: + """ + Serializes this instance to a Python dictionary. Returns: + `dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance. + """ + return copy.deepcopy(self.__dict__) + + def __iter__(self): + """allows `dict(obj)` for situations where obj may be a dict or QuantizationConfigMixin""" + for attr, value in copy.deepcopy(self.__dict__).items(): + yield attr, value + + def __repr__(self): + return f"{self.__class__.__name__} {self.to_json_string()}" + + def to_json_string(self, use_diff: bool = True) -> str: + """ + Serializes this instance to a JSON string. + + Args: + use_diff (`bool`, *optional*, defaults to `True`): + If set to `True`, only the difference between the config instance and the default `PretrainedConfig()` + is serialized to JSON string. + + Returns: + `str`: String containing all the attributes that make up this configuration instance in JSON format. + """ + if use_diff is True: + config_dict = self.to_diff_dict() + else: + config_dict = self.to_dict() + return json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + def update(self, **kwargs): + """ + Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes, + returning all the unused kwargs. + + Args: + kwargs (`dict[str, Any]`): + Dictionary of attributes to tentatively update this class. + + Returns: + `dict[str, Any]`: Dictionary containing all the key-value pairs that were not used to update the instance. + """ + to_remove = [] + for key, value in kwargs.items(): + if hasattr(self, key): + setattr(self, key, value) + to_remove.append(key) + + # Remove all the attributes that were updated, without modifying the input dict + unused_kwargs = {key: value for key, value in kwargs.items() if key not in to_remove} + return unused_kwargs + + +@dataclass +class BitsAndBytesConfig(QuantizationConfigMixin): + """ + This is a wrapper class about all possible attributes and features that you can play with a model that has been + loaded using `bitsandbytes`. + + This replaces `load_in_8bit` or `load_in_4bit` therefore both options are mutually exclusive. + + Currently only supports `LLM.int8()`, `FP4`, and `NF4` quantization. If more methods are added to `bitsandbytes`, + then more arguments will be added to this class. + + Args: + load_in_8bit (`bool`, *optional*, defaults to `False`): + This flag is used to enable 8-bit quantization with LLM.int8(). + load_in_4bit (`bool`, *optional*, defaults to `False`): + This flag is used to enable 4-bit quantization by replacing the Linear layers with FP4/NF4 layers from + `bitsandbytes`. + llm_int8_threshold (`float`, *optional*, defaults to 6.0): + This corresponds to the outlier threshold for outlier detection as described in `LLM.int8() : 8-bit Matrix + Multiplication for Transformers at Scale` paper: https://huggingface.co/papers/2208.07339 Any hidden states + value that is above this threshold will be considered an outlier and the operation on those values will be + done in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5], + but there are some exceptional systematic outliers that are very differently distributed for large models. + These outliers are often in the interval [-60, -6] or [6, 60]. Int8 quantization works well for values of + magnitude ~5, but beyond that, there is a significant performance penalty. A good default threshold is 6, + but a lower threshold might be needed for more unstable models (small models, fine-tuning). + llm_int8_skip_modules (`list[str]`, *optional*): + An explicit list of the modules that we do not want to convert in 8-bit. This is useful for models such as + Jukebox that has several heads in different places and not necessarily at the last position. For example + for `CausalLM` models, the last `lm_head` is typically kept in its original `dtype`. + llm_int8_enable_fp32_cpu_offload (`bool`, *optional*, defaults to `False`): + This flag is used for advanced use cases and users that are aware of this feature. If you want to split + your model in different parts and run some parts in int8 on GPU and some parts in fp32 on CPU, you can use + this flag. This is useful for offloading large models such as `google/flan-t5-xxl`. Note that the int8 + operations will not be run on CPU. + llm_int8_has_fp16_weight (`bool`, *optional*, defaults to `False`): + This flag runs LLM.int8() with 16-bit main weights. This is useful for fine-tuning as the weights do not + have to be converted back and forth for the backward pass. + bnb_4bit_compute_dtype (`torch.dtype` or str, *optional*, defaults to `torch.float32`): + This sets the computational type which might be different than the input type. For example, inputs might be + fp32, but computation can be set to bf16 for speedups. + bnb_4bit_quant_type (`str`, *optional*, defaults to `"fp4"`): + This sets the quantization data type in the bnb.nn.Linear4Bit layers. Options are FP4 and NF4 data types + which are specified by `fp4` or `nf4`. + bnb_4bit_use_double_quant (`bool`, *optional*, defaults to `False`): + This flag is used for nested quantization where the quantization constants from the first quantization are + quantized again. + bnb_4bit_quant_storage (`torch.dtype` or str, *optional*, defaults to `torch.uint8`): + This sets the storage type to pack the quanitzed 4-bit prarams. + kwargs (`dict[str, Any]`, *optional*): + Additional parameters from which to initialize the configuration object. + """ + + _exclude_attributes_at_init = ["_load_in_4bit", "_load_in_8bit", "quant_method"] + + def __init__( + self, + load_in_8bit=False, + load_in_4bit=False, + llm_int8_threshold=6.0, + llm_int8_skip_modules=None, + llm_int8_enable_fp32_cpu_offload=False, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=None, + bnb_4bit_quant_type="fp4", + bnb_4bit_use_double_quant=False, + bnb_4bit_quant_storage=None, + **kwargs, + ): + self.quant_method = QuantizationMethod.BITS_AND_BYTES + + if load_in_4bit and load_in_8bit: + raise ValueError("load_in_4bit and load_in_8bit are both True, but only one can be used at the same time") + + self._load_in_8bit = load_in_8bit + self._load_in_4bit = load_in_4bit + self.llm_int8_threshold = llm_int8_threshold + self.llm_int8_skip_modules = llm_int8_skip_modules + self.llm_int8_enable_fp32_cpu_offload = llm_int8_enable_fp32_cpu_offload + self.llm_int8_has_fp16_weight = llm_int8_has_fp16_weight + self.bnb_4bit_quant_type = bnb_4bit_quant_type + self.bnb_4bit_use_double_quant = bnb_4bit_use_double_quant + + if bnb_4bit_compute_dtype is None: + self.bnb_4bit_compute_dtype = torch.float32 + elif isinstance(bnb_4bit_compute_dtype, str): + self.bnb_4bit_compute_dtype = getattr(torch, bnb_4bit_compute_dtype) + elif isinstance(bnb_4bit_compute_dtype, torch.dtype): + self.bnb_4bit_compute_dtype = bnb_4bit_compute_dtype + else: + raise ValueError("bnb_4bit_compute_dtype must be a string or a torch.dtype") + + if bnb_4bit_quant_storage is None: + self.bnb_4bit_quant_storage = torch.uint8 + elif isinstance(bnb_4bit_quant_storage, str): + if bnb_4bit_quant_storage not in [ + "float16", + "float32", + "int8", + "uint8", + "float64", + "bfloat16", + ]: + raise ValueError( + "`bnb_4bit_quant_storage` must be a valid string (one of 'float16', 'float32', 'int8', 'uint8', 'float64', 'bfloat16') " + ) + self.bnb_4bit_quant_storage = getattr(torch, bnb_4bit_quant_storage) + elif isinstance(bnb_4bit_quant_storage, torch.dtype): + self.bnb_4bit_quant_storage = bnb_4bit_quant_storage + else: + raise ValueError("bnb_4bit_quant_storage must be a string or a torch.dtype") + + if kwargs and not all(k in self._exclude_attributes_at_init for k in kwargs): + logger.warning(f"Unused kwargs: {list(kwargs.keys())}. These kwargs are not used in {self.__class__}.") + + self.post_init() + + @property + def load_in_4bit(self): + return self._load_in_4bit + + @load_in_4bit.setter + def load_in_4bit(self, value: bool): + if not isinstance(value, bool): + raise TypeError("load_in_4bit must be a boolean") + + if self.load_in_8bit and value: + raise ValueError("load_in_4bit and load_in_8bit are both True, but only one can be used at the same time") + self._load_in_4bit = value + + @property + def load_in_8bit(self): + return self._load_in_8bit + + @load_in_8bit.setter + def load_in_8bit(self, value: bool): + if not isinstance(value, bool): + raise TypeError("load_in_8bit must be a boolean") + + if self.load_in_4bit and value: + raise ValueError("load_in_4bit and load_in_8bit are both True, but only one can be used at the same time") + self._load_in_8bit = value + + def post_init(self): + r""" + Safety checker that arguments are correct - also replaces some NoneType arguments with their default values. + """ + if not isinstance(self.load_in_4bit, bool): + raise TypeError("load_in_4bit must be a boolean") + + if not isinstance(self.load_in_8bit, bool): + raise TypeError("load_in_8bit must be a boolean") + + if not isinstance(self.llm_int8_threshold, float): + raise TypeError("llm_int8_threshold must be a float") + + if self.llm_int8_skip_modules is not None and not isinstance(self.llm_int8_skip_modules, list): + raise TypeError("llm_int8_skip_modules must be a list of strings") + if not isinstance(self.llm_int8_enable_fp32_cpu_offload, bool): + raise TypeError("llm_int8_enable_fp32_cpu_offload must be a boolean") + + if not isinstance(self.llm_int8_has_fp16_weight, bool): + raise TypeError("llm_int8_has_fp16_weight must be a boolean") + + if self.bnb_4bit_compute_dtype is not None and not isinstance(self.bnb_4bit_compute_dtype, torch.dtype): + raise TypeError("bnb_4bit_compute_dtype must be torch.dtype") + + if not isinstance(self.bnb_4bit_quant_type, str): + raise TypeError("bnb_4bit_quant_type must be a string") + + if not isinstance(self.bnb_4bit_use_double_quant, bool): + raise TypeError("bnb_4bit_use_double_quant must be a boolean") + + if self.load_in_4bit and not version.parse(importlib.metadata.version("bitsandbytes")) >= version.parse( + "0.39.0" + ): + raise ValueError( + "4 bit quantization requires bitsandbytes>=0.39.0 - please upgrade your bitsandbytes version" + ) + + def is_quantizable(self): + r""" + Returns `True` if the model is quantizable, `False` otherwise. + """ + return self.load_in_8bit or self.load_in_4bit + + def quantization_method(self): + r""" + This method returns the quantization method used for the model. If the model is not quantizable, it returns + `None`. + """ + if self.load_in_8bit: + return "llm_int8" + elif self.load_in_4bit and self.bnb_4bit_quant_type == "fp4": + return "fp4" + elif self.load_in_4bit and self.bnb_4bit_quant_type == "nf4": + return "nf4" + else: + return None + + def to_dict(self) -> dict[str, Any]: + """ + Serializes this instance to a Python dictionary. Returns: + `dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance. + """ + output = copy.deepcopy(self.__dict__) + output["bnb_4bit_compute_dtype"] = str(output["bnb_4bit_compute_dtype"]).split(".")[1] + output["bnb_4bit_quant_storage"] = str(output["bnb_4bit_quant_storage"]).split(".")[1] + output["load_in_4bit"] = self.load_in_4bit + output["load_in_8bit"] = self.load_in_8bit + + return output + + def __repr__(self): + config_dict = self.to_dict() + return f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True)}\n" + + def to_diff_dict(self) -> dict[str, Any]: + """ + Removes all attributes from config which correspond to the default config attributes for better readability and + serializes to a Python dictionary. + + Returns: + `dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance, + """ + config_dict = self.to_dict() + + # get the default config dict + default_config_dict = BitsAndBytesConfig().to_dict() + + serializable_config_dict = {} + + # only serialize values that differ from the default config + for key, value in config_dict.items(): + if value != default_config_dict[key]: + serializable_config_dict[key] = value + + return serializable_config_dict + + +@dataclass +class GGUFQuantizationConfig(QuantizationConfigMixin): + """This is a config class for GGUF Quantization techniques. + + Args: + compute_dtype: (`torch.dtype`, defaults to `torch.float32`): + This sets the computational type which might be different than the input type. For example, inputs might be + fp32, but computation can be set to bf16 for speedups. + + """ + + def __init__(self, compute_dtype: "torch.dtype" | None = None): + self.quant_method = QuantizationMethod.GGUF + self.compute_dtype = compute_dtype + self.pre_quantized = True + + # TODO: (Dhruv) Add this as an init argument when we can support loading unquantized checkpoints. + self.modules_to_not_convert = None + + if self.compute_dtype is None: + self.compute_dtype = torch.float32 + + +@dataclass +class TorchAoConfig(QuantizationConfigMixin): + """This is a config class for torchao quantization/sparsity techniques. + + Args: + quant_type (`str` | AOBaseConfig): + The type of quantization we want to use, currently supporting: + - **Integer quantization:** + - Full function names: `int4_weight_only`, `int8_dynamic_activation_int4_weight`, + `int8_weight_only`, `int8_dynamic_activation_int8_weight` + - Shorthands: `int4wo`, `int4dq`, `int8wo`, `int8dq` + + - **Floating point 8-bit quantization:** + - Full function names: `float8_weight_only`, `float8_dynamic_activation_float8_weight`, + `float8_static_activation_float8_weight` + - Shorthands: `float8wo`, `float8wo_e5m2`, `float8wo_e4m3`, `float8dq`, `float8dq_e4m3`, + `float8_e4m3_tensor`, `float8_e4m3_row`, + + - **Floating point X-bit quantization:** (in torchao <= 0.14.1, not supported in torchao >= 0.15.0) + - Full function names: `fpx_weight_only` + - Shorthands: `fpX_eAwB`, where `X` is the number of bits (between `1` to `7`), `A` is the number + of exponent bits and `B` is the number of mantissa bits. The constraint of `X == A + B + 1` must + be satisfied for a given shorthand notation. + + - **Unsigned Integer quantization:** + - Full function names: `uintx_weight_only` + - Shorthands: `uint1wo`, `uint2wo`, `uint3wo`, `uint4wo`, `uint5wo`, `uint6wo`, `uint7wo` + - An AOBaseConfig instance: for more advanced configuration options. + modules_to_not_convert (`list[str]`, *optional*, default to `None`): + The list of modules to not quantize, useful for quantizing models that explicitly require to have some + modules left in their original precision. + kwargs (`dict[str, Any]`, *optional*): + The keyword arguments for the chosen type of quantization, for example, int4_weight_only quantization + supports two keyword arguments `group_size` and `inner_k_tiles` currently. More API examples and + documentation of arguments can be found in + https://github.com/pytorch/ao/tree/main/torchao/quantization#other-available-quantization-techniques + + Example: + ```python + from diffusers import FluxTransformer2DModel, TorchAoConfig + + # AOBaseConfig-based configuration + from torchao.quantization import Int8WeightOnlyConfig + + quantization_config = TorchAoConfig(Int8WeightOnlyConfig()) + + # String-based config + quantization_config = TorchAoConfig("int8wo") + transformer = FluxTransformer2DModel.from_pretrained( + "black-forest-labs/Flux.1-Dev", + subfolder="transformer", + quantization_config=quantization_config, + torch_dtype=torch.bfloat16, + ) + ``` + """ + + def __init__( + self, + quant_type: str | "AOBaseConfig", # noqa: F821 + modules_to_not_convert: list[str] | None = None, + **kwargs, + ) -> None: + self.quant_method = QuantizationMethod.TORCHAO + self.quant_type = quant_type + self.modules_to_not_convert = modules_to_not_convert + + # When we load from serialized config, "quant_type_kwargs" will be the key + if "quant_type_kwargs" in kwargs: + self.quant_type_kwargs = kwargs["quant_type_kwargs"] + else: + self.quant_type_kwargs = kwargs + + self.post_init() + + def post_init(self): + if not isinstance(self.quant_type, str): + if is_torchao_version("<=", "0.9.0"): + raise ValueError( + f"torchao <= 0.9.0 only supports string quant_type, got {type(self.quant_type).__name__}. " + f"Upgrade to torchao > 0.9.0 to use AOBaseConfig." + ) + + from torchao.quantization.quant_api import AOBaseConfig + + if not isinstance(self.quant_type, AOBaseConfig): + raise TypeError(f"quant_type must be a AOBaseConfig instance, got {type(self.quant_type).__name__}") + + elif isinstance(self.quant_type, str): + TORCHAO_QUANT_TYPE_METHODS = self._get_torchao_quant_type_to_method() + + if self.quant_type not in TORCHAO_QUANT_TYPE_METHODS.keys(): + is_floatx_quant_type = self.quant_type.startswith("fp") + is_float_quant_type = self.quant_type.startswith("float") or is_floatx_quant_type + if is_float_quant_type and not self._is_xpu_or_cuda_capability_atleast_8_9(): + raise ValueError( + f"Requested quantization type: {self.quant_type} is not supported on GPUs with CUDA capability <= 8.9. You " + f"can check the CUDA capability of your GPU using `torch.cuda.get_device_capability()`." + ) + elif is_floatx_quant_type and not is_torchao_version("<=", "0.14.1"): + raise ValueError( + f"Requested quantization type: {self.quant_type} is only supported in torchao <= 0.14.1. " + f"Please downgrade to torchao <= 0.14.1 to use this quantization type." + ) + + raise ValueError( + f"Requested quantization type: {self.quant_type} is not supported or is an incorrect `quant_type` name. If you think the " + f"provided quantization type should be supported, please open an issue at https://github.com/huggingface/diffusers/issues." + ) + + method = TORCHAO_QUANT_TYPE_METHODS[self.quant_type] + signature = inspect.signature(method) + all_kwargs = { + param.name + for param in signature.parameters.values() + if param.kind in [inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD] + } + unsupported_kwargs = list(self.quant_type_kwargs.keys() - all_kwargs) + + if len(unsupported_kwargs) > 0: + raise ValueError( + f'The quantization method "{self.quant_type}" does not support the following keyword arguments: ' + f"{unsupported_kwargs}. The following keywords arguments are supported: {all_kwargs}." + ) + + def to_dict(self): + """Convert configuration to a dictionary.""" + d = super().to_dict() + + if isinstance(self.quant_type, str): + # Handle layout serialization if present + if "quant_type_kwargs" in d and "layout" in d["quant_type_kwargs"]: + if is_dataclass(d["quant_type_kwargs"]["layout"]): + d["quant_type_kwargs"]["layout"] = [ + d["quant_type_kwargs"]["layout"].__class__.__name__, + dataclasses.asdict(d["quant_type_kwargs"]["layout"]), + ] + if isinstance(d["quant_type_kwargs"]["layout"], list): + assert len(d["quant_type_kwargs"]["layout"]) == 2, "layout saves layout name and layout kwargs" + assert isinstance(d["quant_type_kwargs"]["layout"][0], str), "layout name must be a string" + assert isinstance(d["quant_type_kwargs"]["layout"][1], dict), "layout kwargs must be a dict" + else: + raise ValueError("layout must be a list") + else: + # Handle AOBaseConfig serialization + from torchao.core.config import config_to_dict + + # For now we assume there is 1 config per Transformer, however in the future + # We may want to support a config per fqn. + d["quant_type"] = {"default": config_to_dict(self.quant_type)} + + return d + + @classmethod + def from_dict(cls, config_dict, return_unused_kwargs=False, **kwargs): + """Create configuration from a dictionary.""" + if not is_torchao_version(">", "0.9.0"): + raise NotImplementedError("TorchAoConfig requires torchao > 0.9.0 for construction from dict") + config_dict = config_dict.copy() + quant_type = config_dict.pop("quant_type") + + if isinstance(quant_type, str): + return cls(quant_type=quant_type, **config_dict) + # Check if we only have one key which is "default" + # In the future we may update this + assert len(quant_type) == 1 and "default" in quant_type, ( + "Expected only one key 'default' in quant_type dictionary" + ) + quant_type = quant_type["default"] + + # Deserialize quant_type if needed + from torchao.core.config import config_from_dict + + quant_type = config_from_dict(quant_type) + + return cls(quant_type=quant_type, **config_dict) + + @classmethod + def _get_torchao_quant_type_to_method(cls): + r""" + Returns supported torchao quantization types with all commonly used notations. + """ + + if is_torchao_available(): + # TODO(aryan): Support sparsify + from torchao.quantization import ( + float8_dynamic_activation_float8_weight, + float8_static_activation_float8_weight, + float8_weight_only, + int4_weight_only, + int8_dynamic_activation_int4_weight, + int8_dynamic_activation_int8_weight, + int8_weight_only, + uintx_weight_only, + ) + + if is_torchao_version("<=", "0.14.1"): + from torchao.quantization import fpx_weight_only + # TODO(aryan): Add a note on how to use PerAxis and PerGroup observers + from torchao.quantization.observer import PerRow, PerTensor + + def generate_float8dq_types(dtype: torch.dtype): + name = "e5m2" if dtype == torch.float8_e5m2 else "e4m3" + types = {} + + for granularity_cls in [PerTensor, PerRow]: + # Note: Activation and Weights cannot have different granularities + granularity_name = "tensor" if granularity_cls is PerTensor else "row" + types[f"float8dq_{name}_{granularity_name}"] = partial( + float8_dynamic_activation_float8_weight, + activation_dtype=dtype, + weight_dtype=dtype, + granularity=(granularity_cls(), granularity_cls()), + ) + + return types + + def generate_fpx_quantization_types(bits: int): + if is_torchao_version("<=", "0.14.1"): + types = {} + + for ebits in range(1, bits): + mbits = bits - ebits - 1 + types[f"fp{bits}_e{ebits}m{mbits}"] = partial(fpx_weight_only, ebits=ebits, mbits=mbits) + + non_sign_bits = bits - 1 + default_ebits = (non_sign_bits + 1) // 2 + default_mbits = non_sign_bits - default_ebits + types[f"fp{bits}"] = partial(fpx_weight_only, ebits=default_ebits, mbits=default_mbits) + + return types + else: + raise ValueError("Floating point X-bit quantization is not supported in torchao >= 0.15.0") + + INT4_QUANTIZATION_TYPES = { + # int4 weight + bfloat16/float16 activation + "int4wo": int4_weight_only, + "int4_weight_only": int4_weight_only, + # int4 weight + int8 activation + "int4dq": int8_dynamic_activation_int4_weight, + "int8_dynamic_activation_int4_weight": int8_dynamic_activation_int4_weight, + } + + INT8_QUANTIZATION_TYPES = { + # int8 weight + bfloat16/float16 activation + "int8wo": int8_weight_only, + "int8_weight_only": int8_weight_only, + # int8 weight + int8 activation + "int8dq": int8_dynamic_activation_int8_weight, + "int8_dynamic_activation_int8_weight": int8_dynamic_activation_int8_weight, + } + + # TODO(aryan): handle torch 2.2/2.3 + FLOATX_QUANTIZATION_TYPES = { + # float8_e5m2 weight + bfloat16/float16 activation + "float8wo": partial(float8_weight_only, weight_dtype=torch.float8_e5m2), + "float8_weight_only": float8_weight_only, + "float8wo_e5m2": partial(float8_weight_only, weight_dtype=torch.float8_e5m2), + # float8_e4m3 weight + bfloat16/float16 activation + "float8wo_e4m3": partial(float8_weight_only, weight_dtype=torch.float8_e4m3fn), + # float8_e5m2 weight + float8 activation (dynamic) + "float8dq": float8_dynamic_activation_float8_weight, + "float8_dynamic_activation_float8_weight": float8_dynamic_activation_float8_weight, + # ===== Matrix multiplication is not supported in float8_e5m2 so the following errors out. + # However, changing activation_dtype=torch.float8_e4m3 might work here ===== + # "float8dq_e5m2": partial( + # float8_dynamic_activation_float8_weight, + # activation_dtype=torch.float8_e5m2, + # weight_dtype=torch.float8_e5m2, + # ), + # **generate_float8dq_types(torch.float8_e5m2), + # ===== ===== + # float8_e4m3 weight + float8 activation (dynamic) + "float8dq_e4m3": partial( + float8_dynamic_activation_float8_weight, + activation_dtype=torch.float8_e4m3fn, + weight_dtype=torch.float8_e4m3fn, + ), + **generate_float8dq_types(torch.float8_e4m3fn), + # float8 weight + float8 activation (static) + "float8_static_activation_float8_weight": float8_static_activation_float8_weight, + } + + if is_torchao_version("<=", "0.14.1"): + FLOATX_QUANTIZATION_TYPES.update(generate_fpx_quantization_types(3)) + FLOATX_QUANTIZATION_TYPES.update(generate_fpx_quantization_types(4)) + FLOATX_QUANTIZATION_TYPES.update(generate_fpx_quantization_types(5)) + FLOATX_QUANTIZATION_TYPES.update(generate_fpx_quantization_types(6)) + FLOATX_QUANTIZATION_TYPES.update(generate_fpx_quantization_types(7)) + + UINTX_QUANTIZATION_DTYPES = { + "uintx_weight_only": uintx_weight_only, + "uint1wo": partial(uintx_weight_only, dtype=torch.uint1), + "uint2wo": partial(uintx_weight_only, dtype=torch.uint2), + "uint3wo": partial(uintx_weight_only, dtype=torch.uint3), + "uint4wo": partial(uintx_weight_only, dtype=torch.uint4), + "uint5wo": partial(uintx_weight_only, dtype=torch.uint5), + "uint6wo": partial(uintx_weight_only, dtype=torch.uint6), + "uint7wo": partial(uintx_weight_only, dtype=torch.uint7), + # "uint8wo": partial(uintx_weight_only, dtype=torch.uint8), # uint8 quantization is not supported + } + + QUANTIZATION_TYPES = {} + QUANTIZATION_TYPES.update(INT4_QUANTIZATION_TYPES) + QUANTIZATION_TYPES.update(INT8_QUANTIZATION_TYPES) + QUANTIZATION_TYPES.update(UINTX_QUANTIZATION_DTYPES) + + if cls._is_xpu_or_cuda_capability_atleast_8_9(): + QUANTIZATION_TYPES.update(FLOATX_QUANTIZATION_TYPES) + + return QUANTIZATION_TYPES + else: + raise ValueError( + "TorchAoConfig requires torchao to be installed, please install with `pip install torchao`" + ) + + @staticmethod + def _is_xpu_or_cuda_capability_atleast_8_9() -> bool: + if torch.cuda.is_available(): + major, minor = torch.cuda.get_device_capability() + if major == 8: + return minor >= 9 + return major >= 9 + elif torch.xpu.is_available(): + return True + else: + raise RuntimeError("TorchAO requires a CUDA compatible GPU or Intel XPU and installation of PyTorch.") + + def get_apply_tensor_subclass(self): + """Create the appropriate quantization method based on configuration.""" + if not isinstance(self.quant_type, str): + return self.quant_type + else: + methods = self._get_torchao_quant_type_to_method() + quant_type_kwargs = self.quant_type_kwargs.copy() + if ( + not torch.cuda.is_available() + and is_torchao_available() + and self.quant_type == "int4_weight_only" + and version.parse(importlib.metadata.version("torchao")) >= version.parse("0.8.0") + and quant_type_kwargs.get("layout", None) is None + ): + if torch.xpu.is_available(): + if version.parse(importlib.metadata.version("torchao")) >= version.parse( + "0.11.0" + ) and version.parse(importlib.metadata.version("torch")) > version.parse("2.7.9"): + from torchao.dtypes import Int4XPULayout + from torchao.quantization.quant_primitives import ZeroPointDomain + + quant_type_kwargs["layout"] = Int4XPULayout() + quant_type_kwargs["zero_point_domain"] = ZeroPointDomain.INT + else: + raise ValueError( + "TorchAoConfig requires torchao >= 0.11.0 and torch >= 2.8.0 for XPU support. Please upgrade the version or use run on CPU with the cpu version pytorch." + ) + else: + from torchao.dtypes import Int4CPULayout + + quant_type_kwargs["layout"] = Int4CPULayout() + + return methods[self.quant_type](**quant_type_kwargs) + + def __repr__(self): + r""" + Example of how this looks for `TorchAoConfig("uint4wo", group_size=32)`: + + ``` + TorchAoConfig { + "modules_to_not_convert": null, + "quant_method": "torchao", + "quant_type": "uint4wo", + "quant_type_kwargs": { + "group_size": 32 + } + } + ``` + """ + config_dict = self.to_dict() + return ( + f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True, cls=TorchAoJSONEncoder)}\n" + ) + + +@dataclass +class QuantoConfig(QuantizationConfigMixin): + """ + This is a wrapper class about all possible attributes and features that you can play with a model that has been + loaded using `quanto`. + + Args: + weights_dtype (`str`, *optional*, defaults to `"int8"`): + The target dtype for the weights after quantization. Supported values are ("float8","int8","int4","int2") + modules_to_not_convert (`list`, *optional*, default to `None`): + The list of modules to not quantize, useful for quantizing models that explicitly require to have some + modules left in their original precision (e.g. Whisper encoder, Llava encoder, Mixtral gate layers). + """ + + def __init__( + self, + weights_dtype: str = "int8", + modules_to_not_convert: list[str] | None = None, + **kwargs, + ): + self.quant_method = QuantizationMethod.QUANTO + self.weights_dtype = weights_dtype + self.modules_to_not_convert = modules_to_not_convert + + self.post_init() + + def post_init(self): + r""" + Safety checker that arguments are correct + """ + accepted_weights = ["float8", "int8", "int4", "int2"] + if self.weights_dtype not in accepted_weights: + raise ValueError(f"Only support weights in {accepted_weights} but found {self.weights_dtype}") + + +@dataclass +class NVIDIAModelOptConfig(QuantizationConfigMixin): + """This is a config class to use nvidia modelopt for quantization. + + Args: + quant_type (`str`): + The type of quantization we want to use, following is how to use: + **weightquant_activationquant ==> FP8_FP8** In the above example we have use FP8 for both weight and + activation quantization. Following are the all the options: + - FP8 + - INT8 + - INT4 + - NF4 + - NVFP4 + modules_to_not_convert (`list[str]`, *optional*, default to `None`): + The list of modules to not quantize, useful for quantizing models that explicitly require to have some + weight_only (`bool`, *optional*, default to `False`): + If set to `True`, the quantization will be applied only to the weights of the model. + channel_quantize (`int`, *optional*, default to `None`): + The channel quantization axis, useful for quantizing models across different axes. + block_quantize (`int`, *optional*, default to `None`): + The block size, useful to further quantize each channel/axes into blocks. + scale_channel_quantize (`int`, *optional*, default to `None`): + The scale channel quantization axis, useful for quantizing calculated scale across different axes. + scale_block_quantize (`int`, *optional*, default to `None`): + The scale block size, useful for quantizing each scale channel/axes into blocks. + algorithm (`str`, *optional*, default to `"max"`): + The algorithm to use for quantization, currently only supports `"max"`. + forward_loop (`Callable`, *optional*, default to `None`): + The forward loop function to use for calibration during quantization. + modelopt_config (`dict`, *optional*, default to `None`): + The modelopt config, useful for passing custom configs to modelopt. + disable_conv_quantization (`bool`, *optional*, default to `False`): + If set to `True`, the quantization will be disabled for convolutional layers. + kwargs (`dict[str, Any]`, *optional*): + Additional parameters which are to be used for calibration. + """ + + quanttype_to_numbits = { + "FP8": (4, 3), + "INT8": 8, + "INT4": 4, + "NF4": 4, + "NVFP4": (2, 1), + } + quanttype_to_scalingbits = { + "NF4": 8, + "NVFP4": (4, 3), + } + + def __init__( + self, + quant_type: str, + modules_to_not_convert: list[str] | None = None, + weight_only: bool = True, + channel_quantize: int | None = None, + block_quantize: int | None = None, + scale_channel_quantize: int | None = None, + scale_block_quantize: int | None = None, + algorithm: str = "max", + forward_loop: Callable | None = None, + modelopt_config: dict | None = None, + disable_conv_quantization: bool = False, + **kwargs, + ) -> None: + self.quant_method = QuantizationMethod.MODELOPT + self._normalize_quant_type(quant_type) + self.modules_to_not_convert = modules_to_not_convert + self.weight_only = weight_only + self.channel_quantize = channel_quantize + self.block_quantize = block_quantize + self.calib_cfg = { + "method": algorithm, + # add more options here if needed + } + self.forward_loop = forward_loop + self.scale_channel_quantize = scale_channel_quantize + self.scale_block_quantize = scale_block_quantize + self.modelopt_config = self.get_config_from_quant_type() if not modelopt_config else modelopt_config + self.disable_conv_quantization = disable_conv_quantization + + def check_model_patching(self, operation: str = "loading"): + # ModelOpt imports diffusers internally. This is here to prevent circular imports + from modelopt.torch.opt.plugins.huggingface import _PATCHED_CLASSES + + if len(_PATCHED_CLASSES) == 0: + warning_msg = ( + f"Not {operation} weights in modelopt format. This might cause unreliable behavior." + "Please make sure to run the following code before loading/saving model weights:\n\n" + " from modelopt.torch.opt import enable_huggingface_checkpointing\n" + " enable_huggingface_checkpointing()\n" + ) + warnings.warn(warning_msg) + + def _normalize_quant_type(self, quant_type: str) -> str: + """ + Validates and normalizes the quantization type string. + + Splits the quant_type into weight and activation components, verifies them against supported types, and + replaces unsupported values with safe defaults. + + Args: + quant_type (str): The input quantization type string (e.g., 'FP8_INT8'). + + Returns: + str: A valid quantization type string (e.g., 'FP8_INT8' or 'FP8'). + """ + parts = quant_type.split("_") + w_type = parts[0] + act_type = parts[1] if len(parts) > 1 else None + if len(parts) > 2: + logger.warning(f"Quantization type {quant_type} is not supported. Picking FP8_INT8 as default") + w_type = "FP8" + act_type = None + else: + if w_type not in NVIDIAModelOptConfig.quanttype_to_numbits: + logger.warning(f"Weight Quantization type {w_type} is not supported. Picking FP8 as default") + w_type = "FP8" + if act_type is not None and act_type not in NVIDIAModelOptConfig.quanttype_to_numbits: + logger.warning(f"Activation Quantization type {act_type} is not supported. Picking INT8 as default") + act_type = None + self.quant_type = w_type + ("_" + act_type if act_type is not None else "") + + def get_config_from_quant_type(self) -> dict[str, Any]: + """ + Get the config from the quantization type. + """ + import modelopt.torch.quantization as mtq + + BASE_CONFIG = { + "quant_cfg": { + "*weight_quantizer": {"fake_quant": False}, + "*input_quantizer": {}, + "*output_quantizer": {"enable": False}, + "*q_bmm_quantizer": {}, + "*k_bmm_quantizer": {}, + "*v_bmm_quantizer": {}, + "*softmax_quantizer": {}, + **mtq.config._default_disabled_quantizer_cfg, + }, + "algorithm": self.calib_cfg, + } + + quant_cfg = BASE_CONFIG["quant_cfg"] + if self.weight_only: + for k in quant_cfg: + if "*weight_quantizer" not in k and not quant_cfg[k]: + quant_cfg[k]["enable"] = False + + parts = self.quant_type.split("_") + w_type = parts[0] + act_type = parts[1].replace("A", "") if len(parts) > 1 else None + for k in quant_cfg: + if k not in mtq.config._default_disabled_quantizer_cfg and "enable" not in quant_cfg[k]: + if k == "*input_quantizer": + if act_type is not None: + quant_cfg[k]["num_bits"] = NVIDIAModelOptConfig.quanttype_to_numbits[act_type] + continue + quant_cfg[k]["num_bits"] = NVIDIAModelOptConfig.quanttype_to_numbits[w_type] + + if self.block_quantize is not None and self.channel_quantize is not None: + quant_cfg["*weight_quantizer"]["block_sizes"] = {self.channel_quantize: self.block_quantize} + quant_cfg["*input_quantizer"]["block_sizes"] = { + self.channel_quantize: self.block_quantize, + "type": "dynamic", + } + elif self.channel_quantize is not None: + quant_cfg["*weight_quantizer"]["axis"] = self.channel_quantize + quant_cfg["*input_quantizer"]["axis"] = self.channel_quantize + quant_cfg["*input_quantizer"]["type"] = "dynamic" + + # Only fixed scaling sizes are supported for now in modelopt + if self.scale_channel_quantize is not None and self.scale_block_quantize is not None: + if w_type in NVIDIAModelOptConfig.quanttype_to_scalingbits: + quant_cfg["*weight_quantizer"]["block_sizes"].update( + { + "scale_bits": NVIDIAModelOptConfig.quanttype_to_scalingbits[w_type], + "scale_block_sizes": {self.scale_channel_quantize: self.scale_block_quantize}, + } + ) + if act_type and act_type in NVIDIAModelOptConfig.quanttype_to_scalingbits: + quant_cfg["*input_quantizer"]["block_sizes"].update( + { + "scale_bits": NVIDIAModelOptConfig.quanttype_to_scalingbits[act_type], + "scale_block_sizes": {self.scale_channel_quantize: self.scale_block_quantize}, + } + ) + + return BASE_CONFIG diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a4e8a1f41a1e4bf3bbb53c5684398277eb671a83 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__init__.py @@ -0,0 +1 @@ +from .quanto_quantizer import QuantoQuantizer diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36237d32d5038803d0e69911c2313f34c80ebf76 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/quanto_quantizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/quanto_quantizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21efa9dcd4601e08b196fb4ff8c4e7a0938635ad Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/quanto_quantizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..acf9b34c1e28d84eb6c83a587b1b188fcd12428c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/quanto_quantizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/quanto_quantizer.py new file mode 100644 index 0000000000000000000000000000000000000000..a036dabfe6f4bd971860a7be1c067d7e1066e09e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/quanto_quantizer.py @@ -0,0 +1,181 @@ +from typing import TYPE_CHECKING, Any + +from diffusers.utils.import_utils import is_optimum_quanto_version + +from ...utils import ( + get_module_from_name, + is_accelerate_available, + is_accelerate_version, + is_optimum_quanto_available, + is_torch_available, + logging, +) +from ..base import DiffusersQuantizer + + +if TYPE_CHECKING: + from ...models.modeling_utils import ModelMixin + + +if is_torch_available(): + import torch + +if is_accelerate_available(): + from accelerate.utils import CustomDtype, set_module_tensor_to_device + +if is_optimum_quanto_available(): + from .utils import _replace_with_quanto_layers + +logger = logging.get_logger(__name__) + + +class QuantoQuantizer(DiffusersQuantizer): + r""" + Diffusers Quantizer for Optimum Quanto + """ + + use_keep_in_fp32_modules = True + requires_calibration = False + required_packages = ["quanto", "accelerate"] + + def __init__(self, quantization_config, **kwargs): + super().__init__(quantization_config, **kwargs) + + def validate_environment(self, *args, **kwargs): + if not is_optimum_quanto_available(): + raise ImportError( + "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)" + ) + if not is_optimum_quanto_version(">=", "0.2.6"): + raise ImportError( + "Loading an optimum-quanto quantized model requires `optimum-quanto>=0.2.6`. " + "Please upgrade your installation with `pip install --upgrade optimum-quanto" + ) + + if not is_accelerate_available(): + raise ImportError( + "Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)" + ) + + device_map = kwargs.get("device_map", None) + if isinstance(device_map, dict) and len(device_map.keys()) > 1: + raise ValueError( + "`device_map` for multi-GPU inference or CPU/disk offload is currently not supported with Diffusers and the Quanto backend" + ) + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ): + # Quanto imports diffusers internally. This is here to prevent circular imports + from optimum.quanto import QModuleMixin, QTensor + from optimum.quanto.tensor.packed import PackedTensor + + module, tensor_name = get_module_from_name(model, param_name) + if self.pre_quantized and any(isinstance(module, t) for t in [QTensor, PackedTensor]): + return True + elif isinstance(module, QModuleMixin) and "weight" in tensor_name: + return not module.frozen + + return False + + def create_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + target_device: "torch.device", + *args, + **kwargs, + ): + """ + Create the quantized parameter by calling .freeze() after setting it to the module. + """ + + dtype = kwargs.get("dtype", torch.float32) + module, tensor_name = get_module_from_name(model, param_name) + if self.pre_quantized: + setattr(module, tensor_name, param_value) + else: + set_module_tensor_to_device(model, param_name, target_device, param_value, dtype) + module.freeze() + module.weight.requires_grad = False + + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + max_memory = {key: val * 0.90 for key, val in max_memory.items()} + return max_memory + + def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype": + if is_accelerate_version(">=", "0.27.0"): + mapping = { + "int8": torch.int8, + "float8": CustomDtype.FP8, + "int4": CustomDtype.INT4, + "int2": CustomDtype.INT2, + } + target_dtype = mapping[self.quantization_config.weights_dtype] + + return target_dtype + + def update_torch_dtype(self, torch_dtype: "torch.dtype" = None) -> "torch.dtype": + if torch_dtype is None: + logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.") + torch_dtype = torch.float32 + return torch_dtype + + def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]: + # Quanto imports diffusers internally. This is here to prevent circular imports + from optimum.quanto import QModuleMixin + + not_missing_keys = [] + for name, module in model.named_modules(): + if isinstance(module, QModuleMixin): + for missing in missing_keys: + if ( + (name in missing or name in f"{prefix}.{missing}") + and not missing.endswith(".weight") + and not missing.endswith(".bias") + ): + not_missing_keys.append(missing) + return [k for k in missing_keys if k not in not_missing_keys] + + def _process_model_before_weight_loading( + self, + model: "ModelMixin", + device_map, + keep_in_fp32_modules: list[str] = [], + **kwargs, + ): + self.modules_to_not_convert = self.quantization_config.modules_to_not_convert + + if not isinstance(self.modules_to_not_convert, list): + self.modules_to_not_convert = [self.modules_to_not_convert] + + self.modules_to_not_convert.extend(keep_in_fp32_modules) + + model = _replace_with_quanto_layers( + model, + modules_to_not_convert=self.modules_to_not_convert, + quantization_config=self.quantization_config, + pre_quantized=self.pre_quantized, + ) + model.config.quantization_config = self.quantization_config + + def _process_model_after_weight_loading(self, model, **kwargs): + return model + + @property + def is_trainable(self): + return True + + @property + def is_serializable(self): + return True + + @property + def is_compileable(self) -> bool: + return True diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6f41fd36b43a7c222de17a297523939d27f34537 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/quanto/utils.py @@ -0,0 +1,60 @@ +import torch.nn as nn + +from ...utils import is_accelerate_available, logging + + +logger = logging.get_logger(__name__) + +if is_accelerate_available(): + from accelerate import init_empty_weights + + +def _replace_with_quanto_layers(model, quantization_config, modules_to_not_convert: list, pre_quantized=False): + # Quanto imports diffusers internally. These are placed here to avoid circular imports + from optimum.quanto import QLinear, freeze, qfloat8, qint2, qint4, qint8 + + def _get_weight_type(dtype: str): + return {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}[dtype] + + def _replace_layers(model, quantization_config, modules_to_not_convert): + has_children = list(model.children()) + if not has_children: + return model + + for name, module in model.named_children(): + _replace_layers(module, quantization_config, modules_to_not_convert) + + if name in modules_to_not_convert: + continue + + if isinstance(module, nn.Linear): + with init_empty_weights(): + qlinear = QLinear( + in_features=module.in_features, + out_features=module.out_features, + bias=module.bias is not None, + dtype=module.weight.dtype, + weights=_get_weight_type(quantization_config.weights_dtype), + ) + model._modules[name] = qlinear + model._modules[name].source_cls = type(module) + model._modules[name].requires_grad_(False) + + return model + + model = _replace_layers(model, quantization_config, modules_to_not_convert) + has_been_replaced = any(isinstance(replaced_module, QLinear) for _, replaced_module in model.named_modules()) + + if not has_been_replaced: + logger.warning( + f"{model.__class__.__name__} does not appear to have any `nn.Linear` modules. Quantization will not be applied." + " Please check your model architecture, or submit an issue on Github if you think this is a bug." + " https://github.com/huggingface/diffusers/issues/new" + ) + + # We need to freeze the pre_quantized model in order for the loaded state_dict and model state dict + # to match when trying to load weights with load_model_dict_into_meta + if pre_quantized: + freeze(model) + + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c56bf54c25158fb9e7ef9ff3bc1eb651743764fb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .torchao_quantizer import TorchAoHfQuantizer diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74d6de1dae12f79db5e716bca12786ba489a1728 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__pycache__/torchao_quantizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__pycache__/torchao_quantizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88cbbc2fe6905eda7995bdc98579789b39eab3e1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/__pycache__/torchao_quantizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/torchao_quantizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/torchao_quantizer.py new file mode 100644 index 0000000000000000000000000000000000000000..1679ed26a1046d2d8084bc175f30207f22fb37c7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/quantizers/torchao/torchao_quantizer.py @@ -0,0 +1,424 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Adapted from +https://github.com/huggingface/transformers/blob/3a8eb74668e9c2cc563b2f5c62fac174797063e0/src/transformers/quantizers/quantizer_torchao.py +""" + +import importlib +import re +import types +from fnmatch import fnmatch +from typing import TYPE_CHECKING, Any + +from packaging import version + +from ...utils import ( + get_module_from_name, + is_torch_available, + is_torch_version, + is_torchao_available, + is_torchao_version, + logging, +) +from ..base import DiffusersQuantizer + + +logger = logging.get_logger(__name__) + + +if TYPE_CHECKING: + from ...models.modeling_utils import ModelMixin + + +if is_torch_available(): + import torch + import torch.nn as nn + + if is_torch_version(">=", "2.5"): + SUPPORTED_TORCH_DTYPES_FOR_QUANTIZATION = ( + # At the moment, only int8 is supported for integer quantization dtypes. + # In Torch 2.6, int1-int7 will be introduced, so this can be visited in the future + # to support more quantization methods, such as intx_weight_only. + torch.int8, + torch.float8_e4m3fn, + torch.float8_e5m2, + torch.uint1, + torch.uint2, + torch.uint3, + torch.uint4, + torch.uint5, + torch.uint6, + torch.uint7, + ) + else: + SUPPORTED_TORCH_DTYPES_FOR_QUANTIZATION = ( + torch.int8, + torch.float8_e4m3fn, + torch.float8_e5m2, + ) + +if is_torchao_available(): + from torchao.quantization import quantize_ + + +def _update_torch_safe_globals(): + safe_globals = [ + (torch.uint1, "torch.uint1"), + (torch.uint2, "torch.uint2"), + (torch.uint3, "torch.uint3"), + (torch.uint4, "torch.uint4"), + (torch.uint5, "torch.uint5"), + (torch.uint6, "torch.uint6"), + (torch.uint7, "torch.uint7"), + ] + try: + from torchao.dtypes import NF4Tensor + from torchao.dtypes.uintx.uintx_layout import UintxAQTTensorImpl, UintxTensor + + safe_globals.extend([UintxTensor, UintxAQTTensorImpl, NF4Tensor]) + + # note: is_torchao_version(">=", "0.16.0") does not work correctly + # with torchao nightly, so using a ">" check which does work correctly + if is_torchao_version(">", "0.15.0"): + pass + else: + from torchao.dtypes.floatx.float8_layout import Float8AQTTensorImpl + from torchao.dtypes.uintx.uint4_layout import UInt4Tensor + + safe_globals.extend([UInt4Tensor, Float8AQTTensorImpl]) + + except (ImportError, ModuleNotFoundError) as e: + logger.warning( + "Unable to import `torchao` Tensor objects. This may affect loading checkpoints serialized with `torchao`" + ) + logger.debug(e) + + finally: + torch.serialization.add_safe_globals(safe_globals=safe_globals) + + +if ( + is_torch_available() + and is_torch_version(">=", "2.6.0") + and is_torchao_available() + and is_torchao_version(">=", "0.7.0") +): + _update_torch_safe_globals() + + +def fuzzy_match_size(config_name: str) -> str | None: + """ + Extract the size digit from strings like "4weight", "8weight". Returns the digit as an integer if found, otherwise + None. + """ + config_name = config_name.lower() + + str_match = re.search(r"(\d)weight", config_name) + + if str_match: + return str_match.group(1) + + return None + + +def _quantization_type(weight): + from torchao.dtypes import AffineQuantizedTensor + from torchao.quantization.linear_activation_quantized_tensor import LinearActivationQuantizedTensor + + if isinstance(weight, AffineQuantizedTensor): + return f"{weight.__class__.__name__}({weight._quantization_type()})" + + if isinstance(weight, LinearActivationQuantizedTensor): + return f"{weight.__class__.__name__}(activation={weight.input_quant_func}, weight={_quantization_type(weight.original_weight_tensor)})" + + +def _linear_extra_repr(self): + weight = _quantization_type(self.weight) + if weight is None: + return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight=None" + else: + return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight={weight}" + + +class TorchAoHfQuantizer(DiffusersQuantizer): + r""" + Diffusers Quantizer for TorchAO: https://github.com/pytorch/ao/. + """ + + requires_calibration = False + required_packages = ["torchao"] + + def __init__(self, quantization_config, **kwargs): + super().__init__(quantization_config, **kwargs) + + def validate_environment(self, *args, **kwargs): + if not is_torchao_available(): + raise ImportError( + "Loading a TorchAO quantized model requires the torchao library. Please install with `pip install torchao`" + ) + torchao_version = version.parse(importlib.metadata.version("torch")) + if torchao_version < version.parse("0.7.0"): + raise RuntimeError( + f"The minimum required version of `torchao` is 0.7.0, but the current version is {torchao_version}. Please upgrade with `pip install -U torchao`." + ) + + self.offload = False + + device_map = kwargs.get("device_map", None) + if isinstance(device_map, dict): + if "cpu" in device_map.values() or "disk" in device_map.values(): + if self.pre_quantized: + raise ValueError( + "You are attempting to perform cpu/disk offload with a pre-quantized torchao model " + "This is not supported yet. Please remove the CPU or disk device from the `device_map` argument." + ) + else: + self.offload = True + + if self.pre_quantized: + weights_only = kwargs.get("weights_only", None) + if weights_only: + torch_version = version.parse(importlib.metadata.version("torch")) + if torch_version < version.parse("2.5.0"): + # TODO(aryan): TorchAO is compatible with Pytorch >= 2.2 for certain quantization types. Try to see if we can support it in future + raise RuntimeError( + f"In order to use TorchAO pre-quantized model, you need to have torch>=2.5.0. However, the current version is {torch_version}." + ) + + def update_torch_dtype(self, torch_dtype): + quant_type = self.quantization_config.quant_type + if isinstance(quant_type, str) and (quant_type.startswith("int") or quant_type.startswith("uint")): + if torch_dtype is not None and torch_dtype != torch.bfloat16: + logger.warning( + f"You are trying to set torch_dtype to {torch_dtype} for int4/int8/uintx quantization, but " + f"only bfloat16 is supported right now. Please set `torch_dtype=torch.bfloat16`." + ) + + if torch_dtype is None: + # We need to set the torch_dtype, otherwise we have dtype mismatch when performing the quantized linear op + logger.warning( + "Overriding `torch_dtype` with `torch_dtype=torch.bfloat16` due to requirements of `torchao` " + "to enable model loading in different precisions. Pass your own `torch_dtype` to specify the " + "dtype of the remaining non-linear layers, or pass torch_dtype=torch.bfloat16, to remove this warning." + ) + torch_dtype = torch.bfloat16 + + return torch_dtype + + def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype": + quant_type = self.quantization_config.quant_type + from accelerate.utils import CustomDtype + + if isinstance(quant_type, str): + if quant_type.startswith("int8"): + # Note that int4 weights are created by packing into torch.int8, but since there is no torch.int4, we use torch.int8 + return torch.int8 + elif quant_type.startswith("int4"): + return CustomDtype.INT4 + elif quant_type == "uintx_weight_only": + return self.quantization_config.quant_type_kwargs.get("dtype", torch.uint8) + elif quant_type.startswith("uint"): + return { + 1: torch.uint1, + 2: torch.uint2, + 3: torch.uint3, + 4: torch.uint4, + 5: torch.uint5, + 6: torch.uint6, + 7: torch.uint7, + }[int(quant_type[4])] + elif quant_type.startswith("float") or quant_type.startswith("fp"): + return torch.bfloat16 + + elif is_torchao_version(">", "0.9.0"): + from torchao.core.config import AOBaseConfig + + quant_type = self.quantization_config.quant_type + if isinstance(quant_type, AOBaseConfig): + # Extract size digit using fuzzy match on the class name + config_name = quant_type.__class__.__name__ + size_digit = fuzzy_match_size(config_name) + + # Map the extracted digit to appropriate dtype + if size_digit == "4": + return CustomDtype.INT4 + else: + # Default to int8 + return torch.int8 + + if isinstance(target_dtype, SUPPORTED_TORCH_DTYPES_FOR_QUANTIZATION): + return target_dtype + + # We need one of the supported dtypes to be selected in order for accelerate to determine + # the total size of modules/parameters for auto device placement. + possible_device_maps = ["auto", "balanced", "balanced_low_0", "sequential"] + raise ValueError( + f"You have set `device_map` as one of {possible_device_maps} on a TorchAO quantized model but a suitable target dtype " + f"could not be inferred. The supported target_dtypes are: {SUPPORTED_TORCH_DTYPES_FOR_QUANTIZATION}. If you think the " + f"dtype you are using should be supported, please open an issue at https://github.com/huggingface/diffusers/issues." + ) + + def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]: + max_memory = {key: val * 0.9 for key, val in max_memory.items()} + return max_memory + + def check_if_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + state_dict: dict[str, Any], + **kwargs, + ) -> bool: + param_device = kwargs.pop("param_device", None) + # Check if the param_name is not in self.modules_to_not_convert + if any((key + "." in param_name) or (key == param_name) for key in self.modules_to_not_convert): + return False + elif param_device == "cpu" and self.offload: + # We don't quantize weights that we offload + return False + else: + # We only quantize the weight of nn.Linear + module, tensor_name = get_module_from_name(model, param_name) + return isinstance(module, torch.nn.Linear) and (tensor_name == "weight") + + def create_quantized_param( + self, + model: "ModelMixin", + param_value: "torch.Tensor", + param_name: str, + target_device: "torch.device", + state_dict: dict[str, Any], + unexpected_keys: list[str], + **kwargs, + ): + r""" + Each nn.Linear layer that needs to be quantized is processed here. First, we set the value the weight tensor, + then we move it to the target device. Finally, we quantize the module. + """ + module, tensor_name = get_module_from_name(model, param_name) + + if self.pre_quantized: + # If we're loading pre-quantized weights, replace the repr of linear layers for pretty printing info + # about AffineQuantizedTensor + module._parameters[tensor_name] = torch.nn.Parameter(param_value.to(device=target_device)) + if isinstance(module, nn.Linear): + module.extra_repr = types.MethodType(_linear_extra_repr, module) + else: + # As we perform quantization here, the repr of linear layers is that of AQT, so we don't have to do it ourselves + module._parameters[tensor_name] = torch.nn.Parameter(param_value).to(device=target_device) + quantize_(module, self.quantization_config.get_apply_tensor_subclass()) + + def get_cuda_warm_up_factor(self): + """ + This factor is used in caching_allocator_warmup to determine how many bytes to pre-allocate for CUDA warmup. + - A factor of 2 means we pre-allocate the full memory footprint of the model. + - A factor of 4 means we pre-allocate half of that, and so on + + However, when using TorchAO, calculating memory usage with param.numel() * param.element_size() doesn't give + the correct size for quantized weights (like int4 or int8) That's because TorchAO internally represents + quantized tensors using subtensors and metadata, and the reported element_size() still corresponds to the + torch_dtype not the actual bit-width of the quantized data. + + To correct for this: + - Use a division factor of 8 for int4 weights + - Use a division factor of 4 for int8 weights + """ + # Original mapping for non-AOBaseConfig types + # For the uint types, this is a best guess. Once these types become more used + # we can look into their nuances. + if is_torchao_version(">", "0.9.0"): + from torchao.core.config import AOBaseConfig + + quant_type = self.quantization_config.quant_type + if isinstance(quant_type, AOBaseConfig): + # Extract size digit using fuzzy match on the class name + config_name = quant_type.__class__.__name__ + size_digit = fuzzy_match_size(config_name) + + if size_digit == "4": + return 8 + else: + return 4 + + map_to_target_dtype = {"int4_*": 8, "int8_*": 4, "uint*": 8, "float8*": 4} + quant_type = self.quantization_config.quant_type + for pattern, target_dtype in map_to_target_dtype.items(): + if fnmatch(quant_type, pattern): + return target_dtype + raise ValueError(f"Unsupported quant_type: {quant_type!r}") + + def _process_model_before_weight_loading( + self, + model: "ModelMixin", + device_map, + keep_in_fp32_modules: list[str] = [], + **kwargs, + ): + self.modules_to_not_convert = self.quantization_config.modules_to_not_convert + + if not isinstance(self.modules_to_not_convert, list): + self.modules_to_not_convert = [self.modules_to_not_convert] + + self.modules_to_not_convert.extend(keep_in_fp32_modules) + + # Extend `self.modules_to_not_convert` to keys that are supposed to be offloaded to `cpu` or `disk` + if isinstance(device_map, dict) and len(device_map.keys()) > 1: + keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]] + self.modules_to_not_convert.extend(keys_on_cpu) + + # Purge `None`. + # Unlike `transformers`, we don't know if we should always keep certain modules in FP32 + # in case of diffusion transformer models. For language models and others alike, `lm_head` + # and tied modules are usually kept in FP32. + self.modules_to_not_convert = [module for module in self.modules_to_not_convert if module is not None] + + model.config.quantization_config = self.quantization_config + + def _process_model_after_weight_loading(self, model: "ModelMixin"): + return model + + def is_serializable(self, safe_serialization=None): + # TODO(aryan): needs to be tested + if safe_serialization: + logger.warning( + "torchao quantized model does not support safe serialization, please set `safe_serialization` to False." + ) + return False + + _is_torchao_serializable = version.parse(importlib.metadata.version("huggingface_hub")) >= version.parse( + "0.25.0" + ) + + if not _is_torchao_serializable: + logger.warning("torchao quantized model is only serializable after huggingface_hub >= 0.25.0 ") + + if self.offload and self.quantization_config.modules_to_not_convert is None: + logger.warning( + "The model contains offloaded modules and these modules are not quantized. We don't recommend saving the model as we won't be able to reload them." + "If you want to specify modules to not quantize, please specify modules_to_not_convert in the quantization_config." + ) + return False + + return _is_torchao_serializable + + @property + def is_trainable(self): + return self.quantization_config.quant_type.startswith("int8") + + @property + def is_compileable(self) -> bool: + return True diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c7101d1b040145d9b0b12d51718cf43937e74190 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__init__.py @@ -0,0 +1,230 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING + +from ..utils import ( + DIFFUSERS_SLOW_IMPORT, + OptionalDependencyNotAvailable, + _LazyModule, + get_objects_from_module, + is_flax_available, + is_scipy_available, + is_torch_available, + is_torchsde_available, +) + + +_dummy_modules = {} +_import_structure = {} + +try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_pt_objects # noqa F403 + + _dummy_modules.update(get_objects_from_module(dummy_pt_objects)) + +else: + _import_structure["deprecated"] = ["KarrasVeScheduler", "ScoreSdeVpScheduler"] + _import_structure["scheduling_amused"] = ["AmusedScheduler"] + _import_structure["scheduling_consistency_decoder"] = ["ConsistencyDecoderScheduler"] + _import_structure["scheduling_consistency_models"] = ["CMStochasticIterativeScheduler"] + _import_structure["scheduling_ddim"] = ["DDIMScheduler"] + _import_structure["scheduling_ddim_cogvideox"] = ["CogVideoXDDIMScheduler"] + _import_structure["scheduling_ddim_inverse"] = ["DDIMInverseScheduler"] + _import_structure["scheduling_ddim_parallel"] = ["DDIMParallelScheduler"] + _import_structure["scheduling_ddpm"] = ["DDPMScheduler"] + _import_structure["scheduling_ddpm_parallel"] = ["DDPMParallelScheduler"] + _import_structure["scheduling_ddpm_wuerstchen"] = ["DDPMWuerstchenScheduler"] + _import_structure["scheduling_deis_multistep"] = ["DEISMultistepScheduler"] + _import_structure["scheduling_dpm_cogvideox"] = ["CogVideoXDPMScheduler"] + _import_structure["scheduling_dpmsolver_multistep"] = ["DPMSolverMultistepScheduler"] + _import_structure["scheduling_dpmsolver_multistep_inverse"] = ["DPMSolverMultistepInverseScheduler"] + _import_structure["scheduling_dpmsolver_singlestep"] = ["DPMSolverSinglestepScheduler"] + _import_structure["scheduling_edm_dpmsolver_multistep"] = ["EDMDPMSolverMultistepScheduler"] + _import_structure["scheduling_edm_euler"] = ["EDMEulerScheduler"] + _import_structure["scheduling_euler_ancestral_discrete"] = ["EulerAncestralDiscreteScheduler"] + _import_structure["scheduling_euler_discrete"] = ["EulerDiscreteScheduler"] + _import_structure["scheduling_flow_match_euler_discrete"] = ["FlowMatchEulerDiscreteScheduler"] + _import_structure["scheduling_flow_match_heun_discrete"] = ["FlowMatchHeunDiscreteScheduler"] + _import_structure["scheduling_flow_match_lcm"] = ["FlowMatchLCMScheduler"] + _import_structure["scheduling_helios"] = ["HeliosScheduler"] + _import_structure["scheduling_helios_dmd"] = ["HeliosDMDScheduler"] + _import_structure["scheduling_heun_discrete"] = ["HeunDiscreteScheduler"] + _import_structure["scheduling_ipndm"] = ["IPNDMScheduler"] + _import_structure["scheduling_k_dpm_2_ancestral_discrete"] = ["KDPM2AncestralDiscreteScheduler"] + _import_structure["scheduling_k_dpm_2_discrete"] = ["KDPM2DiscreteScheduler"] + _import_structure["scheduling_lcm"] = ["LCMScheduler"] + _import_structure["scheduling_ltx_euler_ancestral_rf"] = ["LTXEulerAncestralRFScheduler"] + _import_structure["scheduling_pndm"] = ["PNDMScheduler"] + _import_structure["scheduling_repaint"] = ["RePaintScheduler"] + _import_structure["scheduling_sasolver"] = ["SASolverScheduler"] + _import_structure["scheduling_scm"] = ["SCMScheduler"] + _import_structure["scheduling_sde_ve"] = ["ScoreSdeVeScheduler"] + _import_structure["scheduling_tcd"] = ["TCDScheduler"] + _import_structure["scheduling_unclip"] = ["UnCLIPScheduler"] + _import_structure["scheduling_unipc_multistep"] = ["UniPCMultistepScheduler"] + _import_structure["scheduling_utils"] = ["AysSchedules", "KarrasDiffusionSchedulers", "SchedulerMixin"] + _import_structure["scheduling_vq_diffusion"] = ["VQDiffusionScheduler"] + +try: + if not is_flax_available(): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_flax_objects # noqa F403 + + _dummy_modules.update(get_objects_from_module(dummy_flax_objects)) + +else: + _import_structure["scheduling_ddim_flax"] = ["FlaxDDIMScheduler"] + _import_structure["scheduling_ddpm_flax"] = ["FlaxDDPMScheduler"] + _import_structure["scheduling_dpmsolver_multistep_flax"] = ["FlaxDPMSolverMultistepScheduler"] + _import_structure["scheduling_euler_discrete_flax"] = ["FlaxEulerDiscreteScheduler"] + _import_structure["scheduling_karras_ve_flax"] = ["FlaxKarrasVeScheduler"] + _import_structure["scheduling_lms_discrete_flax"] = ["FlaxLMSDiscreteScheduler"] + _import_structure["scheduling_pndm_flax"] = ["FlaxPNDMScheduler"] + _import_structure["scheduling_sde_ve_flax"] = ["FlaxScoreSdeVeScheduler"] + _import_structure["scheduling_utils_flax"] = [ + "FlaxKarrasDiffusionSchedulers", + "FlaxSchedulerMixin", + "FlaxSchedulerOutput", + "broadcast_to_shape_from_left", + ] + + +try: + if not (is_torch_available() and is_scipy_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_torch_and_scipy_objects # noqa F403 + + _dummy_modules.update(get_objects_from_module(dummy_torch_and_scipy_objects)) + +else: + _import_structure["scheduling_lms_discrete"] = ["LMSDiscreteScheduler"] + +try: + if not (is_torch_available() and is_torchsde_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ..utils import dummy_torch_and_torchsde_objects # noqa F403 + + _dummy_modules.update(get_objects_from_module(dummy_torch_and_torchsde_objects)) + +else: + _import_structure["scheduling_cosine_dpmsolver_multistep"] = ["CosineDPMSolverMultistepScheduler"] + _import_structure["scheduling_dpmsolver_sde"] = ["DPMSolverSDEScheduler"] + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + from ..utils import ( + OptionalDependencyNotAvailable, + is_flax_available, + is_scipy_available, + is_torch_available, + is_torchsde_available, + ) + + try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_pt_objects import * # noqa F403 + else: + from .deprecated import KarrasVeScheduler, ScoreSdeVpScheduler + from .scheduling_amused import AmusedScheduler + from .scheduling_consistency_decoder import ConsistencyDecoderScheduler + from .scheduling_consistency_models import CMStochasticIterativeScheduler + from .scheduling_ddim import DDIMScheduler + from .scheduling_ddim_cogvideox import CogVideoXDDIMScheduler + from .scheduling_ddim_inverse import DDIMInverseScheduler + from .scheduling_ddim_parallel import DDIMParallelScheduler + from .scheduling_ddpm import DDPMScheduler + from .scheduling_ddpm_parallel import DDPMParallelScheduler + from .scheduling_ddpm_wuerstchen import DDPMWuerstchenScheduler + from .scheduling_deis_multistep import DEISMultistepScheduler + from .scheduling_dpm_cogvideox import CogVideoXDPMScheduler + from .scheduling_dpmsolver_multistep import DPMSolverMultistepScheduler + from .scheduling_dpmsolver_multistep_inverse import DPMSolverMultistepInverseScheduler + from .scheduling_dpmsolver_singlestep import DPMSolverSinglestepScheduler + from .scheduling_edm_dpmsolver_multistep import EDMDPMSolverMultistepScheduler + from .scheduling_edm_euler import EDMEulerScheduler + from .scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler + from .scheduling_euler_discrete import EulerDiscreteScheduler + from .scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler + from .scheduling_flow_match_heun_discrete import FlowMatchHeunDiscreteScheduler + from .scheduling_flow_match_lcm import FlowMatchLCMScheduler + from .scheduling_helios import HeliosScheduler + from .scheduling_helios_dmd import HeliosDMDScheduler + from .scheduling_heun_discrete import HeunDiscreteScheduler + from .scheduling_ipndm import IPNDMScheduler + from .scheduling_k_dpm_2_ancestral_discrete import KDPM2AncestralDiscreteScheduler + from .scheduling_k_dpm_2_discrete import KDPM2DiscreteScheduler + from .scheduling_lcm import LCMScheduler + from .scheduling_ltx_euler_ancestral_rf import LTXEulerAncestralRFScheduler + from .scheduling_pndm import PNDMScheduler + from .scheduling_repaint import RePaintScheduler + from .scheduling_sasolver import SASolverScheduler + from .scheduling_scm import SCMScheduler + from .scheduling_sde_ve import ScoreSdeVeScheduler + from .scheduling_tcd import TCDScheduler + from .scheduling_unclip import UnCLIPScheduler + from .scheduling_unipc_multistep import UniPCMultistepScheduler + from .scheduling_utils import AysSchedules, KarrasDiffusionSchedulers, SchedulerMixin + from .scheduling_vq_diffusion import VQDiffusionScheduler + try: + if not is_flax_available(): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_flax_objects import * # noqa F403 + else: + from .scheduling_ddim_flax import FlaxDDIMScheduler + from .scheduling_ddpm_flax import FlaxDDPMScheduler + from .scheduling_dpmsolver_multistep_flax import FlaxDPMSolverMultistepScheduler + from .scheduling_euler_discrete_flax import FlaxEulerDiscreteScheduler + from .scheduling_karras_ve_flax import FlaxKarrasVeScheduler + from .scheduling_lms_discrete_flax import FlaxLMSDiscreteScheduler + from .scheduling_pndm_flax import FlaxPNDMScheduler + from .scheduling_sde_ve_flax import FlaxScoreSdeVeScheduler + from .scheduling_utils_flax import ( + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + broadcast_to_shape_from_left, + ) + + try: + if not (is_torch_available() and is_scipy_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_scipy_objects import * # noqa F403 + else: + from .scheduling_lms_discrete import LMSDiscreteScheduler + + try: + if not (is_torch_available() and is_torchsde_available()): + raise OptionalDependencyNotAvailable() + except OptionalDependencyNotAvailable: + from ..utils.dummy_torch_and_torchsde_objects import * # noqa F403 + else: + from .scheduling_cosine_dpmsolver_multistep import CosineDPMSolverMultistepScheduler + from .scheduling_dpmsolver_sde import DPMSolverSDEScheduler + +else: + import sys + + sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) + for name, value in _dummy_modules.items(): + setattr(sys.modules[__name__], name, value) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c1a3ea6954e779af819132900458bd52a0aadfe Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_amused.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_amused.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf406079f5508bfe9947c53f22c82a918058bf67 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_amused.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_consistency_decoder.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_consistency_decoder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17afd4c0592eb2d635af37cb6c525994fdd3e5ee Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_consistency_decoder.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_consistency_models.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_consistency_models.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d88997be7115dd49ed0fec1f36390facaa2e601 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_consistency_models.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_cosine_dpmsolver_multistep.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_cosine_dpmsolver_multistep.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..574f1e7e3833f55b55093f50ab57974a2c499d74 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_cosine_dpmsolver_multistep.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3f6a12ffb359eed2af52aecf4291bd1cce82f33 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_cogvideox.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_cogvideox.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60918ede5f363e1f8d38642616a7513a9e173768 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_cogvideox.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2d34912f5001ba48997d701b17d6e807e05503d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_inverse.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_inverse.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..455bc12cae8a1abd27756e85ee45492fd90add28 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_inverse.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_parallel.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_parallel.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce9f7af36c35419d425aef08828076d1e5d75cbc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddim_parallel.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e3131b7579fe5a89ce88ca7e56b99a19ca7f26b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a9fa5c55bdba9a3fa201db06b1e46e38ddcff12 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_parallel.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_parallel.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fb307d606176c132a297c29c900b1c0cdebe5a1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_parallel.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_wuerstchen.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_wuerstchen.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73ef41f7905c25cadba4f2a6bf3dec935bee3d9c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ddpm_wuerstchen.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_deis_multistep.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_deis_multistep.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d32410823302c14754e232ca826591c3510b052 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_deis_multistep.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpm_cogvideox.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpm_cogvideox.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..432b4f24283878e4ba8b3e51a6a42b22f7f2e223 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpm_cogvideox.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0accd2e32319e443e354341d6a171de86257e5ff Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..505ed5a9714e39eb83f76e49c8df28e7c42ff6c3 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_inverse.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_inverse.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df4ad5784fa74272da9303a0076a4f445d1a76fb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_inverse.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_sde.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_sde.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a24d6c51b46fe958a3d7962b6d33888880e4c30 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_sde.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_singlestep.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_singlestep.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..686addeef0f97aa9a3dec4d4698035fd34b19f70 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_dpmsolver_singlestep.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_edm_dpmsolver_multistep.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_edm_dpmsolver_multistep.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f07dffe91c3c1f2224922fb21225fd130fb77aa Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_edm_dpmsolver_multistep.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_edm_euler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_edm_euler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..281ef4b1c533405236d1bc77c17ee5cd328e4817 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_edm_euler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_ancestral_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_ancestral_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..333042541f2ca63d2b3511e1a09d89db8c8e883b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_ancestral_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..556a6cc8b997e4d1c35e5e181b076af41f09eadb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_discrete_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_discrete_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9dff8e22a37cc1c14f8e9e33cf3f883f6c8556ae Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_euler_discrete_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_euler_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_euler_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c96147b0612256f48eb5dbae2696563b5f914ae Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_euler_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_heun_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_heun_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73025bdc1e63a0c5ae751c746f64156268c86c05 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_heun_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_lcm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_lcm.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6941d748420ccd63a1385cd5a2cab2c3b120e258 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_flow_match_lcm.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_helios.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_helios.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..805f7c54cf9845e32bac48e4a2b89d8281cf5627 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_helios.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_helios_dmd.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_helios_dmd.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5dcfb66ec9403e12984b0eb2719987d6f7d83c3a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_helios_dmd.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_heun_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_heun_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b61598557049ea8af7f57da64c15af5565786e86 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_heun_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ipndm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ipndm.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d882063530c0c2e7fac4e7fa46b38c7bfc4fac7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ipndm.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_k_dpm_2_ancestral_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_k_dpm_2_ancestral_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0b037059cba670f22561d659ed7c8c2e694d18d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_k_dpm_2_ancestral_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_k_dpm_2_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_k_dpm_2_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c46f6894328a0692c83b415d0bc37f1d1034decc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_k_dpm_2_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_karras_ve_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_karras_ve_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ef9d637b2a80155b661da44a38ee3aaec5dc7ad Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_karras_ve_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lcm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lcm.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ebed1f33ea983ef6800af60c976b3fe78bb91a2e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lcm.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c6576c04077ebd4e92ff92d4bc24fdf3fd9bd54 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lms_discrete_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lms_discrete_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..500fd3d92b099f085ce63267da0f62d48bed24bb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_lms_discrete_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ltx_euler_ancestral_rf.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ltx_euler_ancestral_rf.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7694aff4c73e4fe368922c7a8050f7e0e8c7cc9d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_ltx_euler_ancestral_rf.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_pndm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_pndm.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a4559ac6e2cf2098d5a6ca48c4f429763a0acb1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_pndm.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_pndm_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_pndm_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..77237ae1607a1509a36b5004ba7c8f8d0a310003 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_pndm_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_repaint.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_repaint.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16b6839c11c77464c8dc5a910f58f9ffe5737f73 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_repaint.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sasolver.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sasolver.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63dca0e814c1ae1cebc1db52c8ed7c9c46c85109 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sasolver.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_scm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_scm.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..452324e4042bc932da6f1dfd08f5eee221713ee9 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_scm.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ade5316c9661912f2eaefc0e0f9d06141b0a59f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sde_ve_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sde_ve_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..26630954159ef32e9696102bcf666deb3ac0b51b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_sde_ve_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_tcd.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_tcd.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcee6047bb10ed177c6fbb93d1a3b9ca1ea44515 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_tcd.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_unclip.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_unclip.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f7a46ef9caf9f6b86c0122d364f1a49662a2b6c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_unclip.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_unipc_multistep.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_unipc_multistep.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb5523a1a8a21482cc4d933efeb0e3462bf94a7d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_unipc_multistep.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..007b6b16d983032b3dd9be234ce13ec48ceb578e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_utils_flax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_utils_flax.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b35ffb9254d3bff13d4d9493fe9da52f7e7433e0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_utils_flax.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_vq_diffusion.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_vq_diffusion.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b4090821b8856a7ed3575e144d4edf47dbcc46a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/__pycache__/scheduling_vq_diffusion.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..479cf9bd568be320b60e87a831bf80835e9943f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__init__.py @@ -0,0 +1,50 @@ +from typing import TYPE_CHECKING + +from ...utils import ( + DIFFUSERS_SLOW_IMPORT, + OptionalDependencyNotAvailable, + _LazyModule, + get_objects_from_module, + is_torch_available, + is_transformers_available, +) + + +_dummy_objects = {} +_import_structure = {} + +try: + if not (is_transformers_available() and is_torch_available()): + raise OptionalDependencyNotAvailable() +except OptionalDependencyNotAvailable: + from ...utils import dummy_pt_objects # noqa F403 + + _dummy_objects.update(get_objects_from_module(dummy_pt_objects)) +else: + _import_structure["scheduling_karras_ve"] = ["KarrasVeScheduler"] + _import_structure["scheduling_sde_vp"] = ["ScoreSdeVpScheduler"] + +if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT: + try: + if not is_torch_available(): + raise OptionalDependencyNotAvailable() + + except OptionalDependencyNotAvailable: + from ...utils.dummy_pt_objects import * # noqa F403 + else: + from .scheduling_karras_ve import KarrasVeScheduler + from .scheduling_sde_vp import ScoreSdeVpScheduler + + +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + module_spec=__spec__, + ) + + for name, value in _dummy_objects.items(): + setattr(sys.modules[__name__], name, value) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0477daacced5b4b3bdf5319a87ab208301642f8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/scheduling_karras_ve.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/scheduling_karras_ve.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c29a05563981264dad245cedb5ad6fcbfad54b74 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/scheduling_karras_ve.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/scheduling_sde_vp.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/scheduling_sde_vp.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ecaf78ee89c4de7853129a880f9ca3d93de47adf Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/__pycache__/scheduling_sde_vp.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/scheduling_karras_ve.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/scheduling_karras_ve.py new file mode 100644 index 0000000000000000000000000000000000000000..b6b905b0f08803352d575ffef29618ac744a9975 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/scheduling_karras_ve.py @@ -0,0 +1,239 @@ +# Copyright 2025 NVIDIA and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from dataclasses import dataclass + +import numpy as np +import torch + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils import BaseOutput +from ...utils.torch_utils import randn_tensor +from ..scheduling_utils import SchedulerMixin + + +@dataclass +class KarrasVeOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + derivative (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Derivative of predicted original image sample (x_0). + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample (x_{0}) based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + derivative: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +class KarrasVeScheduler(SchedulerMixin, ConfigMixin): + """ + A stochastic scheduler tailored to variance-expanding models. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + > [!TIP] > For more details on the parameters, see [Appendix E](https://huggingface.co/papers/2206.00364). The grid + search > values used to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in + Table 5 of > the paper. + + Args: + sigma_min (`float`, defaults to 0.02): + The minimum noise magnitude. + sigma_max (`float`, defaults to 100): + The maximum noise magnitude. + s_noise (`float`, defaults to 1.007): + The amount of additional noise to counteract loss of detail during sampling. A reasonable range is [1.000, + 1.011]. + s_churn (`float`, defaults to 80): + The parameter controlling the overall amount of stochasticity. A reasonable range is [0, 100]. + s_min (`float`, defaults to 0.05): + The start value of the sigma range to add noise (enable stochasticity). A reasonable range is [0, 10]. + s_max (`float`, defaults to 50): + The end value of the sigma range to add noise. A reasonable range is [0.2, 80]. + """ + + order = 2 + + @register_to_config + def __init__( + self, + sigma_min: float = 0.02, + sigma_max: float = 100, + s_noise: float = 1.007, + s_churn: float = 80, + s_min: float = 0.05, + s_max: float = 50, + ): + # standard deviation of the initial noise distribution + self.init_noise_sigma = sigma_max + + # setable values + self.num_inference_steps: int = None + self.timesteps: np.IntTensor = None + self.schedule: torch.Tensor = None # sigma(t_i) + + def scale_model_input(self, sample: torch.Tensor, timestep: int = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps).to(device) + schedule = [ + ( + self.config.sigma_max**2 + * (self.config.sigma_min**2 / self.config.sigma_max**2) ** (i / (num_inference_steps - 1)) + ) + for i in self.timesteps + ] + self.schedule = torch.tensor(schedule, dtype=torch.float32, device=device) + + def add_noise_to_input( + self, sample: torch.Tensor, sigma: float, generator: torch.Generator | None = None + ) -> tuple[torch.Tensor, float]: + """ + Explicit Langevin-like "churn" step of adding noise to the sample according to a `gamma_i ≥ 0` to reach a + higher noise level `sigma_hat = sigma_i + gamma_i*sigma_i`. + + Args: + sample (`torch.Tensor`): + The input sample. + sigma (`float`): + generator (`torch.Generator`, *optional*): + A random number generator. + """ + if self.config.s_min <= sigma <= self.config.s_max: + gamma = min(self.config.s_churn / self.num_inference_steps, 2**0.5 - 1) + else: + gamma = 0 + + # sample eps ~ N(0, S_noise^2 * I) + eps = self.config.s_noise * randn_tensor(sample.shape, generator=generator).to(sample.device) + sigma_hat = sigma + gamma * sigma + sample_hat = sample + ((sigma_hat**2 - sigma**2) ** 0.5 * eps) + + return sample_hat, sigma_hat + + def step( + self, + model_output: torch.Tensor, + sigma_hat: float, + sigma_prev: float, + sample_hat: torch.Tensor, + return_dict: bool = True, + ) -> KarrasVeOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + sigma_hat (`float`): + sigma_prev (`float`): + sample_hat (`torch.Tensor`): + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_karras_ve.KarrasVESchedulerOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + + """ + + pred_original_sample = sample_hat + sigma_hat * model_output + derivative = (sample_hat - pred_original_sample) / sigma_hat + sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative + + if not return_dict: + return (sample_prev, derivative) + + return KarrasVeOutput( + prev_sample=sample_prev, derivative=derivative, pred_original_sample=pred_original_sample + ) + + def step_correct( + self, + model_output: torch.Tensor, + sigma_hat: float, + sigma_prev: float, + sample_hat: torch.Tensor, + sample_prev: torch.Tensor, + derivative: torch.Tensor, + return_dict: bool = True, + ) -> KarrasVeOutput | tuple: + """ + Corrects the predicted sample based on the `model_output` of the network. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + sigma_hat (`float`): TODO + sigma_prev (`float`): TODO + sample_hat (`torch.Tensor`): TODO + sample_prev (`torch.Tensor`): TODO + derivative (`torch.Tensor`): TODO + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`. + + Returns: + prev_sample (TODO): updated sample in the diffusion chain. derivative (TODO): TODO + + """ + pred_original_sample = sample_prev + sigma_prev * model_output + derivative_corr = (sample_prev - pred_original_sample) / sigma_prev + sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr) + + if not return_dict: + return (sample_prev, derivative) + + return KarrasVeOutput( + prev_sample=sample_prev, derivative=derivative, pred_original_sample=pred_original_sample + ) + + def add_noise(self, original_samples, noise, timesteps): + raise NotImplementedError() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/scheduling_sde_vp.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/scheduling_sde_vp.py new file mode 100644 index 0000000000000000000000000000000000000000..99fa408e70c0d5eb010b9dd72e8fa62ad461abd8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/deprecated/scheduling_sde_vp.py @@ -0,0 +1,108 @@ +# Copyright 2025 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch + +import math + +import torch + +from ...configuration_utils import ConfigMixin, register_to_config +from ...utils.torch_utils import randn_tensor +from ..scheduling_utils import SchedulerMixin + + +class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): + """ + `ScoreSdeVpScheduler` is a variance preserving stochastic differential equation (SDE) scheduler. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 2000): + The number of diffusion steps to train the model. + beta_min (`int`, defaults to 0.1): + beta_max (`int`, defaults to 20): + sampling_eps (`int`, defaults to 1e-3): + The end value of sampling where timesteps decrease progressively from 1 to epsilon. + """ + + order = 1 + + @register_to_config + def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3): + self.sigmas = None + self.discrete_sigmas = None + self.timesteps = None + + def set_timesteps(self, num_inference_steps, device: str | torch.device = None): + """ + Sets the continuous timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps, device=device) + + def step_pred(self, score, x, t, generator=None): + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + score (): + x (): + t (): + generator (`torch.Generator`, *optional*): + A random number generator. + """ + if self.timesteps is None: + raise ValueError( + "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" + ) + + # TODO(Patrick) better comments + non-PyTorch + # postprocess model score + log_mean_coeff = -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min + std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff)) + std = std.flatten() + while len(std.shape) < len(score.shape): + std = std.unsqueeze(-1) + score = -score / std + + # compute + dt = -1.0 / len(self.timesteps) + + beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) + beta_t = beta_t.flatten() + while len(beta_t.shape) < len(x.shape): + beta_t = beta_t.unsqueeze(-1) + drift = -0.5 * beta_t * x + + diffusion = torch.sqrt(beta_t) + drift = drift - diffusion**2 * score + x_mean = x + drift * dt + + # add noise + noise = randn_tensor(x.shape, layout=x.layout, generator=generator, device=x.device, dtype=x.dtype) + x = x_mean + diffusion * math.sqrt(-dt) * noise + + return x, x_mean + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_amused.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_amused.py new file mode 100644 index 0000000000000000000000000000000000000000..ef49d8db91b1c94e254b18a1f446ff1adb27b451 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_amused.py @@ -0,0 +1,235 @@ +import math +from dataclasses import dataclass +from typing import Literal + +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from .scheduling_utils import SchedulerMixin + + +def gumbel_noise(t: torch.Tensor, generator: torch.Generator | None = None) -> torch.Tensor: + """ + Generate Gumbel noise for sampling. + + Args: + t (`torch.Tensor`): + Input tensor to match the shape and dtype of the output noise. + generator (`torch.Generator`, *optional*): + A random number generator for reproducible sampling. + + Returns: + `torch.Tensor`: + Gumbel-distributed noise with the same shape, dtype, and device as the input tensor. + """ + device = generator.device if generator is not None else t.device + noise = torch.zeros_like(t, device=device).uniform_(0, 1, generator=generator).to(t.device) + return -torch.log((-torch.log(noise.clamp(1e-20))).clamp(1e-20)) + + +def mask_by_random_topk( + mask_len: torch.Tensor, + probs: torch.Tensor, + temperature: float = 1.0, + generator: torch.Generator | None = None, +) -> torch.Tensor: + """ + Mask tokens by selecting the top-k lowest confidence scores with temperature-based randomness. + + Args: + mask_len (`torch.Tensor`): + Number of tokens to mask per sample in the batch. + probs (`torch.Tensor`): + Probability scores for each token. + temperature (`float`, *optional*, defaults to 1.0): + Temperature parameter for controlling randomness in the masking process. + generator (`torch.Generator`, *optional*): + A random number generator for reproducible sampling. + + Returns: + `torch.Tensor`: + Boolean mask indicating which tokens should be masked. + """ + confidence = torch.log(probs.clamp(1e-20)) + temperature * gumbel_noise(probs, generator=generator) + sorted_confidence = torch.sort(confidence, dim=-1).values + cut_off = torch.gather(sorted_confidence, 1, mask_len.long()) + masking = confidence < cut_off + return masking + + +@dataclass +class AmusedSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.LongTensor` of shape `(batch_size, height, width)` or `(batch_size, sequence_length)`): + Computed sample `(x_{t-1})` of previous timestep with token IDs. `prev_sample` should be used as next model + input in the denoising loop. + pred_original_sample (`torch.LongTensor` of shape `(batch_size, height, width)` or `(batch_size, sequence_length)`, *optional*): + The predicted fully denoised sample `(x_{0})` with token IDs based on the model output from the current + timestep. `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Generator | None = None + + +class AmusedScheduler(SchedulerMixin, ConfigMixin): + """ + A scheduler for masked token generation as used in [`AmusedPipeline`]. + + This scheduler iteratively unmasks tokens based on their confidence scores, following either a cosine or linear + schedule. Unlike traditional diffusion schedulers that work with continuous pixel values, this scheduler operates + on discrete token IDs, making it suitable for autoregressive and non-autoregressive masked token generation models. + + This scheduler inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the + generic methods the library implements for all schedulers such as loading and saving. + + Args: + mask_token_id (`int`): + The token ID used to represent masked tokens in the sequence. + masking_schedule (`Literal["cosine", "linear"]`, *optional*, defaults to `"cosine"`): + The schedule type for determining the mask ratio at each timestep. Can be either `"cosine"` or `"linear"`. + """ + + order = 1 + + temperatures: torch.Generator | None + timesteps: torch.Generator | None + + @register_to_config + def __init__( + self, + mask_token_id: int, + masking_schedule: Literal["cosine", "linear"] = "cosine", + ): + self.temperatures = None + self.timesteps = None + + def set_timesteps( + self, + num_inference_steps: int, + temperature: int | tuple[int, int] | list[int] = (2, 0), + device: str | torch.device = None, + ): + self.timesteps = torch.arange(num_inference_steps, device=device).flip(0) + + if isinstance(temperature, (tuple, list)): + self.temperatures = torch.linspace(temperature[0], temperature[1], num_inference_steps, device=device) + else: + self.temperatures = torch.linspace(temperature, 0.01, num_inference_steps, device=device) + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.LongTensor, + starting_mask_ratio: float = 1.0, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> AmusedSchedulerOutput | tuple: + two_dim_input = sample.ndim == 3 and model_output.ndim == 4 + + if two_dim_input: + batch_size, codebook_size, height, width = model_output.shape + sample = sample.reshape(batch_size, height * width) + model_output = model_output.reshape(batch_size, codebook_size, height * width).permute(0, 2, 1) + + unknown_map = sample == self.config.mask_token_id + + probs = model_output.softmax(dim=-1) + + device = probs.device + probs_ = probs.to(generator.device) if generator is not None else probs # handles when generator is on CPU + if probs_.device.type == "cpu" and probs_.dtype != torch.float32: + probs_ = probs_.float() # multinomial is not implemented for cpu half precision + probs_ = probs_.reshape(-1, probs.size(-1)) + pred_original_sample = torch.multinomial(probs_, 1, generator=generator).to(device=device) + pred_original_sample = pred_original_sample[:, 0].view(*probs.shape[:-1]) + pred_original_sample = torch.where(unknown_map, pred_original_sample, sample) + + if timestep == 0: + prev_sample = pred_original_sample + else: + seq_len = sample.shape[1] + step_idx = (self.timesteps == timestep).nonzero() + ratio = (step_idx + 1) / len(self.timesteps) + + if self.config.masking_schedule == "cosine": + mask_ratio = torch.cos(ratio * math.pi / 2) + elif self.config.masking_schedule == "linear": + mask_ratio = 1 - ratio + else: + raise ValueError(f"unknown masking schedule {self.config.masking_schedule}") + + mask_ratio = starting_mask_ratio * mask_ratio + + mask_len = (seq_len * mask_ratio).floor() + # do not mask more than amount previously masked + mask_len = torch.min(unknown_map.sum(dim=-1, keepdim=True) - 1, mask_len) + # mask at least one + mask_len = torch.max(torch.tensor([1], device=model_output.device), mask_len) + + selected_probs = torch.gather(probs, -1, pred_original_sample[:, :, None])[:, :, 0] + # Ignores the tokens given in the input by overwriting their confidence. + selected_probs = torch.where(unknown_map, selected_probs, torch.finfo(selected_probs.dtype).max) + + masking = mask_by_random_topk(mask_len, selected_probs, self.temperatures[step_idx], generator) + + # Masks tokens with lower confidence. + prev_sample = torch.where(masking, self.config.mask_token_id, pred_original_sample) + + if two_dim_input: + prev_sample = prev_sample.reshape(batch_size, height, width) + pred_original_sample = pred_original_sample.reshape(batch_size, height, width) + + if not return_dict: + return (prev_sample, pred_original_sample) + + return AmusedSchedulerOutput(prev_sample, pred_original_sample) + + def add_noise( + self, + sample: torch.LongTensor, + timesteps: int, + generator: torch.Generator | None = None, + ) -> torch.LongTensor: + """ + Add noise to a sample by randomly masking tokens according to the masking schedule. + + Args: + sample (`torch.LongTensor`): + The input sample containing token IDs to be partially masked. + timesteps (`int`): + The timestep that determines how much masking to apply. Higher timesteps result in more masking. + generator (`torch.Generator`, *optional*): + A random number generator for reproducible masking. + + Returns: + `torch.LongTensor`: + The sample with some tokens replaced by `mask_token_id` according to the masking schedule. + """ + step_idx = (self.timesteps == timesteps).nonzero() + ratio = (step_idx + 1) / len(self.timesteps) + + if self.config.masking_schedule == "cosine": + mask_ratio = torch.cos(ratio * math.pi / 2) + elif self.config.masking_schedule == "linear": + mask_ratio = 1 - ratio + else: + raise ValueError(f"unknown masking schedule {self.config.masking_schedule}") + + mask_indices = ( + torch.rand( + sample.shape, device=generator.device if generator is not None else sample.device, generator=generator + ).to(sample.device) + < mask_ratio + ) + + masked_sample = sample.clone() + + masked_sample[mask_indices] = self.config.mask_token_id + + return masked_sample diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_consistency_decoder.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_consistency_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..7f5e0cad22b9b9b2b3920c8c25eb77d9acc7fd36 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_consistency_decoder.py @@ -0,0 +1,212 @@ +import math +from dataclasses import dataclass +from typing import Literal + +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +@dataclass +class ConsistencyDecoderSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.Tensor + + +class ConsistencyDecoderScheduler(SchedulerMixin, ConfigMixin): + """ + A scheduler for the consistency decoder used in Stable Diffusion pipelines. + + This scheduler implements a two-step denoising process using consistency models for decoding latent representations + into images. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, *optional*, defaults to `1024`): + The number of diffusion steps to train the model. + sigma_data (`float`, *optional*, defaults to `0.5`): + The standard deviation of the data distribution. Used for computing the skip and output scaling factors. + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1024, + sigma_data: float = 0.5, + ) -> None: + betas = betas_for_alpha_bar(num_train_timesteps) + + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + + self.sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod) + self.sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - alphas_cumprod) + + sigmas = torch.sqrt(1.0 / alphas_cumprod - 1) + + sqrt_recip_alphas_cumprod = torch.sqrt(1.0 / alphas_cumprod) + + self.c_skip = sqrt_recip_alphas_cumprod * sigma_data**2 / (sigmas**2 + sigma_data**2) + self.c_out = sigmas * sigma_data / (sigmas**2 + sigma_data**2) ** 0.5 + self.c_in = sqrt_recip_alphas_cumprod / (sigmas**2 + sigma_data**2) ** 0.5 + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + ): + if num_inference_steps != 2: + raise ValueError("Currently more than 2 inference steps are not supported.") + + self.timesteps = torch.tensor([1008, 512], dtype=torch.long, device=device) + self.sqrt_alphas_cumprod = self.sqrt_alphas_cumprod.to(device) + self.sqrt_one_minus_alphas_cumprod = self.sqrt_one_minus_alphas_cumprod.to(device) + self.c_skip = self.c_skip.to(device) + self.c_out = self.c_out.to(device) + self.c_in = self.c_in.to(device) + + @property + def init_noise_sigma(self) -> torch.Tensor: + """ + Return the standard deviation of the initial noise distribution. + + Returns: + `torch.Tensor`: + The initial noise sigma value from the precomputed `sqrt_one_minus_alphas_cumprod` at the first + timestep. + """ + return self.sqrt_one_minus_alphas_cumprod[self.timesteps[0]] + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample * self.c_in[timestep] + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> ConsistencyDecoderSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator for reproducibility. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a + [`~schedulers.scheduling_consistency_decoder.ConsistencyDecoderSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_consistency_decoder.ConsistencyDecoderSchedulerOutput`] or `tuple`: + If `return_dict` is `True`, + [`~schedulers.scheduling_consistency_decoder.ConsistencyDecoderSchedulerOutput`] is returned, otherwise + a tuple is returned where the first element is the sample tensor. + """ + x_0 = self.c_out[timestep] * model_output + self.c_skip[timestep] * sample + + timestep_idx = torch.where(self.timesteps == timestep)[0] + + if timestep_idx == len(self.timesteps) - 1: + prev_sample = x_0 + else: + noise = randn_tensor(x_0.shape, generator=generator, dtype=x_0.dtype, device=x_0.device) + prev_sample = ( + self.sqrt_alphas_cumprod[self.timesteps[timestep_idx + 1]].to(x_0.dtype) * x_0 + + self.sqrt_one_minus_alphas_cumprod[self.timesteps[timestep_idx + 1]].to(x_0.dtype) * noise + ) + + if not return_dict: + return (prev_sample,) + + return ConsistencyDecoderSchedulerOutput(prev_sample=prev_sample) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_consistency_models.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_consistency_models.py new file mode 100644 index 0000000000000000000000000000000000000000..d6bdf73ff9ce2c6d7592cbcd27fc111330b0f810 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_consistency_models.py @@ -0,0 +1,517 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class CMStochasticIterativeSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.Tensor + + +class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin): + """ + Multistep and onestep sampling for consistency models. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 40): + The number of diffusion steps to train the model. + sigma_min (`float`, defaults to 0.002): + Minimum noise magnitude in the sigma schedule. Defaults to 0.002 from the original implementation. + sigma_max (`float`, defaults to 80.0): + Maximum noise magnitude in the sigma schedule. Defaults to 80.0 from the original implementation. + sigma_data (`float`, defaults to 0.5): + The standard deviation of the data distribution from the EDM + [paper](https://huggingface.co/papers/2206.00364). Defaults to 0.5 from the original implementation. + s_noise (`float`, defaults to 1.0): + The amount of additional noise to counteract loss of detail during sampling. A reasonable range is [1.000, + 1.011]. Defaults to 1.0 from the original implementation. + rho (`float`, defaults to 7.0): + The parameter for calculating the Karras sigma schedule from the EDM + [paper](https://huggingface.co/papers/2206.00364). Defaults to 7.0 from the original implementation. + clip_denoised (`bool`, defaults to `True`): + Whether to clip the denoised outputs to `(-1, 1)`. + timesteps (`list` or `np.ndarray` or `torch.Tensor`, *optional*): + An explicit timestep schedule that can be optionally specified. The timesteps are expected to be in + increasing order. + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 40, + sigma_min: float = 0.002, + sigma_max: float = 80.0, + sigma_data: float = 0.5, + s_noise: float = 1.0, + rho: float = 7.0, + clip_denoised: bool = True, + ) -> None: + # standard deviation of the initial noise distribution + self.init_noise_sigma = sigma_max + + ramp = np.linspace(0, 1, num_train_timesteps) + sigmas = self._convert_to_karras(ramp) + timesteps = self.sigma_to_t(sigmas) + + # setable values + self.num_inference_steps = None + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps) + self.custom_timesteps = False + self.is_scale_input_called = False + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + + Returns: + `int` or `None`: + The current step index, or `None` if not yet initialized. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + + Returns: + `int` or `None`: + The begin index, or `None` if not yet set. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Scales the consistency model input by `(sigma**2 + sigma_data**2) ** 0.5`. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + # Get sigma corresponding to timestep + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + + sample = sample / ((sigma**2 + self.config.sigma_data**2) ** 0.5) + + self.is_scale_input_called = True + return sample + + def sigma_to_t(self, sigmas: float | np.ndarray): + """ + Gets scaled timesteps from the Karras sigmas for input to the consistency model. + + Args: + sigmas (`float` or `np.ndarray`): + A single Karras sigma or an array of Karras sigmas. + + Returns: + `np.ndarray`: + A scaled input timestep array. + """ + if not isinstance(sigmas, np.ndarray): + sigmas = np.array(sigmas, dtype=np.float64) + + timesteps = 1000 * 0.25 * np.log(sigmas + 1e-44) + + return timesteps + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + timesteps: list[int] | None = None, + ): + """ + Sets the timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default + timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed, + `num_inference_steps` must be `None`. + """ + if num_inference_steps is None and timesteps is None: + raise ValueError("Exactly one of `num_inference_steps` or `timesteps` must be supplied.") + + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `timesteps`.") + + # Follow DDPMScheduler custom timesteps logic + if timesteps is not None: + for i in range(1, len(timesteps)): + if timesteps[i] >= timesteps[i - 1]: + raise ValueError("`timesteps` must be in descending order.") + + if timesteps[0] >= self.config.num_train_timesteps: + raise ValueError( + f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}." + ) + + timesteps = np.array(timesteps, dtype=np.int64) + self.custom_timesteps = True + else: + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + self.custom_timesteps = False + + # Map timesteps to Karras sigmas directly for multistep sampling + # See https://github.com/openai/consistency_models/blob/main/cm/karras_diffusion.py#L675 + num_train_timesteps = self.config.num_train_timesteps + ramp = timesteps[::-1].copy() + ramp = ramp / (num_train_timesteps - 1) + sigmas = self._convert_to_karras(ramp) + timesteps = self.sigma_to_t(sigmas) + + sigmas = np.concatenate([sigmas, [self.config.sigma_min]]).astype(np.float32) + self.sigmas = torch.from_numpy(sigmas).to(device=device) + + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Modified _convert_to_karras implementation that takes in ramp as argument + def _convert_to_karras(self, ramp: np.ndarray) -> np.ndarray: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + ramp (`np.ndarray`): + A ramp array of values between 0 and 1 used to interpolate between sigma_min and sigma_max. + + Returns: + `np.ndarray`: + The Karras sigma schedule array. + """ + sigma_min: float = self.config.sigma_min + sigma_max: float = self.config.sigma_max + + rho = self.config.rho + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + def get_scalings(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Computes the scaling factors for the consistency model output. + + Args: + sigma (`torch.Tensor`): + The current sigma value in the noise schedule. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing `c_skip` (scaling for the input sample) and `c_out` (scaling for the model output). + """ + sigma_data = self.config.sigma_data + + c_skip = sigma_data**2 / (sigma**2 + sigma_data**2) + c_out = sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + return c_skip, c_out + + def get_scalings_for_boundary_condition(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Gets the scalings used in the consistency model parameterization (from Appendix C of the + [paper](https://huggingface.co/papers/2303.01469)) to enforce boundary condition. + + > [!TIP] > `epsilon` in the equations for `c_skip` and `c_out` is set to `sigma_min`. + + Args: + sigma (`torch.Tensor`): + The current sigma in the Karras sigma schedule. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A two-element tuple where `c_skip` (which weights the current sample) is the first element and `c_out` + (which weights the consistency model output) is the second element. + """ + sigma_min = self.config.sigma_min + sigma_data = self.config.sigma_data + + c_skip = sigma_data**2 / ((sigma - sigma_min) ** 2 + sigma_data**2) + c_out = (sigma - sigma_min) * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + return c_skip, c_out + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> CMStochasticIterativeSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, defaults to `True`): + Whether or not to return a + [`~schedulers.scheduling_consistency_models.CMStochasticIterativeSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_consistency_models.CMStochasticIterativeSchedulerOutput`] or `tuple`: + If return_dict is `True`, + [`~schedulers.scheduling_consistency_models.CMStochasticIterativeSchedulerOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + """ + + if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + f" `{self.__class__}.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if not self.is_scale_input_called: + logger.warning( + "The `scale_model_input` function should be called before `step` to ensure correct denoising. " + "See `StableDiffusionPipeline` for a usage example." + ) + + sigma_min = self.config.sigma_min + sigma_max = self.config.sigma_max + + if self.step_index is None: + self._init_step_index(timestep) + + # sigma_next corresponds to next_t in original implementation + sigma = self.sigmas[self.step_index] + if self.step_index + 1 < self.config.num_train_timesteps: + sigma_next = self.sigmas[self.step_index + 1] + else: + # Set sigma_next to sigma_min + sigma_next = self.sigmas[-1] + + # Get scalings for boundary conditions + c_skip, c_out = self.get_scalings_for_boundary_condition(sigma) + + # 1. Denoise model output using boundary conditions + denoised = c_out * model_output + c_skip * sample + if self.config.clip_denoised: + denoised = denoised.clamp(-1, 1) + + # 2. Sample z ~ N(0, s_noise^2 * I) + # Noise is not used for onestep sampling. + if len(self.timesteps) > 1: + noise = randn_tensor( + model_output.shape, + dtype=model_output.dtype, + device=model_output.device, + generator=generator, + ) + else: + noise = torch.zeros_like(model_output) + z = noise * self.config.s_noise + + sigma_hat = sigma_next.clamp(min=sigma_min, max=sigma_max) + + # 3. Return noisy sample + # tau = sigma_hat, eps = sigma_min + prev_sample = denoised + z * (sigma_hat**2 - sigma_min**2) ** 0.5 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return CMStochasticIterativeSchedulerOutput(prev_sample=prev_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self) -> int: + """ + Returns the number of training timesteps. + + Returns: + `int`: + The number of training timesteps configured for the scheduler. + """ + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py new file mode 100644 index 0000000000000000000000000000000000000000..18ee272ef6193c9d5c9bfb09f3af7a7cc281449c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py @@ -0,0 +1,755 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver and https://github.com/NVlabs/edm + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from .scheduling_dpmsolver_sde import BrownianTreeNoiseSampler +from .scheduling_utils import SchedulerMixin, SchedulerOutput + + +class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + Implements a variant of `DPMSolverMultistepScheduler` with cosine schedule, proposed by Nichol and Dhariwal (2021). + This scheduler was used in Stable Audio Open [1]. + + [1] Evans, Parker, et al. "Stable Audio Open" https://huggingface.co/papers/2407.14358 + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + sigma_min (`float`, defaults to `0.3`): + Minimum noise magnitude in the sigma schedule. This was set to 0.3 in Stable Audio Open [1]. + sigma_max (`float`, defaults to `500`): + Maximum noise magnitude in the sigma schedule. This was set to 500 in Stable Audio Open [1]. + sigma_data (`float`, defaults to `1.0`): + The standard deviation of the data distribution. This is set to 1.0 in Stable Audio Open [1]. + sigma_schedule (`str`, defaults to `"exponential"`): + Sigma schedule to compute the `sigmas`. Must be one of `"exponential"` or `"karras"`. The exponential + schedule was incorporated in [stabilityai/cosxl](https://huggingface.co/stabilityai/cosxl). The Karras + schedule is introduced in the [EDM](https://huggingface.co/papers/2206.00364) paper. + num_train_timesteps (`int`, defaults to `1000`): + The number of diffusion steps to train the model. + solver_order (`int`, defaults to `2`): + The DPMSolver order which can be `1` or `2`. It is recommended to use `solver_order=2`. + prediction_type (`str`, defaults to `"v_prediction"`): + Prediction type of the scheduler function. Must be one of `"epsilon"` (predicts the noise of the diffusion + process), `"sample"` (directly predicts the noisy sample), or `"v_prediction"` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + rho (`float`, defaults to `7.0`): + The parameter for calculating the Karras sigma schedule from the EDM + [paper](https://huggingface.co/papers/2206.00364). + solver_type (`str`, defaults to `"midpoint"`): + Solver type for the second-order solver. Must be one of `"midpoint"` or `"heun"`. The solver type slightly + affects the sample quality, especially for a small number of steps. It is recommended to use `"midpoint"`. + lower_order_final (`bool`, defaults to `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + euler_at_final (`bool`, defaults to `False`): + Whether to use Euler's method in the final step. It is a trade-off between numerical stability and detail + richness. This can stabilize the sampling of the SDE variant of DPMSolver for small number of inference + steps, but sometimes may result in blurring. + final_sigmas_type (`str`, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. Must be one of `"zero"` or + `"sigma_min"`. If `"sigma_min"`, the final sigma is the same as the last sigma in the training schedule. If + `"zero"`, the final sigma is set to 0. + """ + + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + sigma_min: float = 0.3, + sigma_max: float = 500, + sigma_data: float = 1.0, + sigma_schedule: Literal["exponential", "karras"] = "exponential", + num_train_timesteps: int = 1000, + solver_order: int = 2, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "v_prediction", + rho: float = 7.0, + solver_type: Literal["midpoint", "heun"] = "midpoint", + lower_order_final: bool = True, + euler_at_final: bool = False, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", + ) -> None: + if solver_type not in ["midpoint", "heun"]: + if solver_type in ["logrho", "bh1", "bh2"]: + self.register_to_config(solver_type="midpoint") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + ramp = torch.linspace(0, 1, num_train_timesteps) + if sigma_schedule == "karras": + sigmas = self._compute_karras_sigmas(ramp) + elif sigma_schedule == "exponential": + sigmas = self._compute_exponential_sigmas(ramp) + + self.timesteps = self.precondition_noise(sigmas) + + self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) + + # setable values + self.num_inference_steps = None + self.model_outputs = [None] * solver_order + self.lower_order_nums = 0 + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self) -> float: + """ + The standard deviation of the initial noise distribution. + + Returns: + `float`: + The initial noise sigma value computed as `sqrt(sigma_max^2 + 1)`. + """ + return (self.config.sigma_max**2 + 1) ** 0.5 + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + + Returns: + `int` or `None`: + The current step index, or `None` if not yet initialized. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + + Returns: + `int` or `None`: + The begin index, or `None` if not yet set. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_inputs + def precondition_inputs(self, sample: torch.Tensor, sigma: float | torch.Tensor) -> torch.Tensor: + """ + Precondition the input sample by scaling it according to the EDM formulation. + + Args: + sample (`torch.Tensor`): + The input sample tensor to precondition. + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `torch.Tensor`: + The scaled input sample. + """ + c_in = self._get_conditioning_c_in(sigma) + scaled_sample = sample * c_in + return scaled_sample + + def precondition_noise(self, sigma: float | torch.Tensor) -> torch.Tensor: + """ + Precondition the noise level by computing a normalized timestep representation. + + Args: + sigma (`float` or `torch.Tensor`): + The sigma (noise level) value to precondition. + + Returns: + `torch.Tensor`: + The preconditioned noise value computed as `atan(sigma) / pi * 2`. + """ + if not isinstance(sigma, torch.Tensor): + sigma = torch.tensor([sigma]) + + return sigma.atan() / math.pi * 2 + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_outputs + def precondition_outputs( + self, + sample: torch.Tensor, + model_output: torch.Tensor, + sigma: float | torch.Tensor, + ) -> torch.Tensor: + """ + Precondition the model outputs according to the EDM formulation. + + Args: + sample (`torch.Tensor`): + The input sample tensor. + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `torch.Tensor`: + The denoised sample computed by combining the skip connection and output scaling. + """ + sigma_data = self.config.sigma_data + c_skip = sigma_data**2 / (sigma**2 + sigma_data**2) + + if self.config.prediction_type == "epsilon": + c_out = sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + elif self.config.prediction_type == "v_prediction": + c_out = -sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + else: + raise ValueError(f"Prediction type {self.config.prediction_type} is not supported.") + + denoised = c_skip * sample + c_out * model_output + + return denoised + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.scale_model_input + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Scale the denoising model input to match the Euler algorithm. Ensures interchangeability with schedulers that + need to scale the denoising model input depending on the current timestep. + + Args: + sample (`torch.Tensor`): + The input sample tensor. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = self.precondition_inputs(sample, sigma) + + self.is_scale_input_called = True + return sample + + def set_timesteps(self, num_inference_steps: int | None = None, device: str | torch.device | None = None) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + + self.num_inference_steps = num_inference_steps + + ramp = torch.linspace(0, 1, self.num_inference_steps) + if self.config.sigma_schedule == "karras": + sigmas = self._compute_karras_sigmas(ramp) + elif self.config.sigma_schedule == "exponential": + sigmas = self._compute_exponential_sigmas(ramp) + + sigmas = sigmas.to(dtype=torch.float32, device=device) + self.timesteps = self.precondition_noise(sigmas) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = self.config.sigma_min + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + + self.sigmas = torch.cat([sigmas, torch.tensor([sigma_last], dtype=torch.float32, device=device)]) + + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # if a noise sampler is used, reinitialise it + self.noise_sampler = None + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._compute_karras_sigmas + def _compute_karras_sigmas( + self, + ramp: torch.Tensor, + sigma_min: float | None = None, + sigma_max: float | None = None, + ) -> torch.Tensor: + """ + Construct the noise schedule of [Karras et al. (2022)](https://huggingface.co/papers/2206.00364). + + Args: + ramp (`torch.Tensor`): + A tensor of values in [0, 1] representing the interpolation positions. + sigma_min (`float`, *optional*): + Minimum sigma value. If `None`, uses `self.config.sigma_min`. + sigma_max (`float`, *optional*): + Maximum sigma value. If `None`, uses `self.config.sigma_max`. + + Returns: + `torch.Tensor`: + The computed Karras sigma schedule. + """ + sigma_min = sigma_min or self.config.sigma_min + sigma_max = sigma_max or self.config.sigma_max + + rho = self.config.rho + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._compute_exponential_sigmas + def _compute_exponential_sigmas( + self, + ramp: torch.Tensor, + sigma_min: float | None = None, + sigma_max: float | None = None, + ) -> torch.Tensor: + """ + Compute the exponential sigma schedule. Implementation closely follows k-diffusion: + https://github.com/crowsonkb/k-diffusion/blob/6ab5146d4a5ef63901326489f31f1d8e7dd36b48/k_diffusion/sampling.py#L26 + + Args: + ramp (`torch.Tensor`): + A tensor of values representing the interpolation positions. + sigma_min (`float`, *optional*): + Minimum sigma value. If `None`, uses `self.config.sigma_min`. + sigma_max (`float`, *optional*): + Maximum sigma value. If `None`, uses `self.config.sigma_max`. + + Returns: + `torch.Tensor`: + The computed exponential sigma schedule. + """ + sigma_min = sigma_min or self.config.sigma_min + sigma_max = sigma_max or self.config.sigma_max + sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), len(ramp)).exp().flip(0) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Convert sigma to alpha and sigma_t values for the diffusion process. + + Args: + sigma (`torch.Tensor`): + The sigma (noise level) value. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing `alpha_t` (always 1 since inputs are pre-scaled) and `sigma_t` (same as input + sigma). + """ + alpha_t = torch.tensor(1) # Inputs are pre-scaled before going into unet, so alpha_t = 1 + sigma_t = sigma + + return alpha_t, sigma_t + + def convert_model_output( + self, + model_output: torch.Tensor, + sample: torch.Tensor = None, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is + designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an + integral of the data prediction model. + + > [!TIP] > The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both + noise > prediction and data prediction models. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + sigma = self.sigmas[self.step_index] + x0_pred = self.precondition_outputs(sample, model_output, sigma) + + return x0_pred + + def dpm_solver_first_order_update( + self, + model_output: torch.Tensor, + sample: torch.Tensor = None, + noise: torch.Tensor | None = None, + ) -> torch.Tensor: + """ + One step for the first-order DPMSolver (equivalent to DDIM). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + sigma_t, sigma_s = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s = torch.log(alpha_s) - torch.log(sigma_s) + + h = lambda_t - lambda_s + assert noise is not None + x_t = ( + (sigma_t / sigma_s * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * model_output + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + + return x_t + + def multistep_dpm_solver_second_order_update( + self, + model_output_list: list[torch.Tensor], + sample: torch.Tensor = None, + noise: torch.Tensor | None = None, + ) -> torch.Tensor: + """ + One step for the second-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + sigma_t, sigma_s0, sigma_s1 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + + m0, m1 = model_output_list[-1], model_output_list[-2] + + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + + # sde-dpmsolver++ + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + 0.5 * (alpha_t * (1 - torch.exp(-2.0 * h))) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + + return x_t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._init_step_index + def _init_step_index(self, timestep: int | torch.Tensor) -> None: + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep DPMSolver. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Improve numerical stability for small number of steps + lower_order_final = (self.step_index == len(self.timesteps) - 1) and ( + self.config.euler_at_final + or (self.config.lower_order_final and len(self.timesteps) < 15) + or self.config.final_sigmas_type == "zero" + ) + lower_order_second = ( + (self.step_index == len(self.timesteps) - 2) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + + model_output = self.convert_model_output(model_output, sample=sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + if self.noise_sampler is None: + seed = None + if generator is not None: + seed = ( + [g.initial_seed() for g in generator] if isinstance(generator, list) else generator.initial_seed() + ) + self.noise_sampler = BrownianTreeNoiseSampler( + model_output, + sigma_min=self.config.sigma_min, + sigma_max=self.config.sigma_max, + seed=seed, + ) + noise = self.noise_sampler(self.sigmas[self.step_index], self.sigmas[self.step_index + 1]).to( + model_output.device + ) + + if self.config.solver_order == 1 or self.lower_order_nums < 1 or lower_order_final: + prev_sample = self.dpm_solver_first_order_update(model_output, sample=sample, noise=noise) + elif self.config.solver_order == 2 or self.lower_order_nums < 2 or lower_order_second: + prev_sample = self.multistep_dpm_solver_second_order_update(self.model_outputs, sample=sample, noise=noise) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._get_conditioning_c_in + def _get_conditioning_c_in(self, sigma: float | torch.Tensor) -> float | torch.Tensor: + """ + Compute the input conditioning factor for the EDM formulation. + + Args: + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `float` or `torch.Tensor`: + The input conditioning factor `c_in`. + """ + c_in = 1 / ((sigma**2 + self.config.sigma_data**2) ** 0.5) + return c_in + + def __len__(self) -> int: + """ + Returns the number of training timesteps. + + Returns: + `int`: + The number of training timesteps configured for the scheduler. + """ + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim.py new file mode 100644 index 0000000000000000000000000000000000000000..d29bb5b2593cf99535d608e91af0a8d33a5c3e44 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim.py @@ -0,0 +1,595 @@ +# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM +class DDIMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class DDIMScheduler(SchedulerMixin, ConfigMixin): + """ + `DDIMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with + non-Markovian guidance. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`Literal["linear", "scaled_linear", "squaredcos_cap_v2"]`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Must be one + of `"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, defaults to `True`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the alpha value at step 0. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + prediction_type (`Literal["epsilon", "sample", "v_prediction"]`, defaults to `"epsilon"`): + Prediction type of the scheduler function. Must be one of `"epsilon"` (predicts the noise of the diffusion + process), `"sample"` (directly predicts the noisy sample), or `"v_prediction"` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`Literal["leading", "trailing", "linspace"]`, defaults to `"leading"`): + The way the timesteps should be scaled. Must be one of `"leading"`, `"trailing"`, or `"linspace"`. Refer to + Table 2 of the [Common Diffusion Noise Schedules and Sample Steps are + Flawed](https://huggingface.co/papers/2305.08891) for more information. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + clip_sample: bool = True, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + clip_sample_range: float = 1.0, + sample_max_value: float = 1.0, + timestep_spacing: Literal["leading", "trailing", "linspace"] = "leading", + rescale_betas_zero_snr: bool = False, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + + def scale_model_input(self, sample: torch.Tensor, timestep: int = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor: + """ + Computes the variance of the noise added at a given diffusion step. + + For a given `timestep` and its previous step, this method calculates the variance as defined in DDIM/DDPM + literature: + var_t = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + where alpha_prod and beta_prod are cumulative products of alphas and betas, respectively. + + Args: + timestep (`int`): + The current timestep in the diffusion process. + prev_timestep (`int`): + The previous timestep in the diffusion process. If negative, uses `final_alpha_cumprod`. + + Returns: + `torch.Tensor`: + The variance for the current timestep. + """ + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str | torch.device`, *optional*): + The device to use for the timesteps. + + Raises: + ValueError: If `num_inference_steps` is larger than `self.config.num_train_timesteps`. + """ + + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps) + .round()[::-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator: torch.Generator | None = None, + variance_noise: torch.Tensor | None = None, + return_dict: bool = True, + ) -> DDIMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + eta (`float`, *optional*, defaults to 0.0): + The weight of noise for added noise in diffusion step. A value of 0 corresponds to DDIM (deterministic) + and 1 corresponds to DDPM (fully stochastic). + use_clipped_model_output (`bool`, *optional*, defaults to `False`): + If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary + because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no + clipping has happened, "corrected" `model_output` would coincide with the one provided as input and + `use_clipped_model_output` has no effect. + generator (`torch.Generator`, *optional*): + A random number generator for reproducible sampling. + variance_noise (`torch.Tensor`, *optional*): + Alternative to generating noise with `generator` by directly providing the noise for the variance + itself. Useful for methods such as [`CycleDiffusion`]. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502 + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointing to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + # 4. Clip or threshold "predicted x_0" + if self.config.thresholding: + pred_original_sample = self._threshold_sample(pred_original_sample) + elif self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance ** (0.5) + + if use_clipped_model_output: + # the pred_epsilon is always re-derived from the clipped x_0 in Glide + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502 + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon + + # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502 + prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction + + if eta > 0: + if variance_noise is not None and generator is not None: + raise ValueError( + "Cannot pass both generator and variance_noise. Please make sure that either `generator` or" + " `variance_noise` stays `None`." + ) + + if variance_noise is None: + variance_noise = randn_tensor( + model_output.shape, generator=generator, device=model_output.device, dtype=model_output.dtype + ) + variance = std_dev_t * variance_noise + + prev_sample = prev_sample + variance + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_cogvideox.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_cogvideox.py new file mode 100644 index 0000000000000000000000000000000000000000..11cbb8a6b9f7e372005c2ae460ac4676362fcc28 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_cogvideox.py @@ -0,0 +1,504 @@ +# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM +class DDIMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +def rescale_zero_terminal_snr(alphas_cumprod: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on (Algorithm 1)[https://huggingface.co/papers/2305.08891] + + Args: + alphas_cumprod (`torch.Tensor`): + The alphas cumulative products that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: rescaled betas with zero terminal SNR + """ + + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + + return alphas_bar + + +class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): + """ + `DDIMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with + non-Markovian guidance. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.00085): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.0120): + The final `beta` value. + beta_schedule (`str`, defaults to `"scaled_linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, defaults to `True`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the alpha value at step 0. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`str`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + snr_shift_scale (`float`, defaults to 3.0): + Shift scale for SNR. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, + beta_end: float = 0.0120, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear", + trained_betas: np.ndarray | list[float] | None = None, + clip_sample: bool = True, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + clip_sample_range: float = 1.0, + sample_max_value: float = 1.0, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading", + rescale_betas_zero_snr: bool = False, + snr_shift_scale: float = 3.0, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float64, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # Modify: SNR shift following SD3 + self.alphas_cumprod = self.alphas_cumprod / (snr_shift_scale + (1 - snr_shift_scale) * self.alphas_cumprod) + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.alphas_cumprod = rescale_zero_terminal_snr(self.alphas_cumprod) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + + def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor: + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def scale_model_input(self, sample: torch.Tensor, timestep: int = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device | None = None, + ) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + """ + + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps) + .round()[::-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator: torch.Generator | None = None, + variance_noise: torch.Tensor | None = None, + return_dict: bool = True, + ) -> DDIMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + eta (`float`): + The weight of noise for added noise in diffusion step. + use_clipped_model_output (`bool`, defaults to `False`): + If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary + because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no + clipping has happened, "corrected" `model_output` would coincide with the one provided as input and + `use_clipped_model_output` has no effect. + generator (`torch.Generator`, *optional*): + A random number generator. + variance_noise (`torch.Tensor`): + Alternative to generating noise with `generator` by directly providing the noise for the variance + itself. Useful for methods such as [`CycleDiffusion`]. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502 + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointing to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + # To make style tests pass, commented out `pred_epsilon` as it is an unused variable + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + # pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + # pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + # pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + a_t = ((1 - alpha_prod_t_prev) / (1 - alpha_prod_t)) ** 0.5 + b_t = alpha_prod_t_prev**0.5 - alpha_prod_t**0.5 * a_t + + prev_sample = a_t * sample + b_t * pred_original_sample + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..3bb44caef2bf477b49051a4ededcf52c7e1b43ec --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_flax.py @@ -0,0 +1,326 @@ +# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +from dataclasses import dataclass + +import flax +import jax.numpy as jnp + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + CommonSchedulerState, + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + add_noise_common, + get_velocity_common, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class DDIMSchedulerState: + common: CommonSchedulerState + final_alpha_cumprod: jnp.ndarray + + # setable values + init_noise_sigma: jnp.ndarray + timesteps: jnp.ndarray + num_inference_steps: int = None + + @classmethod + def create( + cls, + common: CommonSchedulerState, + final_alpha_cumprod: jnp.ndarray, + init_noise_sigma: jnp.ndarray, + timesteps: jnp.ndarray, + ): + return cls( + common=common, + final_alpha_cumprod=final_alpha_cumprod, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + +@dataclass +class FlaxDDIMSchedulerOutput(FlaxSchedulerOutput): + state: DDIMSchedulerState + + +class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + Denoising diffusion implicit models is a scheduler that extends the denoising procedure introduced in denoising + diffusion probabilistic models (DDPMs) with non-Markovian guidance. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2010.02502 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`jnp.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + clip_sample (`bool`, default `True`): + option to clip predicted sample between for numerical stability. The clip range is determined by + `clip_sample_range`. + clip_sample_range (`float`, default `1.0`): + the maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, default `True`): + each diffusion step uses the value of alphas product at that step and at the previous one. For the final + step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the value of alpha at step 0. + steps_offset (`int`, default `0`): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, default `epsilon`): + indicates whether the model predicts the noise (epsilon), or the samples. One of `epsilon`, `sample`. + `v-prediction` is not supported for this scheduler. + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + the `dtype` used for params and computation. + """ + + _compatibles = [e.name for e in FlaxKarrasDiffusionSchedulers] + + dtype: jnp.dtype + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: jnp.ndarray | None = None, + clip_sample: bool = True, + clip_sample_range: float = 1.0, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: str = "epsilon", + dtype: jnp.dtype = jnp.float32, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.dtype = dtype + + def create_state(self, common: CommonSchedulerState | None = None) -> DDIMSchedulerState: + if common is None: + common = CommonSchedulerState.create(self) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + final_alpha_cumprod = ( + jnp.array(1.0, dtype=self.dtype) if self.config.set_alpha_to_one else common.alphas_cumprod[0] + ) + + # standard deviation of the initial noise distribution + init_noise_sigma = jnp.array(1.0, dtype=self.dtype) + + timesteps = jnp.arange(0, self.config.num_train_timesteps).round()[::-1] + + return DDIMSchedulerState.create( + common=common, + final_alpha_cumprod=final_alpha_cumprod, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + def scale_model_input( + self, + state: DDIMSchedulerState, + sample: jnp.ndarray, + timestep: int | None = None, + ) -> jnp.ndarray: + """ + Args: + state (`PNDMSchedulerState`): the `FlaxPNDMScheduler` state data class instance. + sample (`jnp.ndarray`): input sample + timestep (`int`, optional): current timestep + + Returns: + `jnp.ndarray`: scaled input sample + """ + return sample + + def set_timesteps( + self, state: DDIMSchedulerState, num_inference_steps: int, shape: tuple = () + ) -> DDIMSchedulerState: + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`DDIMSchedulerState`): + the `FlaxDDIMScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + """ + step_ratio = self.config.num_train_timesteps // num_inference_steps + # creates integer timesteps by multiplying by ratio + # rounding to avoid issues when num_inference_step is power of 3 + timesteps = (jnp.arange(0, num_inference_steps) * step_ratio).round()[::-1] + self.config.steps_offset + + return state.replace( + num_inference_steps=num_inference_steps, + timesteps=timesteps, + ) + + def _get_variance(self, state: DDIMSchedulerState, timestep, prev_timestep): + alpha_prod_t = state.common.alphas_cumprod[timestep] + alpha_prod_t_prev = jnp.where( + prev_timestep >= 0, + state.common.alphas_cumprod[prev_timestep], + state.final_alpha_cumprod, + ) + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def step( + self, + state: DDIMSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + eta: float = 0.0, + return_dict: bool = True, + ) -> FlaxDDIMSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + state (`DDIMSchedulerState`): the `FlaxDDIMScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than FlaxDDIMSchedulerOutput class + + Returns: + [`FlaxDDIMSchedulerOutput`] or `tuple`: [`FlaxDDIMSchedulerOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502 + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointing to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // state.num_inference_steps + + alphas_cumprod = state.common.alphas_cumprod + final_alpha_cumprod = state.final_alpha_cumprod + + # 2. compute alphas, betas + alpha_prod_t = alphas_cumprod[timestep] + alpha_prod_t_prev = jnp.where(prev_timestep >= 0, alphas_cumprod[prev_timestep], final_alpha_cumprod) + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + # 4. Clip or threshold "predicted x_0" + if self.config.clip_sample: + pred_original_sample = pred_original_sample.clip( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 4. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(state, timestep, prev_timestep) + std_dev_t = eta * variance ** (0.5) + + # 5. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502 + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon + + # 6. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502 + prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction + + if not return_dict: + return (prev_sample, state) + + return FlaxDDIMSchedulerOutput(prev_sample=prev_sample, state=state) + + def add_noise( + self, + state: DDIMSchedulerState, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + return add_noise_common(state.common, original_samples, noise, timesteps) + + def get_velocity( + self, + state: DDIMSchedulerState, + sample: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + return get_velocity_common(state.common, sample, noise, timesteps) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_inverse.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_inverse.py new file mode 100644 index 0000000000000000000000000000000000000000..4d130266841bb9405e7fa062f0195ec157c6ea19 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_inverse.py @@ -0,0 +1,385 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from diffusers.configuration_utils import ConfigMixin, register_to_config +from diffusers.schedulers.scheduling_utils import SchedulerMixin +from diffusers.utils import BaseOutput, deprecate + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM +class DDIMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): + """ + `DDIMInverseScheduler` is the reverse scheduler of [`DDIMScheduler`]. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, defaults to `True`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to 0, otherwise + it uses the alpha value at step `num_train_timesteps - 1`. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + timestep_spacing (`str`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + order = 1 + ignore_for_config = ["kwargs"] + _deprecated_kwargs = ["set_alpha_to_zero"] + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + clip_sample: bool = True, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + clip_sample_range: float = 1.0, + timestep_spacing: Literal["leading", "trailing"] = "leading", + rescale_betas_zero_snr: bool = False, + **kwargs, + ): + if kwargs.get("set_alpha_to_zero", None) is not None: + deprecation_message = ( + "The `set_alpha_to_zero` argument is deprecated. Please use `set_alpha_to_one` instead." + ) + deprecate("set_alpha_to_zero", "1.0.0", deprecation_message, standard_warn=False) + set_alpha_to_one = kwargs["set_alpha_to_zero"] + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # At every step in inverted ddim, we are looking into the next alphas_cumprod + # For the initial step, there is no current alphas_cumprod, and the index is out of bounds + # `set_alpha_to_one` decides whether we set this parameter simply to one + # in this case, self.step() just output the predicted noise + # or whether we use the initial alpha used in training the diffusion model. + self.initial_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps).copy().astype(np.int64)) + + # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler.scale_model_input + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device | None = None, + ) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + """ + + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + + # "leading" and "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round().copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)[::-1]).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + return_dict: bool = True, + ) -> DDIMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_ddim_inverse.DDIMInverseSchedulerOutput`] or + `tuple`. + + Returns: + [`~schedulers.scheduling_ddim_inverse.DDIMInverseSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_ddim_inverse.DDIMInverseSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + + """ + # 1. get previous step value (=t+1) + prev_timestep = timestep + timestep = min( + timestep - self.config.num_train_timesteps // self.num_inference_steps, + self.config.num_train_timesteps - 1, + ) + + # 2. compute alphas, betas + # change original implementation to exactly match noise levels for analogous forward process + alpha_prod_t = self.alphas_cumprod[timestep] if timestep >= 0 else self.initial_alpha_cumprod + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + # 4. Clip or threshold "predicted x_0" + if self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 5. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502 + pred_sample_direction = (1 - alpha_prod_t_prev) ** (0.5) * pred_epsilon + + # 6. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502 + prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction + + if not return_dict: + return (prev_sample, pred_original_sample) + return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_parallel.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..a5bbac60275c00b50aef45204ad86507e9ae3bad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddim_parallel.py @@ -0,0 +1,705 @@ +# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput +class DDIMParallelSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class DDIMParallelScheduler(SchedulerMixin, ConfigMixin): + """ + Denoising diffusion implicit models is a scheduler that extends the denoising procedure introduced in denoising + diffusion probabilistic models (DDPMs) with non-Markovian guidance. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2010.02502 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + clip_sample (`bool`, default `True`): + option to clip predicted sample for numerical stability. + clip_sample_range (`float`, default `1.0`): + the maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, default `True`): + each diffusion step uses the value of alphas product at that step and at the previous one. For the final + step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the value of alpha at step 0. + steps_offset (`int`, default `0`): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, default `epsilon`, optional): + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4 + https://huggingface.co/papers/2210.02303) + thresholding (`bool`, default `False`): + whether to use the "dynamic thresholding" method (introduced by Imagen, + https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space + diffusion models (such as stable-diffusion). + dynamic_thresholding_ratio (`float`, default `0.995`): + the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen + (https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`. + sample_max_value (`float`, default `1.0`): + the threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`str`, default `"leading"`): + The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample + Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + rescale_betas_zero_snr (`bool`, default `False`): + whether to rescale the betas to have zero terminal SNR (proposed by + https://huggingface.co/papers/2305.08891). This can enable the model to generate very bright and dark + samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + _is_ode_scheduler = True + + @register_to_config + # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler.__init__ + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + clip_sample: bool = True, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + clip_sample_range: float = 1.0, + sample_max_value: float = 1.0, + timestep_spacing: Literal["leading", "trailing", "linspace"] = "leading", + rescale_betas_zero_snr: bool = False, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + + # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler.scale_model_input + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def _get_variance(self, timestep: int, prev_timestep: int | None = None) -> torch.Tensor: + if prev_timestep is None: + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def _batch_get_variance(self, t: torch.Tensor, prev_t: torch.Tensor) -> torch.Tensor: + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[torch.clip(prev_t, min=0)] + alpha_prod_t_prev[prev_t < 0] = torch.tensor(1.0) + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler.set_timesteps + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str | torch.device`, *optional*): + The device to use for the timesteps. + + Raises: + ValueError: If `num_inference_steps` is larger than `self.config.num_train_timesteps`. + """ + + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps) + .round()[::-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator: torch.Generator | None = None, + variance_noise: torch.Tensor | None = None, + return_dict: bool = True, + ) -> DDIMParallelSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + current instance of sample being created by diffusion process. + eta (`float`): weight of noise for added noise in diffusion step. + use_clipped_model_output (`bool`, defaults to `False`): + If `True`, compute "corrected" `model_output` from the clipped predicted original sample. This + correction is necessary because the predicted original sample is clipped to [-1, 1] when + `self.config.clip_sample` is `True`. If no clipping occurred, the "corrected" `model_output` matches + the input and `use_clipped_model_output` has no effect. + generator (`torch.Generator`, *optional*): + Random number generator. + variance_noise (`torch.Tensor`): instead of generating noise for the variance using `generator`, we + can directly provide the noise for the variance itself. This is useful for methods such as + CycleDiffusion. (https://huggingface.co/papers/2210.05559) + return_dict (`bool`): option for returning tuple rather than DDIMParallelSchedulerOutput class + + Returns: + [`~schedulers.scheduling_utils.DDIMParallelSchedulerOutput`] or `tuple`: + [`~schedulers.scheduling_utils.DDIMParallelSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. + When returning a tuple, the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502 + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointing to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + # 4. Clip or threshold "predicted x_0" + if self.config.thresholding: + pred_original_sample = self._threshold_sample(pred_original_sample) + elif self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance ** (0.5) + + if use_clipped_model_output: + # the pred_epsilon is always re-derived from the clipped x_0 in Glide + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502 + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon + + # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502 + prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction + + if eta > 0: + if variance_noise is not None and generator is not None: + raise ValueError( + "Cannot pass both generator and variance_noise. Please make sure that either `generator` or" + " `variance_noise` stays `None`." + ) + + if variance_noise is None: + variance_noise = randn_tensor( + model_output.shape, + generator=generator, + device=model_output.device, + dtype=model_output.dtype, + ) + variance = std_dev_t * variance_noise + + prev_sample = prev_sample + variance + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return DDIMParallelSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + def batch_step_no_noise( + self, + model_output: torch.Tensor, + timesteps: torch.Tensor, + sample: torch.Tensor, + eta: float = 0.0, + use_clipped_model_output: bool = False, + ) -> torch.Tensor: + """ + Batched version of the `step` function, to be able to reverse the SDE for multiple samples/timesteps at once. + Also, does not add any noise to the predicted sample, which is necessary for parallel sampling where the noise + is pre-sampled by the pipeline. + + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): direct output from learned diffusion model. + timesteps (`torch.Tensor`): + current discrete timesteps in the diffusion chain. This is now a list of integers. + sample (`torch.Tensor`): + current instance of sample being created by diffusion process. + eta (`float`): weight of noise for added noise in diffusion step. + use_clipped_model_output (`bool`): if `True`, compute "corrected" `model_output` from the clipped + predicted original sample. Necessary because predicted original sample is clipped to [-1, 1] when + `self.config.clip_sample` is `True`. If no clipping has happened, "corrected" `model_output` would + coincide with the one provided as input and `use_clipped_model_output` will have not effect. + + Returns: + `torch.Tensor`: sample tensor at previous timestep. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + assert eta == 0.0 + + # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502 + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointing to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + t = timesteps + prev_t = t - self.config.num_train_timesteps // self.num_inference_steps + + t = t.view(-1, *([1] * (model_output.ndim - 1))) + prev_t = prev_t.view(-1, *([1] * (model_output.ndim - 1))) + + # 1. compute alphas, betas + self.alphas_cumprod = self.alphas_cumprod.to(model_output.device) + self.final_alpha_cumprod = self.final_alpha_cumprod.to(model_output.device) + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[torch.clip(prev_t, min=0)] + alpha_prod_t_prev[prev_t < 0] = torch.tensor(1.0) + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + # 4. Clip or threshold "predicted x_0" + if self.config.thresholding: + pred_original_sample = self._threshold_sample(pred_original_sample) + elif self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._batch_get_variance(t, prev_t).to(model_output.device).view(*alpha_prod_t_prev.shape) + std_dev_t = eta * variance ** (0.5) + + if use_clipped_model_output: + # the pred_epsilon is always re-derived from the clipped x_0 in Glide + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502 + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon + + # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502 + prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction + + return prev_sample + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm.py new file mode 100644 index 0000000000000000000000000000000000000000..972c46c6e930119886ee66db455588ef5de603bb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm.py @@ -0,0 +1,668 @@ +# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +@dataclass +class DDPMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class DDPMScheduler(SchedulerMixin, ConfigMixin): + """ + `DDPMScheduler` explores the connections between denoising score matching and Langevin dynamics sampling. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to `1000`): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to `0.0001`): + The starting `beta` value of inference. + beta_end (`float`, defaults to `0.02`): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, `"squaredcos_cap_v2"`, or `"sigmoid"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. + trained_betas (`np.ndarray`, *optional*): + An array of betas to pass directly to the constructor without using `beta_start` and `beta_end`. + variance_type (`"fixed_small"`, `"fixed_small_log"`, `"fixed_large"`, `"fixed_large_log"`, `"learned"`, or `"learned_range"`, defaults to `"fixed_small"`): + Clip the variance when adding noise to the denoised sample. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to `1.0`): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + prediction_type (`"epsilon"`, `"sample"`, or `"v_prediction"`, defaults to `"epsilon"`): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to `0.995`): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to `1.0`): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to `0`): + An offset added to the inference steps, as required by some model families. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + variance_type: Literal[ + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", + ] = "fixed_small", + clip_sample: bool = True, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + clip_sample_range: float = 1.0, + sample_max_value: float = 1.0, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading", + steps_offset: int = 0, + rescale_betas_zero_snr: bool = False, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + elif beta_schedule == "laplace": + self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="laplace") + elif beta_schedule == "sigmoid": + # GeoDiff sigmoid schedule + betas = torch.linspace(-6, 6, num_train_timesteps) + self.betas = torch.sigmoid(betas) * (beta_end - beta_start) + beta_start + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + self.one = torch.tensor(1.0) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.custom_timesteps = False + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + self.variance_type = variance_type + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device = None, + timesteps: list[int] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. If used, + `timesteps` must be `None`. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default + timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed, + `num_inference_steps` must be `None`. + + """ + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.") + + if timesteps is not None: + for i in range(1, len(timesteps)): + if timesteps[i] >= timesteps[i - 1]: + raise ValueError("`custom_timesteps` must be in descending order.") + + if timesteps[0] >= self.config.num_train_timesteps: + raise ValueError( + f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}." + ) + + timesteps = np.array(timesteps, dtype=np.int64) + self.custom_timesteps = True + else: + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + self.custom_timesteps = False + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps) + .round()[::-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + def _get_variance( + self, + t: int, + predicted_variance: torch.Tensor | None = None, + variance_type: Literal[ + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", + ] + | None = None, + ) -> torch.Tensor: + """ + Compute the variance for a given timestep according to the specified variance type. + + Args: + t (`int`): + The current timestep. + predicted_variance (`torch.Tensor`, *optional*): + The predicted variance from the model. Used only when `variance_type` is `"learned"` or + `"learned_range"`. + variance_type (`"fixed_small"`, `"fixed_small_log"`, `"fixed_large"`, `"fixed_large_log"`, `"learned"`, or `"learned_range"`, *optional*): + The type of variance to compute. If `None`, uses the variance type specified in the scheduler + configuration. + + Returns: + `torch.Tensor`: + The computed variance. + """ + prev_t = self.previous_timestep(t) + + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one + current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t + + # we always take the log of variance, so clamp it to ensure it's not 0 + variance = torch.clamp(variance, min=1e-20) + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probably added for training stability + if variance_type == "fixed_small": + variance = variance + # for rl-diffuser https://huggingface.co/papers/2205.09991 + elif variance_type == "fixed_small_log": + variance = torch.log(variance) + variance = torch.exp(0.5 * variance) + elif variance_type == "fixed_large": + variance = current_beta_t + elif variance_type == "fixed_large_log": + # Glide max_log + variance = torch.log(current_beta_t) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = torch.log(variance) + max_log = torch.log(current_beta_t) + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> DDPMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + t = timestep + + prev_t = self.previous_timestep(t) + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [ + "learned", + "learned_range", + ]: + model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + current_alpha_t = alpha_prod_t / alpha_prod_t_prev + current_beta_t = 1 - current_alpha_t + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" + " `v_prediction` for the DDPMScheduler." + ) + + # 3. Clip or threshold "predicted x_0" + if self.config.thresholding: + pred_original_sample = self._threshold_sample(pred_original_sample) + elif self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t + current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + device = model_output.device + variance_noise = randn_tensor( + model_output.shape, + generator=generator, + device=device, + dtype=model_output.dtype, + ) + if self.variance_type == "fixed_small_log": + variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise + elif self.variance_type == "learned_range": + variance = self._get_variance(t, predicted_variance=predicted_variance) + variance = torch.exp(0.5 * variance) * variance_noise + else: + variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * variance_noise + + pred_prev_sample = pred_prev_sample + variance + + if not return_dict: + return ( + pred_prev_sample, + pred_original_sample, + ) + + return DDPMSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self) -> int: + return self.config.num_train_timesteps + + def previous_timestep(self, timestep: int) -> int | torch.Tensor: + """ + Compute the previous timestep in the diffusion chain. + + Args: + timestep (`int`): + The current timestep. + + Returns: + `int` or `torch.Tensor`: + The previous timestep. + """ + if self.custom_timesteps or self.num_inference_steps: + index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] + if index == self.timesteps.shape[0] - 1: + prev_t = torch.tensor(-1) + else: + prev_t = self.timesteps[index + 1] + else: + prev_t = timestep - 1 + return prev_t diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..7840ff729488f0b902bfa49c69fc0145f127067a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_flax.py @@ -0,0 +1,318 @@ +# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +from dataclasses import dataclass + +import flax +import jax +import jax.numpy as jnp + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + CommonSchedulerState, + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + add_noise_common, + get_velocity_common, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class DDPMSchedulerState: + common: CommonSchedulerState + + # setable values + init_noise_sigma: jnp.ndarray + timesteps: jnp.ndarray + num_inference_steps: int = None + + @classmethod + def create( + cls, + common: CommonSchedulerState, + init_noise_sigma: jnp.ndarray, + timesteps: jnp.ndarray, + ): + return cls(common=common, init_noise_sigma=init_noise_sigma, timesteps=timesteps) + + +@dataclass +class FlaxDDPMSchedulerOutput(FlaxSchedulerOutput): + state: DDPMSchedulerState + + +class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and + Langevin dynamics sampling. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2006.11239 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + variance_type (`str`): + options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, + `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. + clip_sample (`bool`, default `True`): + option to clip predicted sample between -1 and 1 for numerical stability. + prediction_type (`str`, default `epsilon`): + indicates whether the model predicts the noise (epsilon), or the samples. One of `epsilon`, `sample`. + `v-prediction` is not supported for this scheduler. + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + the `dtype` used for params and computation. + """ + + _compatibles = [e.name for e in FlaxKarrasDiffusionSchedulers] + + dtype: jnp.dtype + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: jnp.ndarray | None = None, + variance_type: str = "fixed_small", + clip_sample: bool = True, + prediction_type: str = "epsilon", + dtype: jnp.dtype = jnp.float32, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.dtype = dtype + + def create_state(self, common: CommonSchedulerState | None = None) -> DDPMSchedulerState: + if common is None: + common = CommonSchedulerState.create(self) + + # standard deviation of the initial noise distribution + init_noise_sigma = jnp.array(1.0, dtype=self.dtype) + + timesteps = jnp.arange(0, self.config.num_train_timesteps).round()[::-1] + + return DDPMSchedulerState.create( + common=common, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + def scale_model_input( + self, + state: DDPMSchedulerState, + sample: jnp.ndarray, + timestep: int | None = None, + ) -> jnp.ndarray: + """ + Args: + state (`PNDMSchedulerState`): the `FlaxPNDMScheduler` state data class instance. + sample (`jnp.ndarray`): input sample + timestep (`int`, optional): current timestep + + Returns: + `jnp.ndarray`: scaled input sample + """ + return sample + + def set_timesteps( + self, state: DDPMSchedulerState, num_inference_steps: int, shape: tuple = () + ) -> DDPMSchedulerState: + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`DDIMSchedulerState`): + the `FlaxDDPMScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + """ + + step_ratio = self.config.num_train_timesteps // num_inference_steps + # creates integer timesteps by multiplying by ratio + # rounding to avoid issues when num_inference_step is power of 3 + timesteps = (jnp.arange(0, num_inference_steps) * step_ratio).round()[::-1] + + return state.replace( + num_inference_steps=num_inference_steps, + timesteps=timesteps, + ) + + def _get_variance(self, state: DDPMSchedulerState, t, predicted_variance=None, variance_type=None): + alpha_prod_t = state.common.alphas_cumprod[t] + alpha_prod_t_prev = jnp.where(t > 0, state.common.alphas_cumprod[t - 1], jnp.array(1.0, dtype=self.dtype)) + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * state.common.betas[t] + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probably added for training stability + if variance_type == "fixed_small": + variance = jnp.clip(variance, a_min=1e-20) + # for rl-diffuser https://huggingface.co/papers/2205.09991 + elif variance_type == "fixed_small_log": + variance = jnp.log(jnp.clip(variance, a_min=1e-20)) + elif variance_type == "fixed_large": + variance = state.common.betas[t] + elif variance_type == "fixed_large_log": + # Glide max_log + variance = jnp.log(state.common.betas[t]) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = variance + max_log = state.common.betas[t] + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + state: DDPMSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + key: jax.Array | None = None, + return_dict: bool = True, + ) -> FlaxDDPMSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + state (`DDPMSchedulerState`): the `FlaxDDPMScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + key (`jax.Array`): a PRNG key. + return_dict (`bool`): option for returning tuple rather than FlaxDDPMSchedulerOutput class + + Returns: + [`FlaxDDPMSchedulerOutput`] or `tuple`: [`FlaxDDPMSchedulerOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + t = timestep + + if key is None: + key = jax.random.key(0) + + if ( + len(model_output.shape) > 1 + and model_output.shape[1] == sample.shape[1] * 2 + and self.config.variance_type in ["learned", "learned_range"] + ): + model_output, predicted_variance = jnp.split(model_output, sample.shape[1], axis=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = state.common.alphas_cumprod[t] + alpha_prod_t_prev = jnp.where(t > 0, state.common.alphas_cumprod[t - 1], jnp.array(1.0, dtype=self.dtype)) + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` " + " for the FlaxDDPMScheduler." + ) + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = jnp.clip(pred_original_sample, -1, 1) + + # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * state.common.betas[t]) / beta_prod_t + current_sample_coeff = state.common.alphas[t] ** (0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + def random_variance(): + split_key = jax.random.split(key, num=1)[0] + noise = jax.random.normal(split_key, shape=model_output.shape, dtype=self.dtype) + return (self._get_variance(state, t, predicted_variance=predicted_variance) ** 0.5) * noise + + variance = jnp.where(t > 0, random_variance(), jnp.zeros(model_output.shape, dtype=self.dtype)) + + pred_prev_sample = pred_prev_sample + variance + + if not return_dict: + return (pred_prev_sample, state) + + return FlaxDDPMSchedulerOutput(prev_sample=pred_prev_sample, state=state) + + def add_noise( + self, + state: DDPMSchedulerState, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + return add_noise_common(state.common, original_samples, noise, timesteps) + + def get_velocity( + self, + state: DDPMSchedulerState, + sample: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + return get_velocity_common(state.common, sample, noise, timesteps) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_parallel.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..725958b01a44e511344e2b23dc1afe2f0565c2a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_parallel.py @@ -0,0 +1,769 @@ +# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput +class DDPMParallelSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas): + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class DDPMParallelScheduler(SchedulerMixin, ConfigMixin): + """ + Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and + Langevin dynamics sampling. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2006.11239 + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, `squaredcos_cap_v2` or `sigmoid`. + trained_betas (`np.ndarray`, *optional*): + Option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + variance_type (`str`, defaults to `"fixed_small"`): + Options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, + `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. + clip_sample (`bool`, defaults to `True`): + Option to clip predicted sample for numerical stability. + prediction_type (`str`, defaults to `"epsilon"`): + Prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4 + https://huggingface.co/papers/2210.02303) + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method (introduced by Imagen, + https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space + diffusion models (such as stable-diffusion). + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen + (https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`str`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample + Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + _is_ode_scheduler = False + + @register_to_config + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.__init__ + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + variance_type: Literal[ + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", + ] = "fixed_small", + clip_sample: bool = True, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + clip_sample_range: float = 1.0, + sample_max_value: float = 1.0, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading", + steps_offset: int = 0, + rescale_betas_zero_snr: bool = False, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + elif beta_schedule == "laplace": + self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="laplace") + elif beta_schedule == "sigmoid": + # GeoDiff sigmoid schedule + betas = torch.linspace(-6, 6, num_train_timesteps) + self.betas = torch.sigmoid(betas) * (beta_end - beta_start) + beta_start + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + self.one = torch.tensor(1.0) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.custom_timesteps = False + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + self.variance_type = variance_type + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.scale_model_input + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.set_timesteps + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device = None, + timesteps: list[int] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. If used, + `timesteps` must be `None`. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default + timestep spacing strategy of equal spacing between timesteps is used. If `timesteps` is passed, + `num_inference_steps` must be `None`. + + """ + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.") + + if timesteps is not None: + for i in range(1, len(timesteps)): + if timesteps[i] >= timesteps[i - 1]: + raise ValueError("`custom_timesteps` must be in descending order.") + + if timesteps[0] >= self.config.num_train_timesteps: + raise ValueError( + f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}." + ) + + timesteps = np.array(timesteps, dtype=np.int64) + self.custom_timesteps = True + else: + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + self.custom_timesteps = False + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps) + .round()[::-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._get_variance + def _get_variance( + self, + t: int, + predicted_variance: torch.Tensor | None = None, + variance_type: Literal[ + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", + ] + | None = None, + ) -> torch.Tensor: + """ + Compute the variance for a given timestep according to the specified variance type. + + Args: + t (`int`): + The current timestep. + predicted_variance (`torch.Tensor`, *optional*): + The predicted variance from the model. Used only when `variance_type` is `"learned"` or + `"learned_range"`. + variance_type (`"fixed_small"`, `"fixed_small_log"`, `"fixed_large"`, `"fixed_large_log"`, `"learned"`, or `"learned_range"`, *optional*): + The type of variance to compute. If `None`, uses the variance type specified in the scheduler + configuration. + + Returns: + `torch.Tensor`: + The computed variance. + """ + prev_t = self.previous_timestep(t) + + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one + current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t + + # we always take the log of variance, so clamp it to ensure it's not 0 + variance = torch.clamp(variance, min=1e-20) + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probably added for training stability + if variance_type == "fixed_small": + variance = variance + # for rl-diffuser https://huggingface.co/papers/2205.09991 + elif variance_type == "fixed_small_log": + variance = torch.log(variance) + variance = torch.exp(0.5 * variance) + elif variance_type == "fixed_large": + variance = current_beta_t + elif variance_type == "fixed_large_log": + # Glide max_log + variance = torch.log(current_beta_t) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = torch.log(variance) + max_log = torch.log(current_beta_t) + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> DDPMParallelSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + current instance of sample being created by diffusion process. + generator (`torch.Generator`, *optional*): + Random number generator. + return_dict (`bool`): option for returning tuple rather than DDPMParallelSchedulerOutput class + + Returns: + [`~schedulers.scheduling_utils.DDPMParallelSchedulerOutput`] or `tuple`: + [`~schedulers.scheduling_utils.DDPMParallelSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. + When returning a tuple, the first element is the sample tensor. + + """ + t = timestep + + prev_t = self.previous_timestep(t) + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [ + "learned", + "learned_range", + ]: + model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + current_alpha_t = alpha_prod_t / alpha_prod_t_prev + current_beta_t = 1 - current_alpha_t + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" + " `v_prediction` for the DDPMScheduler." + ) + + # 3. Clip or threshold "predicted x_0" + if self.config.thresholding: + pred_original_sample = self._threshold_sample(pred_original_sample) + elif self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t + current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + device = model_output.device + variance_noise = randn_tensor( + model_output.shape, + generator=generator, + device=device, + dtype=model_output.dtype, + ) + if self.variance_type == "fixed_small_log": + variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise + elif self.variance_type == "learned_range": + variance = self._get_variance(t, predicted_variance=predicted_variance) + variance = torch.exp(0.5 * variance) * variance_noise + else: + variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * variance_noise + + pred_prev_sample = pred_prev_sample + variance + + if not return_dict: + return ( + pred_prev_sample, + pred_original_sample, + ) + + return DDPMParallelSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) + + def batch_step_no_noise( + self, + model_output: torch.Tensor, + timesteps: torch.Tensor, + sample: torch.Tensor, + ) -> torch.Tensor: + """ + Batched version of the `step` function, to be able to reverse the SDE for multiple samples/timesteps at once. + Also, does not add any noise to the predicted sample, which is necessary for parallel sampling where the noise + is pre-sampled by the pipeline. + + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): direct output from learned diffusion model. + timesteps (`torch.Tensor`): + Current discrete timesteps in the diffusion chain. This is a tensor of integers. + sample (`torch.Tensor`): + current instance of sample being created by diffusion process. + + Returns: + `torch.Tensor`: sample tensor at previous timestep. + """ + t = timesteps + num_inference_steps = self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps + prev_t = t - self.config.num_train_timesteps // num_inference_steps + + t = t.view(-1, *([1] * (model_output.ndim - 1))) + prev_t = prev_t.view(-1, *([1] * (model_output.ndim - 1))) + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [ + "learned", + "learned_range", + ]: + model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1) + else: + pass + + # 1. compute alphas, betas + self.alphas_cumprod = self.alphas_cumprod.to(model_output.device) + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[torch.clip(prev_t, min=0)] + alpha_prod_t_prev[prev_t < 0] = torch.tensor(1.0) + + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + current_alpha_t = alpha_prod_t / alpha_prod_t_prev + current_beta_t = 1 - current_alpha_t + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" + " `v_prediction` for the DDPMParallelScheduler." + ) + + # 3. Clip or threshold "predicted x_0" + if self.config.thresholding: + pred_original_sample = self._threshold_sample(pred_original_sample) + elif self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t + current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + return pred_prev_sample + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self): + return self.config.num_train_timesteps + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep + def previous_timestep(self, timestep: int) -> int | torch.Tensor: + """ + Compute the previous timestep in the diffusion chain. + + Args: + timestep (`int`): + The current timestep. + + Returns: + `int` or `torch.Tensor`: + The previous timestep. + """ + if self.custom_timesteps or self.num_inference_steps: + index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] + if index == self.timesteps.shape[0] - 1: + prev_t = torch.tensor(-1) + else: + prev_t = self.timesteps[index + 1] + else: + prev_t = timestep - 1 + return prev_t diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_wuerstchen.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_wuerstchen.py new file mode 100644 index 0000000000000000000000000000000000000000..e3b87b789a03a3d903d430563b9f778219b2f38e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ddpm_wuerstchen.py @@ -0,0 +1,229 @@ +# Copyright (c) 2022 Pablo Pernías MIT License +# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +import math +from dataclasses import dataclass + +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +@dataclass +class DDPMWuerstchenSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.Tensor + + +def betas_for_alpha_bar( + num_diffusion_timesteps, + max_beta=0.999, + alpha_transform_type="cosine", +): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. + Choose from `cosine` or `exp` + + Returns: + betas (`np.ndarray`): the betas used by the scheduler to step the model outputs + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DDPMWuerstchenScheduler(SchedulerMixin, ConfigMixin): + """ + Denoising diffusion probabilistic models (DDPMs) explores the connections between denoising score matching and + Langevin dynamics sampling. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2006.11239 + + Args: + scaler (`float`): .... + s (`float`): .... + """ + + @register_to_config + def __init__( + self, + scaler: float = 1.0, + s: float = 0.008, + ): + self.scaler = scaler + self.s = torch.tensor([s]) + self._init_alpha_cumprod = torch.cos(self.s / (1 + self.s) * torch.pi * 0.5) ** 2 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + def _alpha_cumprod(self, t, device): + if self.scaler > 1: + t = 1 - (1 - t) ** self.scaler + elif self.scaler < 1: + t = t**self.scaler + alpha_cumprod = torch.cos( + (t + self.s.to(device)) / (1 + self.s.to(device)) * torch.pi * 0.5 + ) ** 2 / self._init_alpha_cumprod.to(device) + return alpha_cumprod.clamp(0.0001, 0.9999) + + def scale_model_input(self, sample: torch.Tensor, timestep: int = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): input sample + timestep (`int`, optional): current timestep + + Returns: + `torch.Tensor`: scaled input sample + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int = None, + timesteps: list[int] | None = None, + device: str | torch.device = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + num_inference_steps (`dict[float, int]`): + the number of diffusion steps used when generating samples with a pre-trained model. If passed, then + `timesteps` must be `None`. + device (`str` or `torch.device`, optional): + the device to which the timesteps are moved to. {2 / 3: 20, 0.0: 10} + """ + if timesteps is None: + timesteps = torch.linspace(1.0, 0.0, num_inference_steps + 1, device=device) + if not isinstance(timesteps, torch.Tensor): + timesteps = torch.Tensor(timesteps).to(device) + self.timesteps = timesteps + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + generator=None, + return_dict: bool = True, + ) -> DDPMWuerstchenSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + current instance of sample being created by diffusion process. + generator: random number generator. + return_dict (`bool`): option for returning tuple rather than DDPMWuerstchenSchedulerOutput class + + Returns: + [`DDPMWuerstchenSchedulerOutput`] or `tuple`: [`DDPMWuerstchenSchedulerOutput`] if `return_dict` is True, + otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + dtype = model_output.dtype + device = model_output.device + t = timestep + + prev_t = self.previous_timestep(t) + + alpha_cumprod = self._alpha_cumprod(t, device).view(t.size(0), *[1 for _ in sample.shape[1:]]) + alpha_cumprod_prev = self._alpha_cumprod(prev_t, device).view(prev_t.size(0), *[1 for _ in sample.shape[1:]]) + alpha = alpha_cumprod / alpha_cumprod_prev + + mu = (1.0 / alpha).sqrt() * (sample - (1 - alpha) * model_output / (1 - alpha_cumprod).sqrt()) + + std_noise = randn_tensor(mu.shape, generator=generator, device=model_output.device, dtype=model_output.dtype) + std = ((1 - alpha) * (1.0 - alpha_cumprod_prev) / (1.0 - alpha_cumprod)).sqrt() * std_noise + pred = mu + std * (prev_t != 0).float().view(prev_t.size(0), *[1 for _ in sample.shape[1:]]) + + if not return_dict: + return (pred.to(dtype),) + + return DDPMWuerstchenSchedulerOutput(prev_sample=pred.to(dtype)) + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + device = original_samples.device + dtype = original_samples.dtype + alpha_cumprod = self._alpha_cumprod(timesteps, device=device).view( + timesteps.size(0), *[1 for _ in original_samples.shape[1:]] + ) + noisy_samples = alpha_cumprod.sqrt() * original_samples + (1 - alpha_cumprod).sqrt() * noise + return noisy_samples.to(dtype=dtype) + + def __len__(self): + return self.config.num_train_timesteps + + def previous_timestep(self, timestep): + index = (self.timesteps - timestep[0]).abs().argmin().item() + prev_t = self.timesteps[index + 1][None].expand(timestep.shape[0]) + return prev_t diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_deis_multistep.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_deis_multistep.py new file mode 100644 index 0000000000000000000000000000000000000000..c22d2b5b439dccf31e62a50e69873a1bc62d7c04 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_deis_multistep.py @@ -0,0 +1,1047 @@ +# Copyright 2025 FLAIR Lab and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: check https://huggingface.co/papers/2204.13902 and https://github.com/qsh-zh/deis for more info +# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import deprecate, is_scipy_available +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +if is_scipy_available(): + import scipy.stats + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DEISMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + `DEISMultistepScheduler` is a fast high order solver for diffusion ordinary differential equations (ODEs). + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to `1000`): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to `0.0001`): + The starting `beta` value of inference. + beta_end (`float`, defaults to `0.02`): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray` or `list[float]`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, defaults to `2`): + The DEIS order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + prediction_type (`"epsilon"`, `"sample"`, `"v_prediction"`, or `"flow_prediction"`, defaults to `"epsilon"`): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`), `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper), or `flow_prediction`. + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to `0.995`): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to `1.0`): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + algorithm_type (`"deis"`, defaults to `"deis"`): + The algorithm type for the solver. + solver_type (`"logrho"`, defaults to `"logrho"`): + Solver type for DEIS. + lower_order_final (`bool`, defaults to `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + use_flow_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use flow sigmas for step sizes in the noise schedule during the sampling process. + flow_shift (`float`, *optional*, defaults to `1.0`): + The flow shift parameter for flow-based models. + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to `0`): + An offset added to the inference steps, as required by some model families. + use_dynamic_shifting (`bool`, defaults to `False`): + Whether to use dynamic shifting for the noise schedule. + time_shift_type (`"exponential"`, defaults to `"exponential"`): + The type of time shifting to apply. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + solver_order: int = 2, + prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: Literal["deis"] = "deis", + solver_type: Literal["logrho"] = "logrho", + lower_order_final: bool = True, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + use_flow_sigmas: bool = False, + flow_shift: float = 1.0, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + use_dynamic_shifting: bool = False, + time_shift_type: Literal["exponential"] = "exponential", + ) -> None: + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if ( + sum( + [ + self.config.use_beta_sigmas, + self.config.use_exponential_sigmas, + self.config.use_karras_sigmas, + ] + ) + > 1 + ): + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # settings for DEIS + if algorithm_type not in ["deis"]: + if algorithm_type in ["dpmsolver", "dpmsolver++"]: + self.register_to_config(algorithm_type="deis") + else: + raise NotImplementedError(f"{algorithm_type} is not implemented for {self.__class__}") + + if solver_type not in ["logrho"]: + if solver_type in ["midpoint", "heun", "bh1", "bh2"]: + self.register_to_config(solver_type="logrho") + else: + raise NotImplementedError(f"solver type {solver_type} is not implemented for {self.__class__}") + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.model_outputs = [None] * solver_order + self.lower_order_nums = 0 + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None, mu: float | None = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + mu (`float`, *optional*): + The mu parameter for dynamic shifting. Only used when `use_dynamic_shifting=True` and + `time_shift_type="exponential"`. + """ + if mu is not None: + assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential" + self.config.flow_shift = np.exp(mu) + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + if self.config.use_karras_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_exponential_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_beta_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_flow_sigmas: + alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy() + timesteps = (sigmas * self.config.num_train_timesteps).copy() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Convert sigma values to alpha_t and sigma_t values. + + Args: + sigma (`torch.Tensor`): + The sigma value(s) to convert. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing (alpha_t, sigma_t) values. + """ + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = sigma + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the DEIS algorithm needs. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + if self.config.prediction_type == "epsilon": + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + sigma_t = self.sigmas[self.step_index] + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the DEISMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + if self.config.algorithm_type == "deis": + return (sample - alpha_t * x0_pred) / sigma_t + else: + raise NotImplementedError("only support log-rho multistep deis now") + + def deis_first_order_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the first-order DEIS (equivalent to DDIM). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s = torch.log(alpha_s) - torch.log(sigma_s) + + h = lambda_t - lambda_s + if self.config.algorithm_type == "deis": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (torch.exp(h) - 1.0)) * model_output + else: + raise NotImplementedError("only support log-rho multistep deis now") + return x_t + + def multistep_deis_second_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the second-order multistep DEIS. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + + m0, m1 = model_output_list[-1], model_output_list[-2] + + rho_t, rho_s0, rho_s1 = ( + sigma_t / alpha_t, + sigma_s0 / alpha_s0, + sigma_s1 / alpha_s1, + ) + + if self.config.algorithm_type == "deis": + + def ind_fn(t, b, c): + # Integrate[(log(t) - log(c)) / (log(b) - log(c)), {t}] + return t * (-np.log(c) + np.log(t) - 1) / (np.log(b) - np.log(c)) + + coef1 = ind_fn(rho_t, rho_s0, rho_s1) - ind_fn(rho_s0, rho_s0, rho_s1) + coef2 = ind_fn(rho_t, rho_s1, rho_s0) - ind_fn(rho_s0, rho_s1, rho_s0) + + x_t = alpha_t * (sample / alpha_s0 + coef1 * m0 + coef2 * m1) + return x_t + else: + raise NotImplementedError("only support log-rho multistep deis now") + + def multistep_deis_third_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the third-order multistep DEIS. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`): + A current instance of a sample created by diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1, sigma_s2 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + self.sigmas[self.step_index - 2], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + alpha_s2, sigma_s2 = self._sigma_to_alpha_sigma_t(sigma_s2) + + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + + rho_t, rho_s0, rho_s1, rho_s2 = ( + sigma_t / alpha_t, + sigma_s0 / alpha_s0, + sigma_s1 / alpha_s1, + sigma_s2 / alpha_s2, + ) + + if self.config.algorithm_type == "deis": + + def ind_fn(t, b, c, d): + # Integrate[(log(t) - log(c))(log(t) - log(d)) / (log(b) - log(c))(log(b) - log(d)), {t}] + numerator = t * ( + np.log(c) * (np.log(d) - np.log(t) + 1) + - np.log(d) * np.log(t) + + np.log(d) + + np.log(t) ** 2 + - 2 * np.log(t) + + 2 + ) + denominator = (np.log(b) - np.log(c)) * (np.log(b) - np.log(d)) + return numerator / denominator + + coef1 = ind_fn(rho_t, rho_s0, rho_s1, rho_s2) - ind_fn(rho_s0, rho_s0, rho_s1, rho_s2) + coef2 = ind_fn(rho_t, rho_s1, rho_s2, rho_s0) - ind_fn(rho_s0, rho_s1, rho_s2, rho_s0) + coef3 = ind_fn(rho_t, rho_s2, rho_s0, rho_s1) - ind_fn(rho_s0, rho_s2, rho_s0, rho_s1) + + x_t = alpha_t * (sample / alpha_s0 + coef1 * m0 + coef2 * m1 + coef3 * m2) + + return x_t + else: + raise NotImplementedError("only support log-rho multistep deis now") + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._init_step_index + def _init_step_index(self, timestep: int | torch.Tensor) -> None: + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep DEIS. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + lower_order_final = ( + (self.step_index == len(self.timesteps) - 1) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + lower_order_second = ( + (self.step_index == len(self.timesteps) - 2) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + + model_output = self.convert_model_output(model_output, sample=sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + if self.config.solver_order == 1 or self.lower_order_nums < 1 or lower_order_final: + prev_sample = self.deis_first_order_update(model_output, sample=sample) + elif self.config.solver_order == 2 or self.lower_order_nums < 2 or lower_order_second: + prev_sample = self.multistep_deis_second_order_update(self.model_outputs, sample=sample) + else: + prev_sample = self.multistep_deis_third_order_update(self.model_outputs, sample=sample) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples without noise. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps at which to add noise to the samples. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpm_cogvideox.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpm_cogvideox.py new file mode 100644 index 0000000000000000000000000000000000000000..4a238a5408aed3d421e0c8cf426f05250ea1d1d7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpm_cogvideox.py @@ -0,0 +1,600 @@ +# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM +class DDIMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +def rescale_zero_terminal_snr(alphas_cumprod): + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + the betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: rescaled betas with zero terminal SNR + """ + + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + + return alphas_bar + + +class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin): + """ + `DDIMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with + non-Markovian guidance. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, defaults to `True`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the alpha value at step 0. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`str`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. Choose from + `leading`, `linspace` or `trailing`. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + snr_shift_scale (`float`, defaults to 3.0): + Shift scale for SNR. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, + beta_end: float = 0.0120, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear", + trained_betas: np.ndarray | list[float] | None = None, + clip_sample: bool = True, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + clip_sample_range: float = 1.0, + sample_max_value: float = 1.0, + timestep_spacing: Literal["leading", "linspace", "trailing"] = "leading", + rescale_betas_zero_snr: bool = False, + snr_shift_scale: float = 3.0, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float64, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # Modify: SNR shift following SD3 + self.alphas_cumprod = self.alphas_cumprod / (snr_shift_scale + (1 - snr_shift_scale) * self.alphas_cumprod) + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.alphas_cumprod = rescale_zero_terminal_snr(self.alphas_cumprod) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + + def _get_variance(self, timestep, prev_timestep): + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None` (the default), the timesteps are not + moved. + """ + + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + self.num_inference_steps = num_inference_steps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps) + .round()[::-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'." + ) + + self.timesteps = torch.from_numpy(timesteps).to(device) + + def get_variables( + self, + alpha_prod_t: torch.Tensor, + alpha_prod_t_prev: torch.Tensor, + alpha_prod_t_back: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Compute the variables used for DPM-Solver++ (2M) referencing the original implementation. + + Args: + alpha_prod_t (`torch.Tensor`): + The cumulative product of alphas at the current timestep. + alpha_prod_t_prev (`torch.Tensor`): + The cumulative product of alphas at the previous timestep. + alpha_prod_t_back (`torch.Tensor`, *optional*): + The cumulative product of alphas at the timestep before the previous timestep. + + Returns: + `tuple`: + A tuple containing the variables `h`, `r`, `lamb`, `lamb_next`. + """ + lamb = ((alpha_prod_t / (1 - alpha_prod_t)) ** 0.5).log() + lamb_next = ((alpha_prod_t_prev / (1 - alpha_prod_t_prev)) ** 0.5).log() + h = lamb_next - lamb + + if alpha_prod_t_back is not None: + lamb_previous = ((alpha_prod_t_back / (1 - alpha_prod_t_back)) ** 0.5).log() + h_last = lamb - lamb_previous + r = h_last / h + return h, r, lamb, lamb_next + else: + return h, None, lamb, lamb_next + + def get_mult( + self, + h: torch.Tensor, + r: torch.Tensor, + alpha_prod_t: torch.Tensor, + alpha_prod_t_prev: torch.Tensor, + alpha_prod_t_back: torch.Tensor | None = None, + ) -> tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Compute the multipliers for the previous sample and the predicted original sample. + + Args: + h (`torch.Tensor`): + The log-SNR difference. + r (`torch.Tensor`): + The ratio of log-SNR differences. + alpha_prod_t (`torch.Tensor`): + The cumulative product of alphas at the current timestep. + alpha_prod_t_prev (`torch.Tensor`): + The cumulative product of alphas at the previous timestep. + alpha_prod_t_back (`torch.Tensor`, *optional*): + The cumulative product of alphas at the timestep before the previous timestep. + + Returns: + `tuple`: + A tuple containing the multipliers. + """ + mult1 = ((1 - alpha_prod_t_prev) / (1 - alpha_prod_t)) ** 0.5 * (-h).exp() + mult2 = (-2 * h).expm1() * alpha_prod_t_prev**0.5 + + if alpha_prod_t_back is not None: + mult3 = 1 + 1 / (2 * r) + mult4 = 1 / (2 * r) + return mult1, mult2, mult3, mult4 + else: + return mult1, mult2 + + def step( + self, + model_output: torch.Tensor, + old_pred_original_sample: torch.Tensor, + timestep: int, + timestep_back: int, + sample: torch.Tensor, + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator: torch.Generator | None = None, + variance_noise: torch.Tensor | None = None, + return_dict: bool = False, + ) -> DDIMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + old_pred_original_sample (`torch.Tensor`): + The predicted original sample from the previous timestep. + timestep (`int`): + The current discrete timestep in the diffusion chain. + timestep_back (`int`): + The timestep to look back to. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + eta (`float`): + The weight of noise for added noise in diffusion step. + use_clipped_model_output (`bool`, defaults to `False`): + If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary + because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no + clipping has happened, "corrected" `model_output` would coincide with the one provided as input and + `use_clipped_model_output` has no effect. + generator (`torch.Generator`, *optional*): + A random number generator. + variance_noise (`torch.Tensor`): + Alternative to generating noise with `generator` by directly providing the noise for the variance + itself. Useful for methods such as [`CycleDiffusion`]. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502 + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointing to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + alpha_prod_t_back = self.alphas_cumprod[timestep_back] if timestep_back is not None else None + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502 + # To make style tests pass, commented out `pred_epsilon` as it is an unused variable + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + # pred_epsilon = model_output + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + # pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + elif self.config.prediction_type == "v_prediction": + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + # pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction`" + ) + + h, r, lamb, lamb_next = self.get_variables(alpha_prod_t, alpha_prod_t_prev, alpha_prod_t_back) + mult = list(self.get_mult(h, r, alpha_prod_t, alpha_prod_t_prev, alpha_prod_t_back)) + mult_noise = (1 - alpha_prod_t_prev) ** 0.5 * (1 - (-2 * h).exp()) ** 0.5 + + noise = randn_tensor(sample.shape, generator=generator, device=sample.device, dtype=sample.dtype) + prev_sample = mult[0] * sample - mult[1] * pred_original_sample + mult_noise * noise + + if old_pred_original_sample is None or prev_timestep < 0: + # Save a network evaluation if all noise levels are 0 or on the first step + return prev_sample, pred_original_sample + else: + denoised_d = mult[2] * pred_original_sample - mult[3] * old_pred_original_sample + noise = randn_tensor( + sample.shape, + generator=generator, + device=sample.device, + dtype=sample.dtype, + ) + x_advanced = mult[0] * sample - mult[1] * denoised_d + mult_noise * noise + + prev_sample = x_advanced + + if not return_dict: + return (prev_sample, pred_original_sample) + + return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep.py new file mode 100644 index 0000000000000000000000000000000000000000..9c15df4569caf6534230f0c0d6b1db7176f63b66 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -0,0 +1,1347 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import deprecate, is_scipy_available +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +if is_scipy_available(): + import scipy.stats + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas): + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + `DPMSolverMultistepScheduler` is a fast dedicated high-order solver for diffusion ODEs. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, defaults to 2): + The DPMSolver order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + prediction_type (`"epsilon"`, `"sample"`, `"v_prediction"`, or `"flow_prediction"`, defaults to `"epsilon"`): + Prediction type of the scheduler function. `epsilon` predicts the noise of the diffusion process, `sample` + directly predicts the noisy sample, `v_prediction` predicts the velocity (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper), and `flow_prediction` predicts the flow. + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++"`. + algorithm_type (`"dpmsolver"`, `"dpmsolver++"`, `"sde-dpmsolver"`, or `"sde-dpmsolver++"`, defaults to `"dpmsolver++"`): + Algorithm type for the solver. The `dpmsolver` type implements the algorithms in the + [DPMSolver](https://huggingface.co/papers/2206.00927) paper, and the `dpmsolver++` type implements the + algorithms in the [DPMSolver++](https://huggingface.co/papers/2211.01095) paper. It is recommended to use + `dpmsolver++` or `sde-dpmsolver++` with `solver_order=2` for guided sampling like in Stable Diffusion. + solver_type (`"midpoint"` or `"heun"`, defaults to `"midpoint"`): + Solver type for the second-order solver. The solver type slightly affects the sample quality, especially + for a small number of steps. It is recommended to use `midpoint` solvers. + lower_order_final (`bool`, defaults to `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + euler_at_final (`bool`, defaults to `False`): + Whether to use Euler's method in the final step. It is a trade-off between numerical stability and detail + richness. This can stabilize the sampling of the SDE variant of DPMSolver for small number of inference + steps, but sometimes may result in blurring. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + use_lu_lambdas (`bool`, *optional*, defaults to `False`): + Whether to use the uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the noise schedule during + the sampling process. If `True`, the sigmas and time steps are determined according to a sequence of + `lambda(t)`. + use_flow_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use flow sigmas for step sizes in the noise schedule during the sampling process. + flow_shift (`float`, *optional*, defaults to 1.0): + The shift value for the timestep schedule for flow matching. + final_sigmas_type (`"zero"` or `"sigma_min"`, *optional*, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final + sigma is the same as the last sigma in the training schedule. If `"zero"`, the final sigma is set to 0. + lambda_min_clipped (`float`, defaults to `-inf`): + Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the + cosine (`squaredcos_cap_v2`) noise schedule. + variance_type (`"learned"` or `"learned_range"`, *optional*): + Set to `"learned"` or `"learned_range"` for diffusion models that predict variance. If set, the model's + output contains the predicted Gaussian variance. + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + use_dynamic_shifting (`bool`, defaults to `False`): + Whether to use dynamic shifting for the timestep schedule. + time_shift_type (`"exponential"`, defaults to `"exponential"`): + The type of time shift to apply when using dynamic shifting. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + solver_order: int = 2, + prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: Literal["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"] = "dpmsolver++", + solver_type: Literal["midpoint", "heun"] = "midpoint", + lower_order_final: bool = True, + euler_at_final: bool = False, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + use_lu_lambdas: bool = False, + use_flow_sigmas: bool = False, + flow_shift: float = 1.0, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", + lambda_min_clipped: float = -float("inf"), + variance_type: Literal["learned", "learned_range"] | None = None, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + rescale_betas_zero_snr: bool = False, + use_dynamic_shifting: bool = False, + time_shift_type: Literal["exponential"] = "exponential", + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if ( + sum( + [ + self.config.use_beta_sigmas, + self.config.use_exponential_sigmas, + self.config.use_karras_sigmas, + ] + ) + > 1 + ): + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if algorithm_type in ["dpmsolver", "sde-dpmsolver"]: + deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead" + deprecate( + "algorithm_types dpmsolver and sde-dpmsolver", + "1.0.0", + deprecation_message, + ) + + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + if rescale_betas_zero_snr: + # Close to 0 without being 0 so first sigma is not inf + # FP16 smallest positive subnormal works well here + self.alphas_cumprod[-1] = 2**-24 + + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # settings for DPM-Solver + if algorithm_type not in [ + "dpmsolver", + "dpmsolver++", + "sde-dpmsolver", + "sde-dpmsolver++", + ]: + if algorithm_type == "deis": + self.register_to_config(algorithm_type="dpmsolver++") + else: + raise NotImplementedError(f"{algorithm_type} is not implemented for {self.__class__}") + + if solver_type not in ["midpoint", "heun"]: + if solver_type in ["logrho", "bh1", "bh2"]: + self.register_to_config(solver_type="midpoint") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + if algorithm_type not in ["dpmsolver++", "sde-dpmsolver++"] and final_sigmas_type == "zero": + raise ValueError( + f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please choose `sigma_min` instead." + ) + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.model_outputs = [None] * solver_order + self.lower_order_nums = 0 + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device = None, + mu: float | None = None, + timesteps: list[int] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary timesteps schedule. If `None`, timesteps will be generated + based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` and `sigmas` + must be `None`, and `timestep_spacing` attribute will be ignored. + """ + if mu is not None: + assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential" + self.config.flow_shift = np.exp(mu) + if num_inference_steps is None and timesteps is None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.") + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.") + if timesteps is not None and self.config.use_karras_sigmas: + raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`") + if timesteps is not None and self.config.use_lu_lambdas: + raise ValueError("Cannot use `timesteps` with `config.use_lu_lambdas = True`") + if timesteps is not None and self.config.use_exponential_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") + + if timesteps is not None: + timesteps = np.array(timesteps).astype(np.int64) + else: + # Clipping the minimum of all lambda(t) for numerical stability. + # This is critical for cosine (squaredcos_cap_v2) noise schedule. + clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped) + last_timestep = ((self.config.num_train_timesteps - clipped_idx).numpy()).item() + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, last_timestep - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = last_timestep // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = ( + (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64) + ) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(last_timestep, 0, -step_ratio).round().copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + + if self.config.use_karras_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + if self.config.beta_schedule != "squaredcos_cap_v2": + timesteps = timesteps.round() + elif self.config.use_lu_lambdas: + lambdas = np.flip(log_sigmas.copy()) + lambdas = self._convert_to_lu(in_lambdas=lambdas, num_inference_steps=num_inference_steps) + sigmas = np.exp(lambdas) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + if self.config.beta_schedule != "squaredcos_cap_v2": + timesteps = timesteps.round() + elif self.config.use_exponential_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_flow_sigmas: + alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy() + timesteps = (sigmas * self.config.num_train_timesteps).copy() + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Convert sigma values to alpha_t and sigma_t values. + + Args: + sigma (`torch.Tensor`): + The sigma value(s) to convert. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing (alpha_t, sigma_t) values. + """ + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = sigma + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + def _convert_to_lu(self, in_lambdas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [DPM-Solver: A Fast ODE Solver for Diffusion Probabilistic Model + Sampling in Around 10 Steps](https://huggingface.co/papers/2206.00927) by Lu et al. (2022). + + Args: + in_lambdas (`torch.Tensor`): + The input lambda values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted lambda values following the Lu noise schedule. + """ + + lambda_min: float = in_lambdas[-1].item() + lambda_max: float = in_lambdas[0].item() + + rho = 1.0 # 1.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = lambda_min ** (1 / rho) + max_inv_rho = lambda_max ** (1 / rho) + lambdas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return lambdas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is + designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an + integral of the data prediction model. + + > [!TIP] > The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both + noise > prediction and data prediction models. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + # DPM-Solver++ needs to solve an integral of the data prediction model. + if self.config.algorithm_type in ["dpmsolver++", "sde-dpmsolver++"]: + if self.config.prediction_type == "epsilon": + # DPM-Solver and DPM-Solver++ only need the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + model_output = model_output[:, :3] + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + sigma_t = self.sigmas[self.step_index] + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the DPMSolverMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + + # DPM-Solver needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type in ["dpmsolver", "sde-dpmsolver"]: + if self.config.prediction_type == "epsilon": + # DPM-Solver and DPM-Solver++ only need the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + epsilon = model_output[:, :3] + else: + epsilon = model_output + elif self.config.prediction_type == "sample": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + epsilon = (sample - alpha_t * model_output) / sigma_t + elif self.config.prediction_type == "v_prediction": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + epsilon = alpha_t * model_output + sigma_t * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the DPMSolverMultistepScheduler." + ) + + if self.config.thresholding: + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = (sample - sigma_t * epsilon) / alpha_t + x0_pred = self._threshold_sample(x0_pred) + epsilon = (sample - alpha_t * x0_pred) / sigma_t + + return epsilon + + def dpm_solver_first_order_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the first-order DPMSolver (equivalent to DDIM). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by the diffusion process. + noise (`torch.Tensor`, *optional*): + The noise tensor. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s = torch.log(alpha_s) - torch.log(sigma_s) + + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output + elif self.config.algorithm_type == "dpmsolver": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (torch.exp(h) - 1.0)) * model_output + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + x_t = ( + (sigma_t / sigma_s * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * model_output + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.algorithm_type == "sde-dpmsolver": + assert noise is not None + x_t = ( + (alpha_t / alpha_s) * sample + - 2.0 * (sigma_t * (torch.exp(h) - 1.0)) * model_output + + sigma_t * torch.sqrt(torch.exp(2 * h) - 1.0) * noise + ) + return x_t + + def multistep_dpm_solver_second_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the second-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + + m0, m1 = model_output_list[-1], model_output_list[-2] + + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - 0.5 * (sigma_t * (torch.exp(h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + 0.5 * (alpha_t * (1 - torch.exp(-2.0 * h))) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.algorithm_type == "sde-dpmsolver": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - 2.0 * (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * (torch.exp(h) - 1.0)) * D1 + + sigma_t * torch.sqrt(torch.exp(2 * h) - 1.0) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - 2.0 * (sigma_t * (torch.exp(h) - 1.0)) * D0 + - 2.0 * (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + + sigma_t * torch.sqrt(torch.exp(2 * h) - 1.0) * noise + ) + return x_t + + def multistep_dpm_solver_third_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the third-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by diffusion process. + noise (`torch.Tensor`, *optional*): + The noise tensor. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1, sigma_s2 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + self.sigmas[self.step_index - 2], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + alpha_s2, sigma_s2 = self._sigma_to_alpha_sigma_t(sigma_s2) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + lambda_s2 = torch.log(alpha_s2) - torch.log(sigma_s2) + + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + + h, h_0, h_1 = lambda_t - lambda_s0, lambda_s0 - lambda_s1, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m0 + D1_0, D1_1 = (1.0 / r0) * (m0 - m1), (1.0 / r1) * (m1 - m2) + D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1) + D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + - (sigma_t * ((torch.exp(h) - 1.0 - h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1.0 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h) - 2.0 * h) / (2.0 * h) ** 2 - 0.5)) * D2 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + return x_t + + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + def _init_step_index(self, timestep: int | torch.Tensor) -> None: + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + variance_noise: torch.Tensor | None = None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep DPMSolver. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`int` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + variance_noise (`torch.Tensor`, *optional*): + Alternative to generating noise with `generator` by directly providing the noise for the variance + itself. Useful for methods such as [`LEdits++`]. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If `return_dict` is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Improve numerical stability for small number of steps + lower_order_final = (self.step_index == len(self.timesteps) - 1) and ( + self.config.euler_at_final + or (self.config.lower_order_final and len(self.timesteps) < 15) + or self.config.final_sigmas_type == "zero" + ) + lower_order_second = ( + (self.step_index == len(self.timesteps) - 2) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + + model_output = self.convert_model_output(model_output, sample=sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + if self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"] and variance_noise is None: + noise = randn_tensor( + model_output.shape, + generator=generator, + device=model_output.device, + dtype=torch.float32, + ) + elif self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]: + noise = variance_noise.to(device=model_output.device, dtype=torch.float32) + else: + noise = None + + if self.config.solver_order == 1 or self.lower_order_nums < 1 or lower_order_final: + prev_sample = self.dpm_solver_first_order_update(model_output, sample=sample, noise=noise) + elif self.config.solver_order == 2 or self.lower_order_nums < 2 or lower_order_second: + prev_sample = self.multistep_dpm_solver_second_order_update(self.model_outputs, sample=sample, noise=noise) + else: + prev_sample = self.multistep_dpm_solver_third_order_update(self.model_outputs, sample=sample, noise=noise) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # Cast sample back to expected dtype + prev_sample = prev_sample.to(model_output.dtype) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples without noise. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps at which to add noise to the samples. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..dd579e84d6099910700ba576ecf9c3f86a3cd865 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py @@ -0,0 +1,670 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver + +from dataclasses import dataclass + +import flax +import jax +import jax.numpy as jnp + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + CommonSchedulerState, + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + add_noise_common, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class DPMSolverMultistepSchedulerState: + common: CommonSchedulerState + alpha_t: jnp.ndarray + sigma_t: jnp.ndarray + lambda_t: jnp.ndarray + + # setable values + init_noise_sigma: jnp.ndarray + timesteps: jnp.ndarray + num_inference_steps: int = None + + # running values + model_outputs: jnp.ndarray | None = None + lower_order_nums: jnp.int32 | None = None + prev_timestep: jnp.int32 | None = None + cur_sample: jnp.ndarray | None = None + + @classmethod + def create( + cls, + common: CommonSchedulerState, + alpha_t: jnp.ndarray, + sigma_t: jnp.ndarray, + lambda_t: jnp.ndarray, + init_noise_sigma: jnp.ndarray, + timesteps: jnp.ndarray, + ): + return cls( + common=common, + alpha_t=alpha_t, + sigma_t=sigma_t, + lambda_t=lambda_t, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + +@dataclass +class FlaxDPMSolverMultistepSchedulerOutput(FlaxSchedulerOutput): + state: DPMSolverMultistepSchedulerState + + +class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + DPM-Solver (and the improved version DPM-Solver++) is a fast dedicated high-order solver for diffusion ODEs with + the convergence order guarantee. Empirically, sampling by DPM-Solver with only 20 steps can generate high-quality + samples, and it can generate quite good samples even in only 10 steps. + + For more details, see the original paper: https://huggingface.co/papers/2206.00927 and + https://huggingface.co/papers/2211.01095 + + Currently, we support the multistep DPM-Solver for both noise prediction models and data prediction models. We + recommend to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling. + + We also support the "dynamic thresholding" method in Imagen (https://huggingface.co/papers/2205.11487). For + pixel-space diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the + dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as + stable-diffusion). + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2206.00927 and + https://huggingface.co/papers/2211.01095 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + solver_order (`int`, default `2`): + the order of DPM-Solver; can be `1` or `2` or `3`. We recommend to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + prediction_type (`str`, default `epsilon`): + indicates whether the model predicts the noise (epsilon), or the data / `x0`. One of `epsilon`, `sample`, + or `v-prediction`. + thresholding (`bool`, default `False`): + whether to use the "dynamic thresholding" method (introduced by Imagen, + https://huggingface.co/papers/2205.11487). For pixel-space diffusion models, you can set both + `algorithm_type=dpmsolver++` and `thresholding=True` to use the dynamic thresholding. Note that the + thresholding method is unsuitable for latent-space diffusion models (such as stable-diffusion). + dynamic_thresholding_ratio (`float`, default `0.995`): + the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen + (https://huggingface.co/papers/2205.11487). + sample_max_value (`float`, default `1.0`): + the threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++`. + algorithm_type (`str`, default `dpmsolver++`): + the algorithm type for the solver. Either `dpmsolver` or `dpmsolver++`. The `dpmsolver` type implements the + algorithms in https://huggingface.co/papers/2206.00927, and the `dpmsolver++` type implements the + algorithms in https://huggingface.co/papers/2211.01095. We recommend to use `dpmsolver++` with + `solver_order=2` for guided sampling (e.g. stable-diffusion). + solver_type (`str`, default `midpoint`): + the solver type for the second-order solver. Either `midpoint` or `heun`. The solver type slightly affects + the sample quality, especially for small number of steps. We empirically find that `midpoint` solvers are + slightly better, so we recommend to use the `midpoint` type. + lower_order_final (`bool`, default `True`): + whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically + find this trick can stabilize the sampling of DPM-Solver for steps < 15, especially for steps <= 10. + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + the `dtype` used for params and computation. + """ + + _compatibles = [e.name for e in FlaxKarrasDiffusionSchedulers] + + dtype: jnp.dtype + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: jnp.ndarray | None = None, + solver_order: int = 2, + prediction_type: str = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: str = "dpmsolver++", + solver_type: str = "midpoint", + lower_order_final: bool = True, + timestep_spacing: str = "linspace", + dtype: jnp.dtype = jnp.float32, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.dtype = dtype + + def create_state(self, common: CommonSchedulerState | None = None) -> DPMSolverMultistepSchedulerState: + if common is None: + common = CommonSchedulerState.create(self) + + # Currently we only support VP-type noise schedule + alpha_t = jnp.sqrt(common.alphas_cumprod) + sigma_t = jnp.sqrt(1 - common.alphas_cumprod) + lambda_t = jnp.log(alpha_t) - jnp.log(sigma_t) + + # settings for DPM-Solver + if self.config.algorithm_type not in ["dpmsolver", "dpmsolver++"]: + raise NotImplementedError(f"{self.config.algorithm_type} is not implemented for {self.__class__}") + if self.config.solver_type not in ["midpoint", "heun"]: + raise NotImplementedError(f"{self.config.solver_type} is not implemented for {self.__class__}") + + # standard deviation of the initial noise distribution + init_noise_sigma = jnp.array(1.0, dtype=self.dtype) + + timesteps = jnp.arange(0, self.config.num_train_timesteps).round()[::-1] + + return DPMSolverMultistepSchedulerState.create( + common=common, + alpha_t=alpha_t, + sigma_t=sigma_t, + lambda_t=lambda_t, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + def set_timesteps( + self, + state: DPMSolverMultistepSchedulerState, + num_inference_steps: int, + shape: tuple, + ) -> DPMSolverMultistepSchedulerState: + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`DPMSolverMultistepSchedulerState`): + the `FlaxDPMSolverMultistepScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + shape (`tuple`): + the shape of the samples to be generated. + """ + last_timestep = self.config.num_train_timesteps + if self.config.timestep_spacing == "linspace": + timesteps = ( + jnp.linspace(0, last_timestep - 1, num_inference_steps + 1).round()[::-1][:-1].astype(jnp.int32) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = last_timestep // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = ( + (jnp.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(jnp.int32) + ) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = jnp.arange(last_timestep, 0, -step_ratio).round().copy().astype(jnp.int32) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + # initial running values + + model_outputs = jnp.zeros((self.config.solver_order,) + shape, dtype=self.dtype) + lower_order_nums = jnp.int32(0) + prev_timestep = jnp.int32(-1) + cur_sample = jnp.zeros(shape, dtype=self.dtype) + + return state.replace( + num_inference_steps=num_inference_steps, + timesteps=timesteps, + model_outputs=model_outputs, + lower_order_nums=lower_order_nums, + prev_timestep=prev_timestep, + cur_sample=cur_sample, + ) + + def convert_model_output( + self, + state: DPMSolverMultistepSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + Convert the model output to the corresponding type that the algorithm (DPM-Solver / DPM-Solver++) needs. + + DPM-Solver is designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to + discretize an integral of the data prediction model. So we need to first convert the model output to the + corresponding type to match the algorithm. + + Note that the algorithm type and the model type is decoupled. That is to say, we can use either DPM-Solver or + DPM-Solver++ for both noise prediction model and data prediction model. + + Args: + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the converted model output. + """ + # DPM-Solver++ needs to solve an integral of the data prediction model. + if self.config.algorithm_type == "dpmsolver++": + if self.config.prediction_type == "epsilon": + alpha_t, sigma_t = state.alpha_t[timestep], state.sigma_t[timestep] + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + alpha_t, sigma_t = state.alpha_t[timestep], state.sigma_t[timestep] + x0_pred = alpha_t * sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + " or `v_prediction` for the FlaxDPMSolverMultistepScheduler." + ) + + if self.config.thresholding: + # Dynamic thresholding in https://huggingface.co/papers/2205.11487 + dynamic_max_val = jnp.percentile( + jnp.abs(x0_pred), + self.config.dynamic_thresholding_ratio, + axis=tuple(range(1, x0_pred.ndim)), + ) + dynamic_max_val = jnp.maximum( + dynamic_max_val, + self.config.sample_max_value * jnp.ones_like(dynamic_max_val), + ) + x0_pred = jnp.clip(x0_pred, -dynamic_max_val, dynamic_max_val) / dynamic_max_val + return x0_pred + # DPM-Solver needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type == "dpmsolver": + if self.config.prediction_type == "epsilon": + return model_output + elif self.config.prediction_type == "sample": + alpha_t, sigma_t = state.alpha_t[timestep], state.sigma_t[timestep] + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + elif self.config.prediction_type == "v_prediction": + alpha_t, sigma_t = state.alpha_t[timestep], state.sigma_t[timestep] + epsilon = alpha_t * model_output + sigma_t * sample + return epsilon + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + " or `v_prediction` for the FlaxDPMSolverMultistepScheduler." + ) + + def dpm_solver_first_order_update( + self, + state: DPMSolverMultistepSchedulerState, + model_output: jnp.ndarray, + timestep: int, + prev_timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + One step for the first-order DPM-Solver (equivalent to DDIM). + + See https://huggingface.co/papers/2206.00927 for the detailed derivation. + + Args: + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the sample tensor at the previous timestep. + """ + t, s0 = prev_timestep, timestep + m0 = model_output + lambda_t, lambda_s = state.lambda_t[t], state.lambda_t[s0] + alpha_t, alpha_s = state.alpha_t[t], state.alpha_t[s0] + sigma_t, sigma_s = state.sigma_t[t], state.sigma_t[s0] + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (jnp.exp(-h) - 1.0)) * m0 + elif self.config.algorithm_type == "dpmsolver": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (jnp.exp(h) - 1.0)) * m0 + return x_t + + def multistep_dpm_solver_second_order_update( + self, + state: DPMSolverMultistepSchedulerState, + model_output_list: jnp.ndarray, + timestep_list: list[int], + prev_timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + One step for the second-order multistep DPM-Solver. + + Args: + model_output_list (`list[jnp.ndarray]`): + direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the sample tensor at the previous timestep. + """ + t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2] + m0, m1 = model_output_list[-1], model_output_list[-2] + lambda_t, lambda_s0, lambda_s1 = ( + state.lambda_t[t], + state.lambda_t[s0], + state.lambda_t[s1], + ) + alpha_t, alpha_s0 = state.alpha_t[t], state.alpha_t[s0] + sigma_t, sigma_s0 = state.sigma_t[t], state.sigma_t[s0] + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (jnp.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (jnp.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (jnp.exp(-h) - 1.0)) * D0 + + (alpha_t * ((jnp.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (jnp.exp(h) - 1.0)) * D0 + - 0.5 * (sigma_t * (jnp.exp(h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (jnp.exp(h) - 1.0)) * D0 + - (sigma_t * ((jnp.exp(h) - 1.0) / h - 1.0)) * D1 + ) + return x_t + + def multistep_dpm_solver_third_order_update( + self, + state: DPMSolverMultistepSchedulerState, + model_output_list: jnp.ndarray, + timestep_list: list[int], + prev_timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + One step for the third-order multistep DPM-Solver. + + Args: + model_output_list (`list[jnp.ndarray]`): + direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the sample tensor at the previous timestep. + """ + t, s0, s1, s2 = ( + prev_timestep, + timestep_list[-1], + timestep_list[-2], + timestep_list[-3], + ) + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + lambda_t, lambda_s0, lambda_s1, lambda_s2 = ( + state.lambda_t[t], + state.lambda_t[s0], + state.lambda_t[s1], + state.lambda_t[s2], + ) + alpha_t, alpha_s0 = state.alpha_t[t], state.alpha_t[s0] + sigma_t, sigma_s0 = state.sigma_t[t], state.sigma_t[s0] + h, h_0, h_1 = lambda_t - lambda_s0, lambda_s0 - lambda_s1, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m0 + D1_0, D1_1 = (1.0 / r0) * (m0 - m1), (1.0 / r1) * (m1 - m2) + D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1) + D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (jnp.exp(-h) - 1.0)) * D0 + + (alpha_t * ((jnp.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((jnp.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (jnp.exp(h) - 1.0)) * D0 + - (sigma_t * ((jnp.exp(h) - 1.0) / h - 1.0)) * D1 + - (sigma_t * ((jnp.exp(h) - 1.0 - h) / h**2 - 0.5)) * D2 + ) + return x_t + + def step( + self, + state: DPMSolverMultistepSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + return_dict: bool = True, + ) -> FlaxDPMSolverMultistepSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by DPM-Solver. Core function to propagate the diffusion process + from the learned model outputs (most often the predicted noise). + + Args: + state (`DPMSolverMultistepSchedulerState`): + the `FlaxDPMSolverMultistepScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than FlaxDPMSolverMultistepSchedulerOutput class + + Returns: + [`FlaxDPMSolverMultistepSchedulerOutput`] or `tuple`: [`FlaxDPMSolverMultistepSchedulerOutput`] if + `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + (step_index,) = jnp.where(state.timesteps == timestep, size=1) + step_index = step_index[0] + + prev_timestep = jax.lax.select(step_index == len(state.timesteps) - 1, 0, state.timesteps[step_index + 1]) + + model_output = self.convert_model_output(state, model_output, timestep, sample) + + model_outputs_new = jnp.roll(state.model_outputs, -1, axis=0) + model_outputs_new = model_outputs_new.at[-1].set(model_output) + state = state.replace( + model_outputs=model_outputs_new, + prev_timestep=prev_timestep, + cur_sample=sample, + ) + + def step_1(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + return self.dpm_solver_first_order_update( + state, + state.model_outputs[-1], + state.timesteps[step_index], + state.prev_timestep, + state.cur_sample, + ) + + def step_23(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + def step_2(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + timestep_list = jnp.array([state.timesteps[step_index - 1], state.timesteps[step_index]]) + return self.multistep_dpm_solver_second_order_update( + state, + state.model_outputs, + timestep_list, + state.prev_timestep, + state.cur_sample, + ) + + def step_3(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + timestep_list = jnp.array( + [ + state.timesteps[step_index - 2], + state.timesteps[step_index - 1], + state.timesteps[step_index], + ] + ) + return self.multistep_dpm_solver_third_order_update( + state, + state.model_outputs, + timestep_list, + state.prev_timestep, + state.cur_sample, + ) + + step_2_output = step_2(state) + step_3_output = step_3(state) + + if self.config.solver_order == 2: + return step_2_output + elif self.config.lower_order_final and len(state.timesteps) < 15: + return jax.lax.select( + state.lower_order_nums < 2, + step_2_output, + jax.lax.select( + step_index == len(state.timesteps) - 2, + step_2_output, + step_3_output, + ), + ) + else: + return jax.lax.select( + state.lower_order_nums < 2, + step_2_output, + step_3_output, + ) + + step_1_output = step_1(state) + step_23_output = step_23(state) + + if self.config.solver_order == 1: + prev_sample = step_1_output + + elif self.config.lower_order_final and len(state.timesteps) < 15: + prev_sample = jax.lax.select( + state.lower_order_nums < 1, + step_1_output, + jax.lax.select( + step_index == len(state.timesteps) - 1, + step_1_output, + step_23_output, + ), + ) + + else: + prev_sample = jax.lax.select( + state.lower_order_nums < 1, + step_1_output, + step_23_output, + ) + + state = state.replace( + lower_order_nums=jnp.minimum(state.lower_order_nums + 1, self.config.solver_order), + ) + + if not return_dict: + return (prev_sample, state) + + return FlaxDPMSolverMultistepSchedulerOutput(prev_sample=prev_sample, state=state) + + def scale_model_input( + self, + state: DPMSolverMultistepSchedulerState, + sample: jnp.ndarray, + timestep: int | None = None, + ) -> jnp.ndarray: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + state (`DPMSolverMultistepSchedulerState`): + the `FlaxDPMSolverMultistepScheduler` state data class instance. + sample (`jnp.ndarray`): input sample + timestep (`int`, optional): current timestep + + Returns: + `jnp.ndarray`: scaled input sample + """ + return sample + + def add_noise( + self, + state: DPMSolverMultistepSchedulerState, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + return add_noise_common(state.common, original_samples, noise, timesteps) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py new file mode 100644 index 0000000000000000000000000000000000000000..3386514f64e9e88805f93dc4443990b5a9de121e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py @@ -0,0 +1,1166 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import deprecate, is_scipy_available +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +if is_scipy_available(): + import scipy.stats + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin): + """ + `DPMSolverMultistepInverseScheduler` is the reverse scheduler of [`DPMSolverMultistepScheduler`]. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, defaults to 2): + The DPMSolver order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++"`. + algorithm_type (`str`, defaults to `dpmsolver++`): + Algorithm type for the solver; can be `dpmsolver`, `dpmsolver++`, `sde-dpmsolver` or `sde-dpmsolver++`. The + `dpmsolver` type implements the algorithms in the [DPMSolver](https://huggingface.co/papers/2206.00927) + paper, and the `dpmsolver++` type implements the algorithms in the + [DPMSolver++](https://huggingface.co/papers/2211.01095) paper. It is recommended to use `dpmsolver++` or + `sde-dpmsolver++` with `solver_order=2` for guided sampling like in Stable Diffusion. + solver_type (`str`, defaults to `midpoint`): + Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the + sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers. + lower_order_final (`bool`, defaults to `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + euler_at_final (`bool`, defaults to `False`): + Whether to use Euler's method in the final step. It is a trade-off between numerical stability and detail + richness. This can stabilize the sampling of the SDE variant of DPMSolver for small number of inference + steps, but sometimes may result in blurring. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + use_flow_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use flow sigmas for step sizes in the noise schedule during the sampling process. + flow_shift (`float`, *optional*, defaults to 1.0): + The flow shift factor. Valid only when `use_flow_sigmas=True`. + lambda_min_clipped (`float`, defaults to `-inf`): + Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the + cosine (`squaredcos_cap_v2`) noise schedule. + variance_type (`str`, *optional*): + Set to "learned" or "learned_range" for diffusion models that predict variance. If set, the model's output + contains the predicted Gaussian variance. + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + solver_order: int = 2, + prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: Literal["dpmsolver", "dpmsolver++", "sde-dpmsolver", "sde-dpmsolver++"] = "dpmsolver++", + solver_type: Literal["midpoint", "heun"] = "midpoint", + lower_order_final: bool = True, + euler_at_final: bool = False, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + use_flow_sigmas: bool = False, + flow_shift: float = 1.0, + lambda_min_clipped: float = -float("inf"), + variance_type: Literal["learned", "learned_range"] | None = None, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if ( + sum( + [ + self.config.use_beta_sigmas, + self.config.use_exponential_sigmas, + self.config.use_karras_sigmas, + ] + ) + > 1 + ): + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if algorithm_type in ["dpmsolver", "sde-dpmsolver"]: + deprecation_message = f"algorithm_type {algorithm_type} is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead" + deprecate( + "algorithm_types dpmsolver and sde-dpmsolver", + "1.0.0", + deprecation_message, + ) + + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # settings for DPM-Solver + if algorithm_type not in [ + "dpmsolver", + "dpmsolver++", + "sde-dpmsolver", + "sde-dpmsolver++", + ]: + if algorithm_type == "deis": + self.register_to_config(algorithm_type="dpmsolver++") + else: + raise NotImplementedError(f"{algorithm_type} is not implemented for {self.__class__}") + + if solver_type not in ["midpoint", "heun"]: + if solver_type in ["logrho", "bh1", "bh2"]: + self.register_to_config(solver_type="midpoint") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32).copy() + self.timesteps = torch.from_numpy(timesteps) + self.model_outputs = [None] * solver_order + self.lower_order_nums = 0 + self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + self.use_karras_sigmas = use_karras_sigmas + self.use_exponential_sigmas = use_exponential_sigmas + self.use_beta_sigmas = use_beta_sigmas + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + # Clipping the minimum of all lambda(t) for numerical stability. + # This is critical for cosine (squaredcos_cap_v2) noise schedule. + clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped).item() + self.noisiest_timestep = self.config.num_train_timesteps - 1 - clipped_idx + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.noisiest_timestep, num_inference_steps + 1).round()[:-1].copy().astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = (self.noisiest_timestep + 1) // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[:-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(self.noisiest_timestep + 1, 0, -step_ratio).round()[::-1].copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', " + "'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + timesteps = timesteps.copy().astype(np.int64) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_flow_sigmas: + alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy() + timesteps = (sigmas * self.config.num_train_timesteps).copy() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + sigma_max = ( + (1 - self.alphas_cumprod[self.noisiest_timestep]) / self.alphas_cumprod[self.noisiest_timestep] + ) ** 0.5 + sigmas = np.concatenate([sigmas, [sigma_max]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + + # when num_inference_steps == num_train_timesteps, we can end up with + # duplicates in timesteps. + _, unique_indices = np.unique(timesteps, return_index=True) + timesteps = timesteps[np.sort(unique_indices)] + + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Convert sigma values to alpha_t and sigma_t values. + + Args: + sigma (`torch.Tensor`): + The sigma value(s) to convert. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing (alpha_t, sigma_t) values. + """ + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = sigma + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.convert_model_output + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is + designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an + integral of the data prediction model. + + > [!TIP] > The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both + noise > prediction and data prediction models. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + # DPM-Solver++ needs to solve an integral of the data prediction model. + if self.config.algorithm_type in ["dpmsolver++", "sde-dpmsolver++"]: + if self.config.prediction_type == "epsilon": + # DPM-Solver and DPM-Solver++ only need the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + model_output = model_output[:, :3] + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + sigma_t = self.sigmas[self.step_index] + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the DPMSolverMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + + # DPM-Solver needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type in ["dpmsolver", "sde-dpmsolver"]: + if self.config.prediction_type == "epsilon": + # DPM-Solver and DPM-Solver++ only need the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + epsilon = model_output[:, :3] + else: + epsilon = model_output + elif self.config.prediction_type == "sample": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + epsilon = (sample - alpha_t * model_output) / sigma_t + elif self.config.prediction_type == "v_prediction": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + epsilon = alpha_t * model_output + sigma_t * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the DPMSolverMultistepScheduler." + ) + + if self.config.thresholding: + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = (sample - sigma_t * epsilon) / alpha_t + x0_pred = self._threshold_sample(x0_pred) + epsilon = (sample - alpha_t * x0_pred) / sigma_t + + return epsilon + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.dpm_solver_first_order_update + def dpm_solver_first_order_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the first-order DPMSolver (equivalent to DDIM). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by the diffusion process. + noise (`torch.Tensor`, *optional*): + The noise tensor. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s = torch.log(alpha_s) - torch.log(sigma_s) + + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output + elif self.config.algorithm_type == "dpmsolver": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (torch.exp(h) - 1.0)) * model_output + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + x_t = ( + (sigma_t / sigma_s * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * model_output + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.algorithm_type == "sde-dpmsolver": + assert noise is not None + x_t = ( + (alpha_t / alpha_s) * sample + - 2.0 * (sigma_t * (torch.exp(h) - 1.0)) * model_output + + sigma_t * torch.sqrt(torch.exp(2 * h) - 1.0) * noise + ) + return x_t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.multistep_dpm_solver_second_order_update + def multistep_dpm_solver_second_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the second-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + + m0, m1 = model_output_list[-1], model_output_list[-2] + + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - 0.5 * (sigma_t * (torch.exp(h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + 0.5 * (alpha_t * (1 - torch.exp(-2.0 * h))) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.algorithm_type == "sde-dpmsolver": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - 2.0 * (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * (torch.exp(h) - 1.0)) * D1 + + sigma_t * torch.sqrt(torch.exp(2 * h) - 1.0) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - 2.0 * (sigma_t * (torch.exp(h) - 1.0)) * D0 + - 2.0 * (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + + sigma_t * torch.sqrt(torch.exp(2 * h) - 1.0) * noise + ) + return x_t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.multistep_dpm_solver_third_order_update + def multistep_dpm_solver_third_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the third-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`, *optional*): + A current instance of a sample created by diffusion process. + noise (`torch.Tensor`, *optional*): + The noise tensor. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1, sigma_s2 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + self.sigmas[self.step_index - 2], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + alpha_s2, sigma_s2 = self._sigma_to_alpha_sigma_t(sigma_s2) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + lambda_s2 = torch.log(alpha_s2) - torch.log(sigma_s2) + + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + + h, h_0, h_1 = lambda_t - lambda_s0, lambda_s0 - lambda_s1, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m0 + D1_0, D1_1 = (1.0 / r0) * (m0 - m1), (1.0 / r1) * (m1 - m2) + D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1) + D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + - (sigma_t * ((torch.exp(h) - 1.0 - h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1.0 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h) - 2.0 * h) / (2.0 * h) ** 2 - 0.5)) * D2 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + return x_t + + def _init_step_index(self, timestep: int | torch.Tensor): + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + + index_candidates = (self.timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + self._step_index = step_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + variance_noise: torch.Tensor | None = None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep DPMSolver. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + variance_noise (`torch.Tensor`): + Alternative to generating noise with `generator` by directly providing the noise for the variance + itself. Useful for methods such as [`CycleDiffusion`]. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Improve numerical stability for small number of steps + lower_order_final = (self.step_index == len(self.timesteps) - 1) and ( + self.config.euler_at_final or (self.config.lower_order_final and len(self.timesteps) < 15) + ) + lower_order_second = ( + (self.step_index == len(self.timesteps) - 2) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + + model_output = self.convert_model_output(model_output, sample=sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + if self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"] and variance_noise is None: + noise = randn_tensor( + model_output.shape, + generator=generator, + device=model_output.device, + dtype=model_output.dtype, + ) + elif self.config.algorithm_type in ["sde-dpmsolver", "sde-dpmsolver++"]: + noise = variance_noise + else: + noise = None + + if self.config.solver_order == 1 or self.lower_order_nums < 1 or lower_order_final: + prev_sample = self.dpm_solver_first_order_update(model_output, sample=sample, noise=noise) + elif self.config.solver_order == 2 or self.lower_order_nums < 2 or lower_order_second: + prev_sample = self.multistep_dpm_solver_second_order_update(self.model_outputs, sample=sample, noise=noise) + else: + prev_sample = self.multistep_dpm_solver_third_order_update(self.model_outputs, sample=sample) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.scale_model_input + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the clean `original_samples` using the scheduler's equivalent function. + + Args: + original_samples (`torch.Tensor`): + The original samples to add noise to. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps at which to add noise. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + step_indices = [] + for timestep in timesteps: + index_candidates = (schedule_timesteps == timestep).nonzero() + if len(index_candidates) == 0: + step_index = len(schedule_timesteps) - 1 + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + step_indices.append(step_index) + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_sde.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_sde.py new file mode 100644 index 0000000000000000000000000000000000000000..b288d9f61b1489d6cb37daa5b1af0a83794b78cb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_sde.py @@ -0,0 +1,825 @@ +# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Callable, List, Literal, Optional, Tuple, Union + +import numpy as np +import torch +import torchsde + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DPMSolverSDE +class DPMSolverSDESchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +class BatchedBrownianTree: + """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" + + def __init__( + self, + x: torch.Tensor, + t0: float, + t1: float, + seed: Optional[Union[int, List[int]]] = None, + **kwargs, + ): + t0, t1, self.sign = self.sort(t0, t1) + w0 = kwargs.get("w0", torch.zeros_like(x)) + if seed is None: + seed = torch.randint(0, 2**63 - 1, []).item() + self.batched = True + try: + assert len(seed) == x.shape[0] + w0 = w0[0] + except TypeError: + seed = [seed] + self.batched = False + self.trees = [ + torchsde.BrownianInterval( + t0=t0, + t1=t1, + size=w0.shape, + dtype=w0.dtype, + device=w0.device, + entropy=s, + tol=1e-6, + pool_size=24, + halfway_tree=True, + ) + for s in seed + ] + + @staticmethod + def sort(a: float, b: float) -> Tuple[float, float, float]: + """ + Sorts two float values and returns them along with a sign indicating if they were swapped. + + Args: + a (`float`): + The first value. + b (`float`): + The second value. + + Returns: + `Tuple[float, float, float]`: + A tuple containing the sorted values (min, max) and a sign (1.0 if a < b, -1.0 otherwise). + """ + return (a, b, 1.0) if a < b else (b, a, -1.0) + + def __call__(self, t0: float, t1: float) -> torch.Tensor: + t0, t1, sign = self.sort(t0, t1) + w = torch.stack([tree(t0, t1) for tree in self.trees]) * (self.sign * sign) + return w if self.batched else w[0] + + +class BrownianTreeNoiseSampler: + """A noise sampler backed by a torchsde.BrownianTree. + + Args: + x (`torch.Tensor`): The tensor whose shape, device and dtype is used to generate random samples. + sigma_min (`float`): The low end of the valid interval. + sigma_max (`float`): The high end of the valid interval. + seed (`int` or `List[int]`): The random seed. If a list of seeds is + supplied instead of a single integer, then the noise sampler will use one BrownianTree per batch item, each + with its own seed. + transform (`callable`): A function that maps sigma to the sampler's + internal timestep. + """ + + def __init__( + self, + x: torch.Tensor, + sigma_min: float, + sigma_max: float, + seed: Optional[Union[int, List[int]]] = None, + transform: Callable[[float], float] = lambda x: x, + ): + self.transform = transform + t0, t1 = self.transform(torch.as_tensor(sigma_min)), self.transform(torch.as_tensor(sigma_max)) + self.tree = BatchedBrownianTree(x, t0, t1, seed) + + def __call__(self, sigma: float, sigma_next: float) -> torch.Tensor: + t0, t1 = self.transform(torch.as_tensor(sigma)), self.transform(torch.as_tensor(sigma_next)) + return self.tree(t0, t1) / (t1 - t0).abs().sqrt() + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin): + """ + DPMSolverSDEScheduler implements the stochastic sampler from the [Elucidating the Design Space of Diffusion-Based + Generative Models](https://huggingface.co/papers/2206.00364) paper. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.00085): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.012): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear` or `scaled_linear`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + noise_sampler_seed (`int`, *optional*, defaults to `None`): + The random seed to use for the noise sampler. If `None`, a random seed is generated. + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 2 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, # sensible defaults + beta_end: float = 0.012, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + noise_sampler_seed: int | None = None, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if ( + sum( + [ + self.config.use_beta_sigmas, + self.config.use_exponential_sigmas, + self.config.use_karras_sigmas, + ] + ) + > 1 + ): + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # set all values + self.set_timesteps(num_train_timesteps, None, num_train_timesteps) + self.use_karras_sigmas = use_karras_sigmas + self.noise_sampler = None + self.noise_sampler_seed = noise_sampler_seed + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + @property + def init_noise_sigma(self) -> torch.Tensor: + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + return self.sigmas.max() + + return (self.sigmas.max() ** 2 + 1) ** 0.5 + + @property + def step_index(self) -> Union[int, None]: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self) -> Union[int, None]: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input( + self, + sample: torch.Tensor, + timestep: float | torch.Tensor, + ) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sigma_input = sigma if self.state_in_first_order else self.mid_point_sigma + sample = sample / ((sigma_input**2 + 1) ** 0.5) + return sample + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device = None, + num_train_timesteps: int | None = None, + ) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + num_train_timesteps (`int`, *optional*): + The number of train timesteps. If `None`, uses `self.config.num_train_timesteps`. + """ + self.num_inference_steps = num_inference_steps + + num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=float)[::-1].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(float) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + second_order_timesteps = self._second_order_timesteps(sigmas, log_sigmas) + + sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + sigmas = torch.from_numpy(sigmas).to(device=device) + self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]]) + + timesteps = torch.from_numpy(timesteps) + second_order_timesteps = torch.from_numpy(second_order_timesteps) + timesteps = torch.cat([timesteps[:1], timesteps[1:].repeat_interleave(2)]) + timesteps[1::2] = second_order_timesteps + + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = timesteps.to(device, dtype=torch.float32) + else: + self.timesteps = timesteps.to(device=device) + + # empty first order variables + self.sample = None + self.mid_point_sigma = None + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + self.noise_sampler = None + + def _second_order_timesteps(self, sigmas: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + def sigma_fn(_t): + return np.exp(-_t) + + def t_fn(_sigma): + return -np.log(_sigma) + + midpoint_ratio = 0.5 + t = t_fn(sigmas) + delta_time = np.diff(t) + t_proposed = t[:-1] + delta_time * midpoint_ratio + sig_proposed = sigma_fn(t_proposed) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sig_proposed]) + return timesteps + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_lms_discrete.LMSDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + sigma_min: float = in_sigmas[-1].item() + sigma_max: float = in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, self.num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + @property + def state_in_first_order(self) -> bool: + return self.sample is None + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + return_dict: bool = True, + s_noise: float = 1.0, + ) -> DPMSolverSDESchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_dpmsolver_sde.DPMSolverSDESchedulerOutput`] or + tuple. + s_noise (`float`, *optional*, defaults to 1.0): + Scaling factor for noise added to the sample. + + Returns: + [`~schedulers.scheduling_dpmsolver_sde.DPMSolverSDESchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_dpmsolver_sde.DPMSolverSDESchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + if self.step_index is None: + self._init_step_index(timestep) + + # Create a noise sampler if it hasn't been created yet + if self.noise_sampler is None: + min_sigma, max_sigma = self.sigmas[self.sigmas > 0].min(), self.sigmas.max() + self.noise_sampler = BrownianTreeNoiseSampler( + sample, min_sigma.item(), max_sigma.item(), self.noise_sampler_seed + ) + + # Define functions to compute sigma and t from each other + def sigma_fn(_t: torch.Tensor) -> torch.Tensor: + return _t.neg().exp() + + def t_fn(_sigma: torch.Tensor) -> torch.Tensor: + return _sigma.log().neg() + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + sigma_next = self.sigmas[self.step_index + 1] + else: + # 2nd order + sigma = self.sigmas[self.step_index - 1] + sigma_next = self.sigmas[self.step_index] + + # Set the midpoint and step size for the current step + midpoint_ratio = 0.5 + t, t_next = t_fn(sigma), t_fn(sigma_next) + delta_time = t_next - t + t_proposed = t + delta_time * midpoint_ratio + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + sigma_input = sigma if self.state_in_first_order else sigma_fn(t_proposed) + pred_original_sample = sample - sigma_input * model_output + elif self.config.prediction_type == "v_prediction": + sigma_input = sigma if self.state_in_first_order else sigma_fn(t_proposed) + pred_original_sample = model_output * (-sigma_input / (sigma_input**2 + 1) ** 0.5) + ( + sample / (sigma_input**2 + 1) + ) + elif self.config.prediction_type == "sample": + raise NotImplementedError("prediction_type not implemented yet: sample") + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + if sigma_next == 0: + derivative = (sample - pred_original_sample) / sigma + dt = sigma_next - sigma + prev_sample = sample + derivative * dt + else: + if self.state_in_first_order: + t_next = t_proposed + else: + sample = self.sample + + sigma_from = sigma_fn(t) + sigma_to = sigma_fn(t_next) + sigma_up = min( + sigma_to, + (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5, + ) + sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5 + ancestral_t = t_fn(sigma_down) + prev_sample = (sigma_fn(ancestral_t) / sigma_fn(t)) * sample - ( + t - ancestral_t + ).expm1() * pred_original_sample + prev_sample = prev_sample + self.noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * sigma_up + + if self.state_in_first_order: + # store for 2nd order step + self.sample = sample + self.mid_point_sigma = sigma_fn(t_next) + else: + # free for "first order mode" + self.sample = None + self.mid_point_sigma = None + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return DPMSolverSDESchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_singlestep.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_singlestep.py new file mode 100644 index 0000000000000000000000000000000000000000..002f2b844a527d710310e769b52f3c3b704ae6df --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_dpmsolver_singlestep.py @@ -0,0 +1,1312 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import deprecate, is_scipy_available, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +if is_scipy_available(): + import scipy.stats + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): + """ + `DPMSolverSinglestepScheduler` is a fast dedicated high-order solver for diffusion ODEs. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to `1000`): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to `0.0001`): + The starting `beta` value of inference. + beta_end (`float`, defaults to `0.02`): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray` or `list[float]`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, defaults to `2`): + The DPMSolver order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + prediction_type (`"epsilon"`, `"sample"`, `"v_prediction"`, or `"flow_prediction"`, defaults to `"epsilon"`): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`), `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper), or `flow_prediction`. + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to `0.995`): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to `1.0`): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++"`. + algorithm_type (`"dpmsolver"`, `"dpmsolver++"`, or `"sde-dpmsolver++"`, defaults to `"dpmsolver++"`): + Algorithm type for the solver; can be `dpmsolver`, `dpmsolver++`, or `sde-dpmsolver++`. The `dpmsolver` + type implements the algorithms in the [DPMSolver](https://huggingface.co/papers/2206.00927) paper, and the + `dpmsolver++` type implements the algorithms in the [DPMSolver++](https://huggingface.co/papers/2211.01095) + paper. It is recommended to use `dpmsolver++` or `sde-dpmsolver++` with `solver_order=2` for guided + sampling like in Stable Diffusion. + solver_type (`"midpoint"` or `"heun"`, defaults to `"midpoint"`): + Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the + sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers. + lower_order_final (`bool`, defaults to `False`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + use_flow_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use flow sigmas for step sizes in the noise schedule during the sampling process. + flow_shift (`float`, *optional*, defaults to `1.0`): + The flow shift parameter for flow-based models. + final_sigmas_type (`"zero"` or `"sigma_min"`, *optional*, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final + sigma is the same as the last sigma in the training schedule. If `"zero"`, the final sigma is set to 0. + lambda_min_clipped (`float`, defaults to `-inf`): + Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the + cosine (`squaredcos_cap_v2`) noise schedule. + variance_type (`"learned"` or `"learned_range"`, *optional*): + Set to `"learned"` or `"learned_range"` for diffusion models that predict variance. If set, the model's + output contains the predicted Gaussian variance. + use_dynamic_shifting (`bool`, defaults to `False`): + Whether to use dynamic shifting for the noise schedule. + time_shift_type (`"exponential"`, defaults to `"exponential"`): + The type of time shifting to apply. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear", + trained_betas: np.ndarray | list[float] | None = None, + solver_order: int = 2, + prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: Literal["dpmsolver", "dpmsolver++", "sde-dpmsolver++"] = "dpmsolver++", + solver_type: Literal["midpoint", "heun"] = "midpoint", + lower_order_final: bool = False, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + use_flow_sigmas: bool = False, + flow_shift: float = 1.0, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", + lambda_min_clipped: float = -float("inf"), + variance_type: Literal["learned", "learned_range"] | None = None, + use_dynamic_shifting: bool = False, + time_shift_type: Literal["exponential"] = "exponential", + ) -> None: + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if algorithm_type == "dpmsolver": + deprecation_message = "algorithm_type `dpmsolver` is deprecated and will be removed in a future version. Choose from `dpmsolver++` or `sde-dpmsolver++` instead" + deprecate("algorithm_types=dpmsolver", "1.0.0", deprecation_message) + + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # settings for DPM-Solver + if algorithm_type not in ["dpmsolver", "dpmsolver++", "sde-dpmsolver++"]: + if algorithm_type == "deis": + self.register_to_config(algorithm_type="dpmsolver++") + else: + raise NotImplementedError(f"{algorithm_type} is not implemented for {self.__class__}") + if solver_type not in ["midpoint", "heun"]: + if solver_type in ["logrho", "bh1", "bh2"]: + self.register_to_config(solver_type="midpoint") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + if algorithm_type not in ["dpmsolver++", "sde-dpmsolver++"] and final_sigmas_type == "zero": + raise ValueError( + f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please choose `sigma_min` instead." + ) + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.model_outputs = [None] * solver_order + self.sample = None + self.order_list = self.get_order_list(num_train_timesteps) + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + def get_order_list(self, num_inference_steps: int) -> list[int]: + """ + Computes the solver order at each time step. + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + + Returns: + `list[int]`: + The list of solver orders for each timestep. + """ + steps = num_inference_steps + order = self.config.solver_order + if order > 3: + raise ValueError("Order > 3 is not supported by this scheduler") + if self.config.lower_order_final: + if order == 3: + if steps % 3 == 0: + orders = [1, 2, 3] * (steps // 3 - 1) + [1, 2] + [1] + elif steps % 3 == 1: + orders = [1, 2, 3] * (steps // 3) + [1] + else: + orders = [1, 2, 3] * (steps // 3) + [1, 2] + elif order == 2: + if steps % 2 == 0: + orders = [1, 2] * (steps // 2 - 1) + [1, 1] + else: + orders = [1, 2] * (steps // 2) + [1] + elif order == 1: + orders = [1] * steps + else: + if order == 3: + orders = [1, 2, 3] * (steps // 3) + elif order == 2: + orders = [1, 2] * (steps // 2) + elif order == 1: + orders = [1] * steps + + if self.config.final_sigmas_type == "zero": + orders[-1] = 1 + + return orders + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + + Returns: + `int` or `None`: + The current step index. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + + Returns: + `int` or `None`: + The begin index. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device = None, + mu: float | None = None, + timesteps: list[int] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default + timestep spacing strategy of equal spacing between timesteps schedule is used. If `timesteps` is + passed, `num_inference_steps` must be `None`. + """ + if mu is not None: + assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential" + self.config.flow_shift = np.exp(mu) + if num_inference_steps is None and timesteps is None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.") + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.") + if timesteps is not None and self.config.use_karras_sigmas: + raise ValueError("Cannot use `timesteps` when `config.use_karras_sigmas=True`.") + if timesteps is not None and self.config.use_exponential_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") + + num_inference_steps = num_inference_steps or len(timesteps) + self.num_inference_steps = num_inference_steps + + if timesteps is not None: + timesteps = np.array(timesteps).astype(np.int64) + else: + # Clipping the minimum of all lambda(t) for numerical stability. + # This is critical for cosine (squaredcos_cap_v2) noise schedule. + clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped) + clipped_idx = clipped_idx.item() + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1 - clipped_idx, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + if self.config.use_karras_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + elif self.config.use_exponential_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_flow_sigmas: + alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy() + timesteps = (sigmas * self.config.num_train_timesteps).copy() + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f" `final_sigmas_type` must be one of `sigma_min` or `zero`, but got {self.config.final_sigmas_type}" + ) + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas).to(device=device) + + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + self.model_outputs = [None] * self.config.solver_order + self.sample = None + + if not self.config.lower_order_final and num_inference_steps % self.config.solver_order != 0: + logger.warning( + "Changing scheduler {self.config} to have `lower_order_final` set to True to handle uneven amount of inference steps. Please make sure to always use an even number of `num_inference steps when using `lower_order_final=False`." + ) + self.register_to_config(lower_order_final=True) + + if not self.config.lower_order_final and self.config.final_sigmas_type == "zero": + logger.warning( + " `last_sigmas_type='zero'` is not supported for `lower_order_final=False`. Changing scheduler {self.config} to have `lower_order_final` set to True." + ) + self.register_to_config(lower_order_final=True) + + self.order_list = self.get_order_list(num_inference_steps) + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Convert sigma values to alpha_t and sigma_t values. + + Args: + sigma (`torch.Tensor`): + The sigma value(s) to convert. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing (alpha_t, sigma_t) values. + """ + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = sigma + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is + designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an + integral of the data prediction model. + + > [!TIP] > The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both + noise > prediction and data prediction models. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + # DPM-Solver++ needs to solve an integral of the data prediction model. + if self.config.algorithm_type in ["dpmsolver++", "sde-dpmsolver++"]: + if self.config.prediction_type == "epsilon": + # DPM-Solver and DPM-Solver++ only need the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + model_output = model_output[:, :3] + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + sigma_t = self.sigmas[self.step_index] + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the DPMSolverSinglestepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + + # DPM-Solver needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type == "dpmsolver": + if self.config.prediction_type == "epsilon": + # DPM-Solver and DPM-Solver++ only need the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + epsilon = model_output[:, :3] + else: + epsilon = model_output + elif self.config.prediction_type == "sample": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + epsilon = (sample - alpha_t * model_output) / sigma_t + elif self.config.prediction_type == "v_prediction": + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + epsilon = alpha_t * model_output + sigma_t * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the DPMSolverSinglestepScheduler." + ) + + if self.config.thresholding: + alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep] + x0_pred = (sample - sigma_t * epsilon) / alpha_t + x0_pred = self._threshold_sample(x0_pred) + epsilon = (sample - alpha_t * x0_pred) / sigma_t + + return epsilon + + def dpm_solver_first_order_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the first-order DPMSolver (equivalent to DDIM). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + sigma_t, sigma_s = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s = torch.log(alpha_s) - torch.log(sigma_s) + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output + elif self.config.algorithm_type == "dpmsolver": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (torch.exp(h) - 1.0)) * model_output + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + x_t = ( + (sigma_t / sigma_s * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * model_output + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + return x_t + + def singlestep_dpm_solver_second_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the second-order singlestep DPMSolver that computes the solution at time `prev_timestep` from the + time `timestep_list[-2]`. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): + The current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + sigma_t, sigma_s0, sigma_s1 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + + m0, m1 = model_output_list[-1], model_output_list[-2] + + h, h_0 = lambda_t - lambda_s1, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m1, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s1) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s1) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s1) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - 0.5 * (sigma_t * (torch.exp(h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s1) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s1 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + 0.5 * (alpha_t * (1 - torch.exp(-2.0 * h))) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s1 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + return x_t + + def singlestep_dpm_solver_third_order_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the third-order singlestep DPMSolver that computes the solution at time `prev_timestep` from the + time `timestep_list[-3]`. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): + The current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma_t, sigma_s0, sigma_s1, sigma_s2 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + self.sigmas[self.step_index - 2], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + alpha_s2, sigma_s2 = self._sigma_to_alpha_sigma_t(sigma_s2) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + lambda_s2 = torch.log(alpha_s2) - torch.log(sigma_s2) + + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + + h, h_0, h_1 = lambda_t - lambda_s2, lambda_s0 - lambda_s2, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m2 + D1_0, D1_1 = (1.0 / r1) * (m1 - m2), (1.0 / r0) * (m0 - m2) + D1 = (r0 * D1_0 - r1 * D1_1) / (r0 - r1) + D2 = 2.0 * (D1_1 - D1_0) / (r0 - r1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s2) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1_1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s2) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s2) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1_1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s2) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + - (sigma_t * ((torch.exp(h) - 1.0 - h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s2 * torch.exp(-h)) * sample + + (alpha_t * (1.0 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1_1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s2 * torch.exp(-h)) * sample + + (alpha_t * (1.0 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h) + (-2.0 * h)) / (-2.0 * h) ** 2 - 0.5)) * D2 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + return x_t + + def singlestep_dpm_solver_update( + self, + model_output_list: list[torch.Tensor], + *args, + sample: torch.Tensor | None = None, + order: int = None, + noise: torch.Tensor | None = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the singlestep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): + The current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by diffusion process. + order (`int`): + The solver order at this step. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + timestep_list = args[0] if len(args) > 0 else kwargs.pop("timestep_list", None) + prev_timestep = args[1] if len(args) > 1 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 2: + sample = args[2] + else: + raise ValueError("missing `sample` as a required keyword argument") + if order is None: + if len(args) > 3: + order = args[3] + else: + raise ValueError("missing `order` as a required keyword argument") + if timestep_list is not None: + deprecate( + "timestep_list", + "1.0.0", + "Passing `timestep_list` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + if order == 1: + return self.dpm_solver_first_order_update(model_output_list[-1], sample=sample, noise=noise) + elif order == 2: + return self.singlestep_dpm_solver_second_order_update(model_output_list, sample=sample, noise=noise) + elif order == 3: + return self.singlestep_dpm_solver_third_order_update(model_output_list, sample=sample, noise=noise) + else: + raise ValueError(f"Order must be 1, 2, 3, got {order}") + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._init_step_index + def _init_step_index(self, timestep: int | torch.Tensor) -> None: + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the singlestep DPMSolver. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator for stochastic sampling. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + model_output = self.convert_model_output(model_output, sample=sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + if self.config.algorithm_type == "sde-dpmsolver++": + noise = randn_tensor( + model_output.shape, generator=generator, device=model_output.device, dtype=model_output.dtype + ) + else: + noise = None + + order = self.order_list[self.step_index] + + # For img2img denoising might start with order>1 which is not possible + # In this case make sure that the first two steps are both order=1 + while self.model_outputs[-order] is None: + order -= 1 + + # For single-step solvers, we use the initial value at each time with order = 1. + if order == 1: + self.sample = sample + + prev_sample = self.singlestep_dpm_solver_update( + self.model_outputs, sample=self.sample, order=order, noise=noise + ) + + # upon completion increase step index by one, noise=noise + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples without noise. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps at which to add noise to the samples. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py new file mode 100644 index 0000000000000000000000000000000000000000..20ad17cfdd967ab9b9bd537796397a84177718c7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py @@ -0,0 +1,873 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver and https://github.com/NVlabs/edm + +import math +from typing import List, Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin, SchedulerOutput + + +class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + Implements DPMSolverMultistepScheduler in EDM formulation as presented in Karras et al. 2022 [1]. + `EDMDPMSolverMultistepScheduler` is a fast dedicated high-order solver for diffusion ODEs. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://huggingface.co/papers/2206.00364 + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + sigma_min (`float`, *optional*, defaults to 0.002): + Minimum noise magnitude in the sigma schedule. This was set to 0.002 in the EDM paper [1]; a reasonable + range is [0, 10]. + sigma_max (`float`, *optional*, defaults to 80.0): + Maximum noise magnitude in the sigma schedule. This was set to 80.0 in the EDM paper [1]; a reasonable + range is [0.2, 80.0]. + sigma_data (`float`, *optional*, defaults to 0.5): + The standard deviation of the data distribution. This is set to 0.5 in the EDM paper [1]. + sigma_schedule (`str`, *optional*, defaults to `karras`): + Sigma schedule to compute the `sigmas`. By default, we the schedule introduced in the EDM paper + (https://huggingface.co/papers/2206.00364). Other acceptable value is "exponential". The exponential + schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl. + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + rho (`float`, *optional*, defaults to 7.0): + The rho parameter in the Karras sigma schedule. This was set to 7.0 in the EDM paper [1]. + solver_order (`int`, defaults to 2): + The DPMSolver order which can be `1` or `2` or `3`. It is recommended to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++"`. + algorithm_type (`str`, defaults to `dpmsolver++`): + Algorithm type for the solver; can be `dpmsolver++` or `sde-dpmsolver++`. The `dpmsolver++` type implements + the algorithms in the [DPMSolver++](https://huggingface.co/papers/2211.01095) paper. It is recommended to + use `dpmsolver++` or `sde-dpmsolver++` with `solver_order=2` for guided sampling like in Stable Diffusion. + solver_type (`str`, defaults to `midpoint`): + Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the + sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers. + lower_order_final (`bool`, defaults to `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + euler_at_final (`bool`, defaults to `False`): + Whether to use Euler's method in the final step. It is a trade-off between numerical stability and detail + richness. This can stabilize the sampling of the SDE variant of DPMSolver for small number of inference + steps, but sometimes may result in blurring. + final_sigmas_type (`str`, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final + sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0. + """ + + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + sigma_min: float = 0.002, + sigma_max: float = 80.0, + sigma_data: float = 0.5, + sigma_schedule: Literal["karras", "exponential"] = "karras", + num_train_timesteps: int = 1000, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + rho: float = 7.0, + solver_order: int = 2, + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: Literal["dpmsolver++", "sde-dpmsolver++"] = "dpmsolver++", + solver_type: Literal["midpoint", "heun"] = "midpoint", + lower_order_final: bool = True, + euler_at_final: bool = False, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", # "zero", "sigma_min" + ): + # settings for DPM-Solver + if algorithm_type not in ["dpmsolver++", "sde-dpmsolver++"]: + if algorithm_type == "deis": + self.register_to_config(algorithm_type="dpmsolver++") + else: + raise NotImplementedError(f"{algorithm_type} is not implemented for {self.__class__}") + + if solver_type not in ["midpoint", "heun"]: + if solver_type in ["logrho", "bh1", "bh2"]: + self.register_to_config(solver_type="midpoint") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + if algorithm_type not in ["dpmsolver++", "sde-dpmsolver++"] and final_sigmas_type == "zero": + raise ValueError( + f"`final_sigmas_type` {final_sigmas_type} is not supported for `algorithm_type` {algorithm_type}. Please choose `sigma_min` instead." + ) + + ramp = torch.linspace(0, 1, num_train_timesteps) + if sigma_schedule == "karras": + sigmas = self._compute_karras_sigmas(ramp) + elif sigma_schedule == "exponential": + sigmas = self._compute_exponential_sigmas(ramp) + + self.timesteps = self.precondition_noise(sigmas) + + self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) + + # setable values + self.num_inference_steps = None + self.model_outputs = [None] * solver_order + self.lower_order_nums = 0 + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self) -> float: + # standard deviation of the initial noise distribution + return (self.config.sigma_max**2 + 1) ** 0.5 + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_inputs + def precondition_inputs(self, sample: torch.Tensor, sigma: float | torch.Tensor) -> torch.Tensor: + """ + Precondition the input sample by scaling it according to the EDM formulation. + + Args: + sample (`torch.Tensor`): + The input sample tensor to precondition. + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `torch.Tensor`: + The scaled input sample. + """ + c_in = self._get_conditioning_c_in(sigma) + scaled_sample = sample * c_in + return scaled_sample + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_noise + def precondition_noise(self, sigma: float | torch.Tensor) -> torch.Tensor: + """ + Precondition the noise level by applying a logarithmic transformation. + + Args: + sigma (`float` or `torch.Tensor`): + The sigma (noise level) value to precondition. + + Returns: + `torch.Tensor`: + The preconditioned noise value computed as `0.25 * log(sigma)`. + """ + if not isinstance(sigma, torch.Tensor): + sigma = torch.tensor([sigma]) + + c_noise = 0.25 * torch.log(sigma) + + return c_noise + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_outputs + def precondition_outputs( + self, + sample: torch.Tensor, + model_output: torch.Tensor, + sigma: float | torch.Tensor, + ) -> torch.Tensor: + """ + Precondition the model outputs according to the EDM formulation. + + Args: + sample (`torch.Tensor`): + The input sample tensor. + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `torch.Tensor`: + The denoised sample computed by combining the skip connection and output scaling. + """ + sigma_data = self.config.sigma_data + c_skip = sigma_data**2 / (sigma**2 + sigma_data**2) + + if self.config.prediction_type == "epsilon": + c_out = sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + elif self.config.prediction_type == "v_prediction": + c_out = -sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + else: + raise ValueError(f"Prediction type {self.config.prediction_type} is not supported.") + + denoised = c_skip * sample + c_out * model_output + + return denoised + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.scale_model_input + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Scale the denoising model input to match the Euler algorithm. Ensures interchangeability with schedulers that + need to scale the denoising model input depending on the current timestep. + + Args: + sample (`torch.Tensor`): + The input sample tensor. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = self.precondition_inputs(sample, sigma) + + self.is_scale_input_called = True + return sample + + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + + self.num_inference_steps = num_inference_steps + + ramp = torch.linspace(0, 1, self.num_inference_steps) + if self.config.sigma_schedule == "karras": + sigmas = self._compute_karras_sigmas(ramp) + elif self.config.sigma_schedule == "exponential": + sigmas = self._compute_exponential_sigmas(ramp) + + sigmas = sigmas.to(dtype=torch.float32, device=device) + self.timesteps = self.precondition_noise(sigmas) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = self.config.sigma_min + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + + self.sigmas = torch.cat([sigmas, torch.tensor([sigma_last], dtype=torch.float32, device=device)]) + + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._compute_karras_sigmas + def _compute_karras_sigmas( + self, + ramp: torch.Tensor, + sigma_min: float | None = None, + sigma_max: float | None = None, + ) -> torch.Tensor: + """ + Construct the noise schedule of [Karras et al. (2022)](https://huggingface.co/papers/2206.00364). + + Args: + ramp (`torch.Tensor`): + A tensor of values in [0, 1] representing the interpolation positions. + sigma_min (`float`, *optional*): + Minimum sigma value. If `None`, uses `self.config.sigma_min`. + sigma_max (`float`, *optional*): + Maximum sigma value. If `None`, uses `self.config.sigma_max`. + + Returns: + `torch.Tensor`: + The computed Karras sigma schedule. + """ + sigma_min = sigma_min or self.config.sigma_min + sigma_max = sigma_max or self.config.sigma_max + + rho = self.config.rho + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._compute_exponential_sigmas + def _compute_exponential_sigmas( + self, + ramp: torch.Tensor, + sigma_min: float | None = None, + sigma_max: float | None = None, + ) -> torch.Tensor: + """ + Compute the exponential sigma schedule. Implementation closely follows k-diffusion: + https://github.com/crowsonkb/k-diffusion/blob/6ab5146d4a5ef63901326489f31f1d8e7dd36b48/k_diffusion/sampling.py#L26 + + Args: + ramp (`torch.Tensor`): + A tensor of values representing the interpolation positions. + sigma_min (`float`, *optional*): + Minimum sigma value. If `None`, uses `self.config.sigma_min`. + sigma_max (`float`, *optional*): + Maximum sigma value. If `None`, uses `self.config.sigma_max`. + + Returns: + `torch.Tensor`: + The computed exponential sigma schedule. + """ + sigma_min = sigma_min or self.config.sigma_min + sigma_max = sigma_max or self.config.sigma_max + sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), len(ramp)).exp().flip(0) + return sigmas + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma, log_sigmas): + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + def _sigma_to_alpha_sigma_t(self, sigma): + alpha_t = torch.tensor(1) # Inputs are pre-scaled before going into unet, so alpha_t = 1 + sigma_t = sigma + return alpha_t, sigma_t + + def convert_model_output( + self, + model_output: torch.Tensor, + sample: torch.Tensor, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is + designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an + integral of the data prediction model. + + > [!TIP] > The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both + noise > prediction and data prediction models. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + sigma = self.sigmas[self.step_index] + x0_pred = self.precondition_outputs(sample, model_output, sigma) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + + def dpm_solver_first_order_update( + self, + model_output: torch.Tensor, + sample: torch.Tensor, + noise: torch.Tensor | None = None, + ) -> torch.Tensor: + """ + One step for the first-order DPMSolver (equivalent to DDIM). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + noise (`torch.Tensor`, *optional*): + The noise tensor to add to the original samples. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + sigma_t, sigma_s = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s, sigma_s = self._sigma_to_alpha_sigma_t(sigma_s) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s = torch.log(alpha_s) - torch.log(sigma_s) + + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + x_t = ( + (sigma_t / sigma_s * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * model_output + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + + return x_t + + def multistep_dpm_solver_second_order_update( + self, + model_output_list: List[torch.Tensor], + sample: torch.Tensor, + noise: torch.Tensor | None = None, + ) -> torch.Tensor: + """ + One step for the second-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + noise (`torch.Tensor`, *optional*): + The noise tensor to add to the original samples. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + sigma_t, sigma_s0, sigma_s1 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + + m0, m1 = model_output_list[-1], model_output_list[-2] + + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "sde-dpmsolver++": + assert noise is not None + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + 0.5 * (alpha_t * (1 - torch.exp(-2.0 * h))) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0 * torch.exp(-h)) * sample + + (alpha_t * (1 - torch.exp(-2.0 * h))) * D0 + + (alpha_t * ((1.0 - torch.exp(-2.0 * h)) / (-2.0 * h) + 1.0)) * D1 + + sigma_t * torch.sqrt(1.0 - torch.exp(-2 * h)) * noise + ) + + return x_t + + def multistep_dpm_solver_third_order_update( + self, + model_output_list: list[torch.Tensor], + sample: torch.Tensor, + ) -> torch.Tensor: + """ + One step for the third-order multistep DPMSolver. + + Args: + model_output_list (`list[torch.Tensor]`): + The direct outputs from learned diffusion model at current and latter timesteps. + sample (`torch.Tensor`): + A current instance of a sample created by diffusion process. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + sigma_t, sigma_s0, sigma_s1, sigma_s2 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + self.sigmas[self.step_index - 2], + ) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + alpha_s1, sigma_s1 = self._sigma_to_alpha_sigma_t(sigma_s1) + alpha_s2, sigma_s2 = self._sigma_to_alpha_sigma_t(sigma_s2) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + lambda_s1 = torch.log(alpha_s1) - torch.log(sigma_s1) + lambda_s2 = torch.log(alpha_s2) - torch.log(sigma_s2) + + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + + h, h_0, h_1 = lambda_t - lambda_s0, lambda_s0 - lambda_s1, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m0 + D1_0, D1_1 = (1.0 / r0) * (m0 - m1), (1.0 / r1) * (m1 - m2) + D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1) + D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1) + if self.config.algorithm_type == "dpmsolver++": + # See https://huggingface.co/papers/2206.00927 for detailed derivations + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + + return x_t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._init_step_index + def _init_step_index(self, timestep: int | torch.Tensor) -> None: + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep DPMSolver. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Improve numerical stability for small number of steps + lower_order_final = (self.step_index == len(self.timesteps) - 1) and ( + self.config.euler_at_final + or (self.config.lower_order_final and len(self.timesteps) < 15) + or self.config.final_sigmas_type == "zero" + ) + lower_order_second = ( + (self.step_index == len(self.timesteps) - 2) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + + model_output = self.convert_model_output(model_output, sample=sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + if self.config.algorithm_type == "sde-dpmsolver++": + noise = randn_tensor( + model_output.shape, + generator=generator, + device=model_output.device, + dtype=model_output.dtype, + ) + else: + noise = None + + if self.config.solver_order == 1 or self.lower_order_nums < 1 or lower_order_final: + prev_sample = self.dpm_solver_first_order_update(model_output, sample=sample, noise=noise) + elif self.config.solver_order == 2 or self.lower_order_nums < 2 or lower_order_second: + prev_sample = self.multistep_dpm_solver_second_order_update(self.model_outputs, sample=sample, noise=noise) + else: + prev_sample = self.multistep_dpm_solver_third_order_update(self.model_outputs, sample=sample) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._get_conditioning_c_in + def _get_conditioning_c_in(self, sigma: float | torch.Tensor) -> float | torch.Tensor: + """ + Compute the input conditioning factor for the EDM formulation. + + Args: + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `float` or `torch.Tensor`: + The input conditioning factor `c_in`. + """ + c_in = 1 / ((sigma**2 + self.config.sigma_data**2) ** 0.5) + return c_in + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_edm_euler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_edm_euler.py new file mode 100644 index 0000000000000000000000000000000000000000..dfb70ce68b46d4e9861fad7bf707f6a5ac62f566 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_edm_euler.py @@ -0,0 +1,600 @@ +# Copyright 2025 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerDiscrete +class EDMEulerSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +class EDMEulerScheduler(SchedulerMixin, ConfigMixin): + """ + Implements the Euler scheduler in EDM formulation as presented in Karras et al. 2022 [1]. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://huggingface.co/papers/2206.00364 + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + sigma_min (`float`, *optional*, defaults to `0.002`): + Minimum noise magnitude in the sigma schedule. This was set to 0.002 in the EDM paper [1]; a reasonable + range is [0, 10]. + sigma_max (`float`, *optional*, defaults to `80.0`): + Maximum noise magnitude in the sigma schedule. This was set to 80.0 in the EDM paper [1]; a reasonable + range is [0.2, 80.0]. + sigma_data (`float`, *optional*, defaults to `0.5`): + The standard deviation of the data distribution. This is set to 0.5 in the EDM paper [1]. + sigma_schedule (`Literal["karras", "exponential"]`, *optional*, defaults to `"karras"`): + Sigma schedule to compute the `sigmas`. By default, we use the schedule introduced in the EDM paper + (https://huggingface.co/papers/2206.00364). The `"exponential"` schedule was incorporated in this model: + https://huggingface.co/stabilityai/cosxl. + num_train_timesteps (`int`, *optional*, defaults to `1000`): + The number of diffusion steps to train the model. + prediction_type (`Literal["epsilon", "v_prediction"]`, *optional*, defaults to `"epsilon"`): + Prediction type of the scheduler function. `"epsilon"` predicts the noise of the diffusion process, and + `"v_prediction"` (see section 2.4 of [Imagen Video](https://huggingface.co/papers/2210.02303) paper). + rho (`float`, *optional*, defaults to `7.0`): + The rho parameter used for calculating the Karras sigma schedule, which is set to 7.0 in the EDM paper [1]. + final_sigmas_type (`Literal["zero", "sigma_min"]`, *optional*, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final + sigma is the same as the last sigma in the training schedule. If `"zero"`, the final sigma is set to 0. + """ + + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + sigma_min: float = 0.002, + sigma_max: float = 80.0, + sigma_data: float = 0.5, + sigma_schedule: Literal["karras", "exponential"] = "karras", + num_train_timesteps: int = 1000, + prediction_type: Literal["epsilon", "v_prediction"] = "epsilon", + rho: float = 7.0, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", + ) -> None: + if sigma_schedule not in ["karras", "exponential"]: + raise ValueError(f"Wrong value for provided for `{sigma_schedule=}`.`") + + # setable values + self.num_inference_steps = None + + sigmas_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + sigmas = torch.arange(num_train_timesteps + 1, dtype=sigmas_dtype) / num_train_timesteps + if sigma_schedule == "karras": + sigmas = self._compute_karras_sigmas(sigmas) + elif sigma_schedule == "exponential": + sigmas = self._compute_exponential_sigmas(sigmas) + sigmas = sigmas.to(torch.float32) + + self.timesteps = self.precondition_noise(sigmas) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = sigmas[-1] + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + + self.sigmas = torch.cat([sigmas, torch.full((1,), fill_value=sigma_last, device=sigmas.device)]) + + self.is_scale_input_called = False + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self) -> float: + """ + Return the standard deviation of the initial noise distribution. + + Returns: + `float`: + The initial noise sigma value computed as `(sigma_max**2 + 1) ** 0.5`. + """ + return (self.config.sigma_max**2 + 1) ** 0.5 + + @property + def step_index(self) -> int: + """ + Return the index counter for the current timestep. The index will increase by 1 after each scheduler step. + + Returns: + `int` or `None`: + The current step index, or `None` if not yet initialized. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + Return the index for the first timestep. This should be set from the pipeline with the `set_begin_index` + method. + + Returns: + `int` or `None`: + The begin index, or `None` if not yet set. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def precondition_inputs(self, sample: torch.Tensor, sigma: float | torch.Tensor) -> torch.Tensor: + """ + Precondition the input sample by scaling it according to the EDM formulation. + + Args: + sample (`torch.Tensor`): + The input sample tensor to precondition. + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `torch.Tensor`: + The scaled input sample. + """ + c_in = self._get_conditioning_c_in(sigma) + scaled_sample = sample * c_in + return scaled_sample + + def precondition_noise(self, sigma: float | torch.Tensor) -> torch.Tensor: + """ + Precondition the noise level by applying a logarithmic transformation. + + Args: + sigma (`float` or `torch.Tensor`): + The sigma (noise level) value to precondition. + + Returns: + `torch.Tensor`: + The preconditioned noise value computed as `0.25 * log(sigma)`. + """ + if not isinstance(sigma, torch.Tensor): + sigma = torch.tensor([sigma]) + + c_noise = 0.25 * torch.log(sigma) + + return c_noise + + def precondition_outputs( + self, + sample: torch.Tensor, + model_output: torch.Tensor, + sigma: float | torch.Tensor, + ) -> torch.Tensor: + """ + Precondition the model outputs according to the EDM formulation. + + Args: + sample (`torch.Tensor`): + The input sample tensor. + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `torch.Tensor`: + The denoised sample computed by combining the skip connection and output scaling. + """ + sigma_data = self.config.sigma_data + c_skip = sigma_data**2 / (sigma**2 + sigma_data**2) + + if self.config.prediction_type == "epsilon": + c_out = sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + elif self.config.prediction_type == "v_prediction": + c_out = -sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5 + else: + raise ValueError(f"Prediction type {self.config.prediction_type} is not supported.") + + denoised = c_skip * sample + c_out * model_output + + return denoised + + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Scale the denoising model input to match the Euler algorithm. Ensures interchangeability with schedulers that + need to scale the denoising model input depending on the current timestep. + + Args: + sample (`torch.Tensor`): + The input sample tensor. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = self.precondition_inputs(sample, sigma) + + self.is_scale_input_called = True + return sample + + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device = None, + sigmas: torch.Tensor | list[float] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + sigmas (`torch.Tensor | list[float]`, *optional*): + Custom sigmas to use for the denoising process. If not defined, the default behavior when + `num_inference_steps` is passed will be used. + """ + self.num_inference_steps = num_inference_steps + + sigmas_dtype = torch.float32 if torch.backends.mps.is_available() else torch.float64 + if sigmas is None: + sigmas = torch.linspace(0, 1, self.num_inference_steps, dtype=sigmas_dtype) + elif isinstance(sigmas, float): + sigmas = torch.tensor(sigmas, dtype=sigmas_dtype) + else: + sigmas = sigmas.to(sigmas_dtype) + if self.config.sigma_schedule == "karras": + sigmas = self._compute_karras_sigmas(sigmas) + elif self.config.sigma_schedule == "exponential": + sigmas = self._compute_exponential_sigmas(sigmas) + sigmas = sigmas.to(dtype=torch.float32, device=device) + + self.timesteps = self.precondition_noise(sigmas) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = sigmas[-1] + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + + self.sigmas = torch.cat([sigmas, torch.full((1,), fill_value=sigma_last, device=sigmas.device)]) + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Taken from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L17 + def _compute_karras_sigmas( + self, + ramp: torch.Tensor, + sigma_min: float | None = None, + sigma_max: float | None = None, + ) -> torch.Tensor: + """ + Construct the noise schedule of [Karras et al. (2022)](https://huggingface.co/papers/2206.00364). + + Args: + ramp (`torch.Tensor`): + A tensor of values in [0, 1] representing the interpolation positions. + sigma_min (`float`, *optional*): + Minimum sigma value. If `None`, uses `self.config.sigma_min`. + sigma_max (`float`, *optional*): + Maximum sigma value. If `None`, uses `self.config.sigma_max`. + + Returns: + `torch.Tensor`: + The computed Karras sigma schedule. + """ + sigma_min = sigma_min or self.config.sigma_min + sigma_max = sigma_max or self.config.sigma_max + + rho = self.config.rho + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + def _compute_exponential_sigmas( + self, + ramp: torch.Tensor, + sigma_min: float | None = None, + sigma_max: float | None = None, + ) -> torch.Tensor: + """ + Compute the exponential sigma schedule. Implementation closely follows k-diffusion: + https://github.com/crowsonkb/k-diffusion/blob/6ab5146d4a5ef63901326489f31f1d8e7dd36b48/k_diffusion/sampling.py#L26 + + Args: + ramp (`torch.Tensor`): + A tensor of values representing the interpolation positions. + sigma_min (`float`, *optional*): + Minimum sigma value. If `None`, uses `self.config.sigma_min`. + sigma_max (`float`, *optional*): + Maximum sigma value. If `None`, uses `self.config.sigma_max`. + + Returns: + `torch.Tensor`: + The computed exponential sigma schedule. + """ + sigma_min = sigma_min or self.config.sigma_min + sigma_max = sigma_max or self.config.sigma_max + sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), len(ramp)).exp().flip(0) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + s_churn: float = 0.0, + s_tmin: float = 0.0, + s_tmax: float = float("inf"), + s_noise: float = 1.0, + generator: torch.Generator | None = None, + return_dict: bool = True, + pred_original_sample: torch.Tensor | None = None, + ) -> EDMEulerSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + s_churn (`float`, *optional*, defaults to `0.0`): + The amount of stochasticity to add at each step. Higher values add more noise. + s_tmin (`float`, *optional*, defaults to `0.0`): + The minimum sigma threshold below which no noise is added. + s_tmax (`float`, *optional*, defaults to `float("inf")`): + The maximum sigma threshold above which no noise is added. + s_noise (`float`, *optional*, defaults to `1.0`): + Scaling factor for noise added to the sample. + generator (`torch.Generator`, *optional*): + A random number generator for reproducibility. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return an [`~schedulers.scheduling_edm_euler.EDMEulerSchedulerOutput`] or tuple. + pred_original_sample (`torch.Tensor`, *optional*): + The predicted denoised sample from a previous step. If provided, skips recomputation. + + Returns: + [`~schedulers.scheduling_edm_euler.EDMEulerSchedulerOutput`] or `tuple`: + If `return_dict` is `True`, an [`~schedulers.scheduling_edm_euler.EDMEulerSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the previous sample tensor and the + second element is the predicted original sample tensor. + """ + + if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EDMEulerScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if not self.is_scale_input_called: + logger.warning( + "The `scale_model_input` function should be called before `step` to ensure correct denoising. " + "See `StableDiffusionPipeline` for a usage example." + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + sigma = self.sigmas[self.step_index] + + gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 + + sigma_hat = sigma * (gamma + 1) + + if gamma > 0: + noise = randn_tensor( + model_output.shape, + dtype=model_output.dtype, + device=model_output.device, + generator=generator, + ) + eps = noise * s_noise + sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5 + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if pred_original_sample is None: + pred_original_sample = self.precondition_outputs(sample, model_output, sigma_hat) + + # 2. Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma_hat + + dt = self.sigmas[self.step_index + 1] - sigma_hat + + prev_sample = sample + derivative * dt + + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return EDMEulerSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def _get_conditioning_c_in(self, sigma: float | torch.Tensor) -> float | torch.Tensor: + """ + Compute the input conditioning factor for the EDM formulation. + + Args: + sigma (`float` or `torch.Tensor`): + The current sigma (noise level) value. + + Returns: + `float` or `torch.Tensor`: + The input conditioning factor `c_in`. + """ + c_in = 1 / ((sigma**2 + self.config.sigma_data**2) ** 0.5) + return c_in + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_ancestral_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..8ad6abd90668670fc9834825a45ecf039263aa9d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -0,0 +1,528 @@ +# Copyright 2025 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerAncestralDiscrete +class EulerAncestralDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + Ancestral sampling with Euler method steps. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + prediction_type (`"epsilon"`, `"sample"`, or `"v_prediction"`, defaults to `"epsilon"`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + prediction_type: str = "epsilon", + timestep_spacing: str = "linspace", + steps_offset: int = 0, + rescale_betas_zero_snr: bool = False, + ) -> None: + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + if rescale_betas_zero_snr: + # Close to 0 without being 0 so first sigma is not inf + # FP16 smallest positive subnormal works well here + self.alphas_cumprod[-1] = 2**-24 + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32) + self.sigmas = torch.from_numpy(sigmas) + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.is_scale_input_called = False + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self) -> torch.Tensor: + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + return self.sigmas.max() + + return (self.sigmas.max() ** 2 + 1) ** 0.5 + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = sample / ((sigma**2 + 1) ** 0.5) + self.is_scale_input_called = True + return sample + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[ + ::-1 + ].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.float32) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(np.float32) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + self.sigmas = torch.from_numpy(sigmas).to(device=device) + + self.timesteps = torch.from_numpy(timesteps).to(device=device) + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> EulerAncestralDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, defaults to `True`): + Whether or not to return a + [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] or `tuple`: + If return_dict is `True`, + [`~schedulers.scheduling_euler_ancestral_discrete.EulerAncestralDiscreteSchedulerOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + + """ + + if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if not self.is_scale_input_called: + logger.warning( + "The `scale_model_input` function should be called before `step` to ensure correct denoising. " + "See `StableDiffusionPipeline` for a usage example." + ) + + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + pred_original_sample = sample - sigma * model_output + elif self.config.prediction_type == "v_prediction": + # * c_out + input * c_skip + pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1)) + elif self.config.prediction_type == "sample": + raise NotImplementedError("prediction_type not implemented yet: sample") + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + sigma_from = self.sigmas[self.step_index] + sigma_to = self.sigmas[self.step_index + 1] + sigma_up = (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5 + sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5 + + # 2. Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + + dt = sigma_down - sigma + + prev_sample = sample + derivative * dt + + device = model_output.device + noise = randn_tensor(model_output.shape, dtype=model_output.dtype, device=device, generator=generator) + + prev_sample = prev_sample + noise * sigma_up + + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return EulerAncestralDiscreteSchedulerOutput( + prev_sample=prev_sample, pred_original_sample=pred_original_sample + ) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..484f2ca58e1b4410b9a5d6e4850748306dd81928 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_discrete.py @@ -0,0 +1,907 @@ +# Copyright 2025 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->EulerDiscrete +class EulerDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + Euler scheduler. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`Literal["linear", "scaled_linear", "squaredcos_cap_v2"]`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + prediction_type (`Literal["epsilon", "sample", "v_prediction"]`, defaults to `"epsilon"`, *optional*): + Prediction type of the scheduler function; can be `"epsilon"` (predicts the noise of the diffusion + process), `"sample"` (directly predicts the noisy sample`) or `"v_prediction"` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + interpolation_type (`Literal["linear", "log_linear"]`, defaults to `"linear"`, *optional*): + The interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be one of + `"linear"` or `"log_linear"`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + sigma_min (`float`, *optional*): + The minimum sigma value for the noise schedule. If not provided, defaults to the last sigma in the + schedule. + sigma_max (`float`, *optional*): + The maximum sigma value for the noise schedule. If not provided, defaults to the first sigma in the + schedule. + timestep_spacing (`Literal["linspace", "leading", "trailing"]`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + timestep_type (`Literal["discrete", "continuous"]`, defaults to `"discrete"`): + The type of timesteps to use. Can be `"discrete"` or `"continuous"`. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + final_sigmas_type (`Literal["zero", "sigma_min"]`, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final + sigma is the same as the last sigma in the training schedule. If `"zero"`, the final sigma is set to 0. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + prediction_type: str = "epsilon", + interpolation_type: str = "linear", + use_karras_sigmas: bool | None = False, + use_exponential_sigmas: bool | None = False, + use_beta_sigmas: bool | None = False, + sigma_min: float | None = None, + sigma_max: float | None = None, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + timestep_type: Literal["discrete", "continuous"] = "discrete", + steps_offset: int = 0, + rescale_betas_zero_snr: bool = False, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + if rescale_betas_zero_snr: + # Close to 0 without being 0 so first sigma is not inf + # FP16 smallest positive subnormal works well here + self.alphas_cumprod[-1] = 2**-24 + + sigmas = (((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5).flip(0) + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=float)[::-1].copy() + timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32) + + # setable values + self.num_inference_steps = None + + # TODO: Support the full EDM scalings for all prediction types and timestep types + if timestep_type == "continuous" and prediction_type == "v_prediction": + self.timesteps = torch.Tensor([0.25 * sigma.log() for sigma in sigmas]) + else: + self.timesteps = timesteps + + self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) + + self.is_scale_input_called = False + self.use_karras_sigmas = use_karras_sigmas + self.use_exponential_sigmas = use_exponential_sigmas + self.use_beta_sigmas = use_beta_sigmas + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self) -> float | torch.Tensor: + """ + The standard deviation of the initial noise distribution. + + Returns: + `float` or `torch.Tensor`: + The standard deviation of the initial noise distribution, computed based on the maximum sigma value and + the timestep spacing configuration. + """ + max_sigma = max(self.sigmas) if isinstance(self.sigmas, list) else self.sigmas.max() + if self.config.timestep_spacing in ["linspace", "trailing"]: + return max_sigma + + return (max_sigma**2 + 1) ** 0.5 + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase by 1 after each scheduler step. + + Returns: + `int` or `None`: + The current step index, or `None` if not initialized. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + + Returns: + `int` or `None`: + The begin index for the scheduler, or `None` if not set. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm. + + Args: + sample (`torch.Tensor`): + The input sample to be scaled. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample, divided by `(sigma**2 + 1) ** 0.5`. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = sample / ((sigma**2 + 1) ** 0.5) + + self.is_scale_input_called = True + return sample + + def set_timesteps( + self, + num_inference_steps: int = None, + device: str | torch.device = None, + timesteps: list[int] | None = None, + sigmas: list[float] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. If `None`, + `timesteps` or `sigmas` must be provided. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary timesteps schedule. If `None`, timesteps will be generated + based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` and `sigmas` + must be `None`, and `timestep_spacing` attribute will be ignored. + sigmas (`list[float]`, *optional*): + Custom sigmas used to support arbitrary timesteps schedule schedule. If `None`, timesteps and sigmas + will be generated based on the relevant scheduler attributes. If `sigmas` is passed, + `num_inference_steps` and `timesteps` must be `None`, and the timesteps will be generated based on the + custom sigmas schedule. + """ + + if timesteps is not None and sigmas is not None: + raise ValueError("Only one of `timesteps` or `sigmas` should be set.") + if num_inference_steps is None and timesteps is None and sigmas is None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps` or `sigmas.") + if num_inference_steps is not None and (timesteps is not None or sigmas is not None): + raise ValueError("Can only pass one of `num_inference_steps` or `timesteps` or `sigmas`.") + if timesteps is not None and self.config.use_karras_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_karras_sigmas = True`.") + if timesteps is not None and self.config.use_exponential_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") + if ( + timesteps is not None + and self.config.timestep_type == "continuous" + and self.config.prediction_type == "v_prediction" + ): + raise ValueError( + "Cannot set `timesteps` with `config.timestep_type = 'continuous'` and `config.prediction_type = 'v_prediction'`." + ) + + if num_inference_steps is None: + num_inference_steps = len(timesteps) if timesteps is not None else len(sigmas) - 1 + self.num_inference_steps = num_inference_steps + + if sigmas is not None: + log_sigmas = np.log(np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5)) + sigmas = np.array(sigmas).astype(np.float32) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas[:-1]]) + + else: + if timesteps is not None: + timesteps = np.array(timesteps).astype(np.float32) + else: + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace( + 0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=np.float32 + )[::-1].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = ( + (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.float32) + ) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = ( + (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(np.float32) + ) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + if self.config.interpolation_type == "linear": + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + elif self.config.interpolation_type == "log_linear": + sigmas = torch.linspace(np.log(sigmas[-1]), np.log(sigmas[0]), num_inference_steps + 1).exp().numpy() + else: + raise ValueError( + f"{self.config.interpolation_type} is not implemented. Please specify interpolation_type to either" + " 'linear' or 'log_linear'" + ) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device) + + # TODO: Support the full EDM scalings for all prediction types and timestep types + if self.config.timestep_type == "continuous" and self.config.prediction_type == "v_prediction": + self.timesteps = torch.Tensor([0.25 * sigma.log() for sigma in sigmas[:-1]]).to(device=device) + else: + self.timesteps = torch.from_numpy(timesteps.astype(np.float32)).to(device=device) + + self._step_index = None + self._begin_index = None + self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication + + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L17 + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L26 + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + s_churn: float = 0.0, + s_tmin: float = 0.0, + s_tmax: float = float("inf"), + s_noise: float = 1.0, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> EulerDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + s_churn (`float`, *optional*, defaults to `0.0`): + Stochasticity parameter that controls the amount of noise added during sampling. Higher values increase + randomness. + s_tmin (`float`, *optional*, defaults to `0.0`): + Minimum timestep threshold for applying stochasticity. Only timesteps above this value will have noise + added. + s_tmax (`float`, *optional*, defaults to `inf`): + Maximum timestep threshold for applying stochasticity. Only timesteps below this value will have noise + added. + s_noise (`float`, *optional*, defaults to `1.0`): + Scaling factor for noise added to the sample. + generator (`torch.Generator`, *optional*): + A random number generator for reproducible sampling. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or + tuple. + + Returns: + [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`: + If `return_dict` is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor and the second + element is the predicted original sample. + """ + + if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if not self.is_scale_input_called: + logger.warning( + "The `scale_model_input` function should be called before `step` to ensure correct denoising. " + "See `StableDiffusionPipeline` for a usage example." + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + sigma = self.sigmas[self.step_index] + + gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 + + sigma_hat = sigma * (gamma + 1) + + if gamma > 0: + noise = randn_tensor( + model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator + ) + eps = noise * s_noise + sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5 + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + # NOTE: "original_sample" should not be an expected prediction_type but is left in for + # backwards compatibility + if self.config.prediction_type == "original_sample" or self.config.prediction_type == "sample": + pred_original_sample = model_output + elif self.config.prediction_type == "epsilon": + pred_original_sample = sample - sigma_hat * model_output + elif self.config.prediction_type == "v_prediction": + # denoised = model_output * c_out + input * c_skip + pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1)) + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + # 2. Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma_hat + + dt = self.sigmas[self.step_index + 1] - sigma_hat + + prev_sample = sample + derivative * dt + + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return EulerDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor: + """ + Compute the velocity prediction for the given sample and noise at the specified timesteps. + + This method implements the velocity prediction used in v-prediction models, which predicts a linear combination + of the sample and noise. + + Args: + sample (`torch.Tensor`): + The input sample for which to compute the velocity. + noise (`torch.Tensor`): + The noise tensor corresponding to the sample. + timesteps (`torch.Tensor`): + The timesteps at which to compute the velocity. + + Returns: + `torch.Tensor`: + The velocity prediction computed as `sqrt(alpha_prod) * noise - sqrt(1 - alpha_prod) * sample`. + """ + if ( + isinstance(timesteps, int) + or isinstance(timesteps, torch.IntTensor) + or isinstance(timesteps, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EulerDiscreteScheduler.get_velocity()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if sample.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32) + timesteps = timesteps.to(sample.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(sample.device) + timesteps = timesteps.to(sample.device) + + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + alphas_cumprod = self.alphas_cumprod.to(sample) + sqrt_alpha_prod = alphas_cumprod[step_indices] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[step_indices]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_discrete_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_discrete_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..fc555dd21395fea37d607a95fd129dfb90fadca2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_euler_discrete_flax.py @@ -0,0 +1,289 @@ +# Copyright 2025 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import flax +import jax.numpy as jnp + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + CommonSchedulerState, + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + broadcast_to_shape_from_left, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class EulerDiscreteSchedulerState: + common: CommonSchedulerState + + # setable values + init_noise_sigma: jnp.ndarray + timesteps: jnp.ndarray + sigmas: jnp.ndarray + num_inference_steps: int = None + + @classmethod + def create( + cls, + common: CommonSchedulerState, + init_noise_sigma: jnp.ndarray, + timesteps: jnp.ndarray, + sigmas: jnp.ndarray, + ): + return cls( + common=common, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + sigmas=sigmas, + ) + + +@dataclass +class FlaxEulerDiscreteSchedulerOutput(FlaxSchedulerOutput): + state: EulerDiscreteSchedulerState + + +class FlaxEulerDiscreteScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + Euler scheduler (Algorithm 2) from Karras et al. (2022) https://huggingface.co/papers/2206.00364. . Based on the + original k-diffusion implementation by Katherine Crowson: + https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51 + + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear` or `scaled_linear`. + trained_betas (`jnp.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + prediction_type (`str`, default `epsilon`, optional): + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4 + https://huggingface.co/papers/2210.02303) + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + the `dtype` used for params and computation. + """ + + _compatibles = [e.name for e in FlaxKarrasDiffusionSchedulers] + + dtype: jnp.dtype + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: jnp.ndarray | None = None, + prediction_type: str = "epsilon", + timestep_spacing: str = "linspace", + dtype: jnp.dtype = jnp.float32, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.dtype = dtype + + def create_state(self, common: CommonSchedulerState | None = None) -> EulerDiscreteSchedulerState: + if common is None: + common = CommonSchedulerState.create(self) + + timesteps = jnp.arange(0, self.config.num_train_timesteps).round()[::-1] + sigmas = ((1 - common.alphas_cumprod) / common.alphas_cumprod) ** 0.5 + sigmas = jnp.interp(timesteps, jnp.arange(0, len(sigmas)), sigmas) + sigmas = jnp.concatenate([sigmas, jnp.array([0.0], dtype=self.dtype)]) + + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + init_noise_sigma = sigmas.max() + else: + init_noise_sigma = (sigmas.max() ** 2 + 1) ** 0.5 + + return EulerDiscreteSchedulerState.create( + common=common, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + sigmas=sigmas, + ) + + def scale_model_input(self, state: EulerDiscreteSchedulerState, sample: jnp.ndarray, timestep: int) -> jnp.ndarray: + """ + Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the Euler algorithm. + + Args: + state (`EulerDiscreteSchedulerState`): + the `FlaxEulerDiscreteScheduler` state data class instance. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + timestep (`int`): + current discrete timestep in the diffusion chain. + + Returns: + `jnp.ndarray`: scaled input sample + """ + (step_index,) = jnp.where(state.timesteps == timestep, size=1) + step_index = step_index[0] + + sigma = state.sigmas[step_index] + sample = sample / ((sigma**2 + 1) ** 0.5) + return sample + + def set_timesteps( + self, + state: EulerDiscreteSchedulerState, + num_inference_steps: int, + shape: tuple = (), + ) -> EulerDiscreteSchedulerState: + """ + Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`EulerDiscreteSchedulerState`): + the `FlaxEulerDiscreteScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + """ + + if self.config.timestep_spacing == "linspace": + timesteps = jnp.linspace( + self.config.num_train_timesteps - 1, + 0, + num_inference_steps, + dtype=self.dtype, + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // num_inference_steps + timesteps = (jnp.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(float) + timesteps += 1 + else: + raise ValueError( + f"timestep_spacing must be one of ['linspace', 'leading'], got {self.config.timestep_spacing}" + ) + + sigmas = ((1 - state.common.alphas_cumprod) / state.common.alphas_cumprod) ** 0.5 + sigmas = jnp.interp(timesteps, jnp.arange(0, len(sigmas)), sigmas) + sigmas = jnp.concatenate([sigmas, jnp.array([0.0], dtype=self.dtype)]) + + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + init_noise_sigma = sigmas.max() + else: + init_noise_sigma = (sigmas.max() ** 2 + 1) ** 0.5 + + return state.replace( + timesteps=timesteps, + sigmas=sigmas, + num_inference_steps=num_inference_steps, + init_noise_sigma=init_noise_sigma, + ) + + def step( + self, + state: EulerDiscreteSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + return_dict: bool = True, + ) -> FlaxEulerDiscreteSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + state (`EulerDiscreteSchedulerState`): + the `FlaxEulerDiscreteScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + order: coefficient for multi-step inference. + return_dict (`bool`): option for returning tuple rather than FlaxEulerDiscreteScheduler class + + Returns: + [`FlaxEulerDiscreteScheduler`] or `tuple`: [`FlaxEulerDiscreteScheduler`] if `return_dict` is True, + otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + (step_index,) = jnp.where(state.timesteps == timestep, size=1) + step_index = step_index[0] + + sigma = state.sigmas[step_index] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + pred_original_sample = sample - sigma * model_output + elif self.config.prediction_type == "v_prediction": + # * c_out + input * c_skip + pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1)) + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + # 2. Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + + # dt = sigma_down - sigma + dt = state.sigmas[step_index + 1] - sigma + + prev_sample = sample + derivative * dt + + if not return_dict: + return (prev_sample, state) + + return FlaxEulerDiscreteSchedulerOutput(prev_sample=prev_sample, state=state) + + def add_noise( + self, + state: EulerDiscreteSchedulerState, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + sigma = state.sigmas[timesteps].flatten() + sigma = broadcast_to_shape_from_left(sigma, noise.shape) + + noisy_samples = original_samples + noise * sigma + + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_euler_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_euler_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..1021abf0f6f60a96504debd8dea1209a51a12261 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_euler_discrete.py @@ -0,0 +1,655 @@ +# Copyright 2025 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal, Optional, Union + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available, logging +from .scheduling_utils import SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.FloatTensor + + +class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + Euler scheduler. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + shift (`float`, defaults to 1.0): + The shift value for the timestep schedule. + use_dynamic_shifting (`bool`, defaults to False): + Whether to apply timestep shifting on-the-fly based on the image resolution. + base_shift (`float`, defaults to 0.5): + Value to stabilize image generation. Increasing `base_shift` reduces variation and image is more consistent + with desired output. + max_shift (`float`, defaults to 1.15): + Value change allowed to latent vectors. Increasing `max_shift` encourages more variation and image may be + more exaggerated or stylized. + base_image_seq_len (`int`, defaults to 256): + The base image sequence length. + max_image_seq_len (`int`, defaults to 4096): + The maximum image sequence length. + invert_sigmas (`bool`, defaults to False): + Whether to invert the sigmas. + shift_terminal (`float`, defaults to None): + The end value of the shifted timestep schedule. + use_karras_sigmas (`bool`, defaults to False): + Whether to use Karras sigmas for step sizes in the noise schedule during sampling. + use_exponential_sigmas (`bool`, defaults to False): + Whether to use exponential sigmas for step sizes in the noise schedule during sampling. + use_beta_sigmas (`bool`, defaults to False): + Whether to use beta sigmas for step sizes in the noise schedule during sampling. + time_shift_type (`str`, defaults to "exponential"): + The type of dynamic resolution-dependent timestep shifting to apply. Either "exponential" or "linear". + stochastic_sampling (`bool`, defaults to False): + Whether to use stochastic sampling. + """ + + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + shift: float = 1.0, + use_dynamic_shifting: bool = False, + base_shift: float | None = 0.5, + max_shift: float | None = 1.15, + base_image_seq_len: int = 256, + max_image_seq_len: int = 4096, + invert_sigmas: bool = False, + shift_terminal: float = None, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + time_shift_type: Literal["exponential", "linear"] = "exponential", + stochastic_sampling: bool = False, + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if ( + sum( + [ + self.config.use_beta_sigmas, + self.config.use_exponential_sigmas, + self.config.use_karras_sigmas, + ] + ) + > 1 + ): + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if time_shift_type not in {"exponential", "linear"}: + raise ValueError("`time_shift_type` must either be 'exponential' or 'linear'.") + + timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy() + timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32) + + sigmas = timesteps / num_train_timesteps + if not use_dynamic_shifting: + # when use_dynamic_shifting is True, we apply the timestep shifting on the fly based on the image resolution + sigmas = shift * sigmas / (1 + (shift - 1) * sigmas) + + self.timesteps = sigmas * num_train_timesteps + + self._step_index = None + self._begin_index = None + + self._shift = shift + + self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication + self.sigma_min = self.sigmas[-1].item() + self.sigma_max = self.sigmas[0].item() + + @property + def shift(self): + """ + The value used for shifting. + """ + return self._shift + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_shift(self, shift: float): + """ + Sets the shift value for the scheduler. + + Args: + shift (`float`): + The shift value to be set. + """ + self._shift = shift + + def scale_noise( + self, + sample: torch.FloatTensor, + timestep: float | torch.FloatTensor, + noise: torch.FloatTensor | None = None, + ) -> torch.FloatTensor: + """ + Forward process in flow-matching + + Args: + sample (`torch.FloatTensor`): + The input sample. + timestep (`torch.FloatTensor`): + The current timestep in the diffusion chain. + noise (`torch.FloatTensor`): + The noise tensor. + + Returns: + `torch.FloatTensor`: + A scaled input sample. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype) + + if sample.device.type == "mps" and torch.is_floating_point(timestep): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32) + timestep = timestep.to(sample.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(sample.device) + timestep = timestep.to(sample.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timestep.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timestep.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(sample.shape): + sigma = sigma.unsqueeze(-1) + + sample = sigma * noise + (1.0 - sigma) * sample + + return sample + + def _sigma_to_t(self, sigma) -> float: + return sigma * self.config.num_train_timesteps + + def time_shift(self, mu: float, sigma: float, t: torch.Tensor) -> torch.Tensor: + """ + Apply time shifting to the sigmas. + + Args: + mu (`float`): + The mu parameter for the time shift. + sigma (`float`): + The sigma parameter for the time shift. + t (`torch.Tensor`): + The input timesteps. + + Returns: + `torch.Tensor`: + The time-shifted timesteps. + """ + if self.config.time_shift_type == "exponential": + return self._time_shift_exponential(mu, sigma, t) + elif self.config.time_shift_type == "linear": + return self._time_shift_linear(mu, sigma, t) + + def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor: + r""" + Stretches and shifts the timestep schedule to ensure it terminates at the configured `shift_terminal` config + value. + + Reference: + https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51 + + Args: + t (`torch.Tensor`): + A tensor of timesteps to be stretched and shifted. + + Returns: + `torch.Tensor`: + A tensor of adjusted timesteps such that the final value equals `self.config.shift_terminal`. + """ + one_minus_z = 1 - t + scale_factor = one_minus_z[-1] / (1 - self.config.shift_terminal) + stretched_t = 1 - (one_minus_z / scale_factor) + return stretched_t + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + sigmas: list[float] | None = None, + mu: float | None = None, + timesteps: list[float] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + sigmas (`list[float]`, *optional*): + Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed + automatically. + mu (`float`, *optional*): + Determines the amount of shifting applied to sigmas when performing resolution-dependent timestep + shifting. + timesteps (`list[float]`, *optional*): + Custom values for timesteps to be used for each diffusion step. If `None`, the timesteps are computed + automatically. + """ + if self.config.use_dynamic_shifting and mu is None: + raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`") + + if sigmas is not None and timesteps is not None: + if len(sigmas) != len(timesteps): + raise ValueError("`sigmas` and `timesteps` should have the same length") + + if num_inference_steps is not None: + if (sigmas is not None and len(sigmas) != num_inference_steps) or ( + timesteps is not None and len(timesteps) != num_inference_steps + ): + raise ValueError( + "`sigmas` and `timesteps` should have the same length as num_inference_steps, if `num_inference_steps` is provided" + ) + else: + num_inference_steps = len(sigmas) if sigmas is not None else len(timesteps) + + self.num_inference_steps = num_inference_steps + + # 1. Prepare default sigmas + is_timesteps_provided = timesteps is not None + + if is_timesteps_provided: + timesteps = np.array(timesteps).astype(np.float32) + + if sigmas is None: + if timesteps is None: + timesteps = np.linspace( + self._sigma_to_t(self.sigma_max), + self._sigma_to_t(self.sigma_min), + num_inference_steps, + ) + sigmas = timesteps / self.config.num_train_timesteps + else: + sigmas = np.array(sigmas).astype(np.float32) + num_inference_steps = len(sigmas) + + # 2. Perform timestep shifting. Either no shifting is applied, or resolution-dependent shifting of + # "exponential" or "linear" type is applied + if self.config.use_dynamic_shifting: + sigmas = self.time_shift(mu, 1.0, sigmas) + else: + sigmas = self.shift * sigmas / (1 + (self.shift - 1) * sigmas) + + # 3. If required, stretch the sigmas schedule to terminate at the configured `shift_terminal` value + if self.config.shift_terminal: + sigmas = self.stretch_shift_to_terminal(sigmas) + + # 4. If required, convert sigmas to one of karras, exponential, or beta sigma schedules + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + + # 5. Convert sigmas and timesteps to tensors and move to specified device + sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device) + if not is_timesteps_provided: + timesteps = sigmas * self.config.num_train_timesteps + else: + timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32, device=device) + + # 6. Append the terminal sigma value. + # If a model requires inverted sigma schedule for denoising but timesteps without inversion, the + # `invert_sigmas` flag can be set to `True`. This case is only required in Mochi + if self.config.invert_sigmas: + sigmas = 1.0 - sigmas + timesteps = sigmas * self.config.num_train_timesteps + sigmas = torch.cat([sigmas, torch.ones(1, device=sigmas.device)]) + else: + sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) + + self.timesteps = timesteps + self.sigmas = sigmas + self._step_index = None + self._begin_index = None + + def index_for_timestep( + self, + timestep: Union[float, torch.FloatTensor], + schedule_timesteps: Optional[torch.FloatTensor] = None, + ) -> int: + """ + Get the index for the given timestep. + + Args: + timestep (`float` or `torch.FloatTensor`): + The timestep to find the index for. + schedule_timesteps (`torch.FloatTensor`, *optional*): + The schedule timesteps to validate against. If `None`, the scheduler's timesteps are used. + + Returns: + `int`: + The index of the timestep. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def _init_step_index(self, timestep: Union[float, torch.FloatTensor]) -> None: + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.FloatTensor, + timestep: float | torch.FloatTensor, + sample: torch.FloatTensor, + s_churn: float = 0.0, + s_tmin: float = 0.0, + s_tmax: float = float("inf"), + s_noise: float = 1.0, + generator: torch.Generator | None = None, + per_token_timesteps: torch.Tensor | None = None, + return_dict: bool = True, + ) -> FlowMatchEulerDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + s_churn (`float`): + s_tmin (`float`): + s_tmax (`float`): + s_noise (`float`, defaults to 1.0): + Scaling factor for noise added to the sample. + generator (`torch.Generator`, *optional*): + A random number generator. + per_token_timesteps (`torch.Tensor`, *optional*): + The timesteps for each token in the sample. + return_dict (`bool`, defaults to `True`): + Whether or not to return a + [`~schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteSchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteSchedulerOutput`] or `tuple`: + If return_dict is `True`, + [`~schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteSchedulerOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + """ + + if ( + isinstance(timestep, int) + or isinstance(timestep, torch.IntTensor) + or isinstance(timestep, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `FlowMatchEulerDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + if per_token_timesteps is not None: + per_token_sigmas = per_token_timesteps / self.config.num_train_timesteps + + sigmas = self.sigmas[:, None, None] + lower_mask = sigmas < per_token_sigmas[None] - 1e-6 + lower_sigmas = lower_mask * sigmas + lower_sigmas, _ = lower_sigmas.max(dim=0) + + current_sigma = per_token_sigmas[..., None] + next_sigma = lower_sigmas[..., None] + dt = current_sigma - next_sigma + else: + sigma_idx = self.step_index + sigma = self.sigmas[sigma_idx] + sigma_next = self.sigmas[sigma_idx + 1] + + current_sigma = sigma + next_sigma = sigma_next + dt = sigma_next - sigma + + if self.config.stochastic_sampling: + x0 = sample - current_sigma * model_output + noise = torch.randn_like(sample) + prev_sample = (1.0 - next_sigma) * x0 + next_sigma * noise + else: + prev_sample = sample + dt * model_output + + # upon completion increase step index by one + self._step_index += 1 + if per_token_timesteps is None: + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + if not return_dict: + return (prev_sample,) + + return FlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def _time_shift_exponential(self, mu: float, sigma: float, t: torch.Tensor) -> torch.Tensor: + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + def _time_shift_linear(self, mu: float, sigma: float, t: torch.Tensor) -> torch.Tensor: + return mu / (mu + (1 / t - 1) ** sigma) + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_heun_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_heun_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..49892e043abd438d2a73a5e8d85bdc495e101815 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_heun_discrete.py @@ -0,0 +1,355 @@ +# Copyright 2025 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class FlowMatchHeunDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.FloatTensor + + +class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + Heun scheduler. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + shift (`float`, defaults to 1.0): + The shift value for the timestep schedule. + """ + + _compatibles = [] + order = 2 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + shift: float = 1.0, + ): + timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy() + timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32) + + sigmas = timesteps / num_train_timesteps + sigmas = shift * sigmas / (1 + (shift - 1) * sigmas) + + self.timesteps = sigmas * num_train_timesteps + + self._step_index = None + self._begin_index = None + + self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication + self.sigma_min = self.sigmas[-1].item() + self.sigma_max = self.sigmas[0].item() + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_noise( + self, + sample: torch.FloatTensor, + timestep: float | torch.FloatTensor, + noise: torch.FloatTensor, + ) -> torch.FloatTensor: + """ + Forward process in flow-matching + + Args: + sample (`torch.FloatTensor`): + The input sample. + timestep (`float` or `torch.FloatTensor`): + The current timestep in the diffusion chain. + noise (`torch.FloatTensor`): + The noise tensor. + + Returns: + `torch.FloatTensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + + sample = sigma * noise + (1.0 - sigma) * sample + + return sample + + def _sigma_to_t(self, sigma: float) -> float: + return sigma * self.config.num_train_timesteps + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device = None, + ) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + + timesteps = np.linspace( + self._sigma_to_t(self.sigma_max), + self._sigma_to_t(self.sigma_min), + num_inference_steps, + ) + + sigmas = timesteps / self.config.num_train_timesteps + sigmas = self.config.shift * sigmas / (1 + (self.config.shift - 1) * sigmas) + sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device) + + timesteps = sigmas * self.config.num_train_timesteps + timesteps = torch.cat([timesteps[:1], timesteps[1:].repeat_interleave(2)]) + self.timesteps = timesteps.to(device=device) + + sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) + self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]]) + + # empty dt and derivative + self.prev_derivative = None + self.dt = None + + self._step_index = None + self._begin_index = None + + def index_for_timestep( + self, + timestep: float | torch.FloatTensor, + schedule_timesteps: torch.FloatTensor | None = None, + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.FloatTensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.FloatTensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def _init_step_index(self, timestep: float | torch.FloatTensor) -> None: + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + @property + def state_in_first_order(self) -> bool: + """ + Returns whether the scheduler is in the first-order state. + """ + return self.dt is None + + def step( + self, + model_output: torch.FloatTensor, + timestep: float | torch.FloatTensor, + sample: torch.FloatTensor, + s_churn: float = 0.0, + s_tmin: float = 0.0, + s_tmax: float = float("inf"), + s_noise: float = 1.0, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> FlowMatchHeunDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`float` or `torch.FloatTensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + s_churn (`float`): + Stochasticity parameter that controls the amount of noise added during sampling. Higher values increase + randomness. + s_tmin (`float`): + Minimum timestep threshold for applying stochasticity. Only timesteps above this value will have noise + added. + s_tmax (`float`): + Maximum timestep threshold for applying stochasticity. Only timesteps below this value will have noise + added. + s_noise (`float`, defaults to 1.0): + Scaling factor for noise added to the sample. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`): + Whether or not to return a + [`~schedulers.scheduling_flow_match_heun_discrete.FlowMatchHeunDiscreteSchedulerOutput`] tuple. + + Returns: + [`~schedulers.scheduling_flow_match_heun_discrete.FlowMatchHeunDiscreteSchedulerOutput`] or `tuple`: + If return_dict is `True`, + [`~schedulers.scheduling_flow_match_heun_discrete.FlowMatchHeunDiscreteSchedulerOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + """ + + if ( + isinstance(timestep, int) + or isinstance(timestep, torch.IntTensor) + or isinstance(timestep, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `FlowMatchHeunDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + sigma_next = self.sigmas[self.step_index + 1] + else: + # 2nd order / Heun's method + sigma = self.sigmas[self.step_index - 1] + sigma_next = self.sigmas[self.step_index] + + gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 + + sigma_hat = sigma * (gamma + 1) + + if gamma > 0: + noise = randn_tensor( + model_output.shape, + dtype=model_output.dtype, + device=model_output.device, + generator=generator, + ) + eps = noise * s_noise + sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5 + + if self.state_in_first_order: + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + denoised = sample - model_output * sigma + # 2. convert to an ODE derivative for 1st order + derivative = (sample - denoised) / sigma_hat + # 3. Delta timestep + dt = sigma_next - sigma_hat + + # store for 2nd order step + self.prev_derivative = derivative + self.dt = dt + self.sample = sample + else: + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + denoised = sample - model_output * sigma_next + # 2. 2nd order / Heun's method + derivative = (sample - denoised) / sigma_next + derivative = 0.5 * (self.prev_derivative + derivative) + + # 3. take prev timestep & sample + dt = self.dt + sample = self.sample + + # free dt and derivative + # Note, this puts the scheduler in "first order mode" + self.prev_derivative = None + self.dt = None + self.sample = None + + prev_sample = sample + derivative * dt + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return FlowMatchHeunDiscreteSchedulerOutput(prev_sample=prev_sample) + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_lcm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_lcm.py new file mode 100644 index 0000000000000000000000000000000000000000..97d4ebbc8e427bd528303bd91d35a3204e9941e2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_flow_match_lcm.py @@ -0,0 +1,640 @@ +# Copyright 2025 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class FlowMatchLCMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.Tensor + + +class FlowMatchLCMScheduler(SchedulerMixin, ConfigMixin): + """ + LCM scheduler for Flow Matching. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + shift (`float`, defaults to 1.0): + The shift value for the timestep schedule. + use_dynamic_shifting (`bool`, defaults to False): + Whether to apply timestep shifting on-the-fly based on the image resolution. + base_shift (`float`, defaults to 0.5): + Value to stabilize image generation. Increasing `base_shift` reduces variation and image is more consistent + with desired output. + max_shift (`float`, defaults to 1.15): + Value change allowed to latent vectors. Increasing `max_shift` encourages more variation and image may be + more exaggerated or stylized. + base_image_seq_len (`int`, defaults to 256): + The base image sequence length. + max_image_seq_len (`int`, defaults to 4096): + The maximum image sequence length. + invert_sigmas (`bool`, defaults to False): + Whether to invert the sigmas. + shift_terminal (`float`, defaults to None): + The end value of the shifted timestep schedule. + use_karras_sigmas (`bool`, defaults to False): + Whether to use Karras sigmas for step sizes in the noise schedule during sampling. + use_exponential_sigmas (`bool`, defaults to False): + Whether to use exponential sigmas for step sizes in the noise schedule during sampling. + use_beta_sigmas (`bool`, defaults to False): + Whether to use beta sigmas for step sizes in the noise schedule during sampling. + time_shift_type (`str`, defaults to "exponential"): + The type of dynamic resolution-dependent timestep shifting to apply. + scale_factors (`list[float]`, *optional*, defaults to `None`): + It defines how to scale the latents at which predictions are made. + upscale_mode (`str`, *optional*, defaults to "bicubic"): + Upscaling method, applied if scale-wise generation is considered. + """ + + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + shift: float = 1.0, + use_dynamic_shifting: bool = False, + base_shift: float = 0.5, + max_shift: float = 1.15, + base_image_seq_len: int = 256, + max_image_seq_len: int = 4096, + invert_sigmas: bool = False, + shift_terminal: float | None = None, + use_karras_sigmas: bool | None = False, + use_exponential_sigmas: bool | None = False, + use_beta_sigmas: bool | None = False, + time_shift_type: Literal["exponential", "linear"] = "exponential", + scale_factors: list[float] | None = None, + upscale_mode: Literal[ + "nearest", + "linear", + "bilinear", + "bicubic", + "trilinear", + "area", + "nearest-exact", + ] = "bicubic", + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if ( + sum( + [ + self.config.use_beta_sigmas, + self.config.use_exponential_sigmas, + self.config.use_karras_sigmas, + ] + ) + > 1 + ): + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if time_shift_type not in {"exponential", "linear"}: + raise ValueError("`time_shift_type` must either be 'exponential' or 'linear'.") + + timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy() + timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32) + + sigmas = timesteps / num_train_timesteps + if not use_dynamic_shifting: + # when use_dynamic_shifting is True, we apply the timestep shifting on the fly based on the image resolution + sigmas = shift * sigmas / (1 + (shift - 1) * sigmas) + + self.timesteps = sigmas * num_train_timesteps + + self._step_index = None + self._begin_index = None + + self._shift = shift + + self._init_size = None + self._scale_factors = scale_factors + self._upscale_mode = upscale_mode + + self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication + self.sigma_min = self.sigmas[-1].item() + self.sigma_max = self.sigmas[0].item() + + @property + def shift(self): + """ + The value used for shifting. + """ + return self._shift + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_shift(self, shift: float) -> None: + self._shift = shift + + def set_scale_factors(self, scale_factors: list[float], upscale_mode: str) -> None: + """ + Sets scale factors for a scale-wise generation regime. + + Args: + scale_factors (`list[float]`): + The scale factors for each step. + upscale_mode (`str`): + Upscaling method. + """ + self._scale_factors = scale_factors + self._upscale_mode = upscale_mode + + def scale_noise( + self, + sample: torch.FloatTensor, + timestep: float | torch.FloatTensor, + noise: torch.FloatTensor | None = None, + ) -> torch.FloatTensor: + """ + Forward process in flow-matching + + Args: + sample (`torch.FloatTensor`): + The input sample. + timestep (`torch.FloatTensor`): + The current timestep in the diffusion chain. + noise (`torch.FloatTensor`): + The noise tensor. + + Returns: + `torch.FloatTensor`: + A scaled input sample. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype) + + if sample.device.type == "mps" and torch.is_floating_point(timestep): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32) + timestep = timestep.to(sample.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(sample.device) + timestep = timestep.to(sample.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timestep.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timestep.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(sample.shape): + sigma = sigma.unsqueeze(-1) + + sample = sigma * noise + (1.0 - sigma) * sample + + return sample + + def _sigma_to_t(self, sigma: float | torch.FloatTensor) -> float | torch.FloatTensor: + return sigma * self.config.num_train_timesteps + + def time_shift( + self, mu: float, sigma: float, t: float | np.ndarray | torch.Tensor + ) -> float | np.ndarray | torch.Tensor: + if self.config.time_shift_type == "exponential": + return self._time_shift_exponential(mu, sigma, t) + elif self.config.time_shift_type == "linear": + return self._time_shift_linear(mu, sigma, t) + + def stretch_shift_to_terminal(self, t: np.ndarray | torch.Tensor) -> np.ndarray | torch.Tensor: + r""" + Stretches and shifts the timestep schedule to ensure it terminates at the configured `shift_terminal` config + value. + + Reference: + https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51 + + Args: + t (`torch.Tensor` or `np.ndarray`): + A tensor or numpy array of timesteps to be stretched and shifted. + + Returns: + `torch.Tensor` or `np.ndarray`: + A tensor or numpy array of adjusted timesteps such that the final value equals + `self.config.shift_terminal`. + """ + one_minus_z = 1 - t + scale_factor = one_minus_z[-1] / (1 - self.config.shift_terminal) + stretched_t = 1 - (one_minus_z / scale_factor) + return stretched_t + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device | None = None, + sigmas: list[float] | None = None, + mu: float | None = None, + timesteps: list[float] | None = None, + ) -> None: + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + sigmas (`list[float]`, *optional*): + Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed + automatically. + mu (`float`, *optional*): + Determines the amount of shifting applied to sigmas when performing resolution-dependent timestep + shifting. + timesteps (`list[float]`, *optional*): + Custom values for timesteps to be used for each diffusion step. If `None`, the timesteps are computed + automatically. + """ + if self.config.use_dynamic_shifting and mu is None: + raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`") + + if sigmas is not None and timesteps is not None: + if len(sigmas) != len(timesteps): + raise ValueError("`sigmas` and `timesteps` should have the same length") + + if num_inference_steps is not None: + if (sigmas is not None and len(sigmas) != num_inference_steps) or ( + timesteps is not None and len(timesteps) != num_inference_steps + ): + raise ValueError( + "`sigmas` and `timesteps` should have the same length as num_inference_steps, if `num_inference_steps` is provided" + ) + else: + num_inference_steps = len(sigmas) if sigmas is not None else len(timesteps) + + self.num_inference_steps = num_inference_steps + + # 1. Prepare default sigmas + is_timesteps_provided = timesteps is not None + + if is_timesteps_provided: + timesteps = np.array(timesteps).astype(np.float32) # type: ignore + + if sigmas is None: + if timesteps is None: + timesteps = np.linspace( # type: ignore + self._sigma_to_t(self.sigma_max), + self._sigma_to_t(self.sigma_min), + num_inference_steps, + ) + sigmas = timesteps / self.config.num_train_timesteps # type: ignore + else: + sigmas = np.array(sigmas).astype(np.float32) # type: ignore + num_inference_steps = len(sigmas) + + # 2. Perform timestep shifting. Either no shifting is applied, or resolution-dependent shifting of + # "exponential" or "linear" type is applied + if self.config.use_dynamic_shifting: + sigmas = self.time_shift(mu, 1.0, sigmas) # type: ignore + else: + sigmas = self.shift * sigmas / (1 + (self.shift - 1) * sigmas) # type: ignore + + # 3. If required, stretch the sigmas schedule to terminate at the configured `shift_terminal` value + if self.config.shift_terminal: + sigmas = self.stretch_shift_to_terminal(sigmas) # type: ignore + + # 4. If required, convert sigmas to one of karras, exponential, or beta sigma schedules + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) # type: ignore + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) # type: ignore + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) # type: ignore + + # 5. Convert sigmas and timesteps to tensors and move to specified device + sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device) # type: ignore + if not is_timesteps_provided: + timesteps = sigmas * self.config.num_train_timesteps # type: ignore + else: + timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32, device=device) # type: ignore + + # 6. Append the terminal sigma value. + # If a model requires inverted sigma schedule for denoising but timesteps without inversion, the + # `invert_sigmas` flag can be set to `True`. This case is only required in Mochi + if self.config.invert_sigmas: + sigmas = 1.0 - sigmas + timesteps = sigmas * self.config.num_train_timesteps + sigmas = torch.cat([sigmas, torch.ones(1, device=sigmas.device)]) + else: + sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]) + + self.timesteps = timesteps + self.sigmas = sigmas + self._step_index = None + self._begin_index = None + + def index_for_timestep( + self, + timestep: float | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return int(indices[pos].item()) + + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.FloatTensor, + timestep: float | torch.FloatTensor, + sample: torch.FloatTensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> FlowMatchLCMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_flow_match_lcm.FlowMatchLCMSchedulerOutput`] or + tuple. + + Returns: + [`~schedulers.scheduling_flow_match_lcm.FlowMatchLCMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_flow_match_lcm.FlowMatchLCMSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + + if ( + isinstance(timestep, int) + or isinstance(timestep, torch.IntTensor) + or isinstance(timestep, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `FlowMatchLCMScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if self._scale_factors and self._upscale_mode and len(self.timesteps) != len(self._scale_factors) + 1: + raise ValueError( + "`_scale_factors` should have the same length as `timesteps` - 1, if `_scale_factors` are set." + ) + + if self._init_size is None or self.step_index is None: + self._init_size = model_output.size()[2:] + + if self.step_index is None: + self._init_step_index(timestep) + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + sigma = self.sigmas[self.step_index] + sigma_next = self.sigmas[self.step_index + 1] + x0_pred = sample - sigma * model_output + + if self._scale_factors and self._upscale_mode: + if self._step_index < len(self._scale_factors): + size = [round(self._scale_factors[self._step_index] * size) for size in self._init_size] + x0_pred = torch.nn.functional.interpolate(x0_pred, size=size, mode=self._upscale_mode) + + noise = randn_tensor( + x0_pred.shape, + generator=generator, + device=x0_pred.device, + dtype=x0_pred.dtype, + ) + prev_sample = (1 - sigma_next) * x0_pred + sigma_next * noise + + # upon completion increase step index by one + self._step_index += 1 + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + if not return_dict: + return (prev_sample,) + + return FlowMatchLCMSchedulerOutput(prev_sample=prev_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def _time_shift_exponential( + self, mu: float, sigma: float, t: float | np.ndarray | torch.Tensor + ) -> float | np.ndarray | torch.Tensor: + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + def _time_shift_linear( + self, mu: float, sigma: float, t: float | np.ndarray | torch.Tensor + ) -> float | np.ndarray | torch.Tensor: + return mu / (mu + (1 / t - 1) ** sigma) + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_helios.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_helios.py new file mode 100644 index 0000000000000000000000000000000000000000..ed35245c9db322af5b75a1025f8502fdfba3503c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_helios.py @@ -0,0 +1,867 @@ +# Copyright 2025 The Helios Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..schedulers.scheduling_utils import SchedulerMixin +from ..utils import BaseOutput, deprecate + + +@dataclass +class HeliosSchedulerOutput(BaseOutput): + prev_sample: torch.FloatTensor + model_outputs: torch.FloatTensor | None = None + last_sample: torch.FloatTensor | None = None + this_order: int | None = None + + +class HeliosScheduler(SchedulerMixin, ConfigMixin): + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + shift: float = 1.0, # Following Stable diffusion 3, + stages: int = 3, + stage_range: list = [0, 1 / 3, 2 / 3, 1], + gamma: float = 1 / 3, + # For UniPC + thresholding: bool = False, + prediction_type: str = "flow_prediction", + solver_order: int = 2, + predict_x0: bool = True, + solver_type: str = "bh2", + lower_order_final: bool = True, + disable_corrector: list[int] = [], + solver_p: SchedulerMixin = None, + use_flow_sigmas: bool = True, + scheduler_type: str = "unipc", # ["euler", "unipc"] + use_dynamic_shifting: bool = False, + time_shift_type: Literal["exponential", "linear"] = "exponential", + ): + self.timestep_ratios = {} # The timestep ratio for each stage + self.timesteps_per_stage = {} # The detailed timesteps per stage (fix max and min per stage) + self.sigmas_per_stage = {} # always uniform [1000, 0] + self.start_sigmas = {} # for start point / upsample renoise + self.end_sigmas = {} # for end point + self.ori_start_sigmas = {} + + # self.init_sigmas() + self.init_sigmas_for_each_stage() + self.sigma_min = self.sigmas[-1].item() + self.sigma_max = self.sigmas[0].item() + self.gamma = gamma + + if solver_type not in ["bh1", "bh2"]: + if solver_type in ["midpoint", "heun", "logrho"]: + self.register_to_config(solver_type="bh2") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + self.predict_x0 = predict_x0 + self.model_outputs = [None] * solver_order + self.timestep_list = [None] * solver_order + self.lower_order_nums = 0 + self.disable_corrector = disable_corrector + self.solver_p = solver_p + self.last_sample = None + self._step_index = None + self._begin_index = None + + def init_sigmas(self): + """ + initialize the global timesteps and sigmas + """ + num_train_timesteps = self.config.num_train_timesteps + shift = self.config.shift + + alphas = np.linspace(1, 1 / num_train_timesteps, num_train_timesteps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(shift * sigmas / (1 + (shift - 1) * sigmas))[:-1].copy() + sigmas = torch.from_numpy(sigmas) + timesteps = (sigmas * num_train_timesteps).clone() + + self._step_index = None + self._begin_index = None + self.timesteps = timesteps + self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication + + def init_sigmas_for_each_stage(self): + """ + Init the timesteps for each stage + """ + self.init_sigmas() + + stage_distance = [] + stages = self.config.stages + training_steps = self.config.num_train_timesteps + stage_range = self.config.stage_range + + # Init the start and end point of each stage + for i_s in range(stages): + # To decide the start and ends point + start_indice = int(stage_range[i_s] * training_steps) + start_indice = max(start_indice, 0) + end_indice = int(stage_range[i_s + 1] * training_steps) + end_indice = min(end_indice, training_steps) + start_sigma = self.sigmas[start_indice].item() + end_sigma = self.sigmas[end_indice].item() if end_indice < training_steps else 0.0 + self.ori_start_sigmas[i_s] = start_sigma + + if i_s != 0: + ori_sigma = 1 - start_sigma + gamma = self.config.gamma + corrected_sigma = (1 / (math.sqrt(1 + (1 / gamma)) * (1 - ori_sigma) + ori_sigma)) * ori_sigma + # corrected_sigma = 1 / (2 - ori_sigma) * ori_sigma + start_sigma = 1 - corrected_sigma + + stage_distance.append(start_sigma - end_sigma) + self.start_sigmas[i_s] = start_sigma + self.end_sigmas[i_s] = end_sigma + + # Determine the ratio of each stage according to flow length + tot_distance = sum(stage_distance) + for i_s in range(stages): + if i_s == 0: + start_ratio = 0.0 + else: + start_ratio = sum(stage_distance[:i_s]) / tot_distance + if i_s == stages - 1: + end_ratio = 0.9999999999999999 + else: + end_ratio = sum(stage_distance[: i_s + 1]) / tot_distance + + self.timestep_ratios[i_s] = (start_ratio, end_ratio) + + # Determine the timesteps and sigmas for each stage + for i_s in range(stages): + timestep_ratio = self.timestep_ratios[i_s] + # timestep_max = self.timesteps[int(timestep_ratio[0] * training_steps)] + timestep_max = min(self.timesteps[int(timestep_ratio[0] * training_steps)], 999) + timestep_min = self.timesteps[min(int(timestep_ratio[1] * training_steps), training_steps - 1)] + timesteps = np.linspace(timestep_max, timestep_min, training_steps + 1) + self.timesteps_per_stage[i_s] = ( + timesteps[:-1] if isinstance(timesteps, torch.Tensor) else torch.from_numpy(timesteps[:-1]) + ) + stage_sigmas = np.linspace(0.999, 0, training_steps + 1) + self.sigmas_per_stage[i_s] = torch.from_numpy(stage_sigmas[:-1]) + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def _sigma_to_t(self, sigma): + return sigma * self.config.num_train_timesteps + + def set_timesteps( + self, + num_inference_steps: int, + stage_index: int | None = None, + device: str | torch.device = None, + sigmas: bool | None = None, + mu: bool | None = None, + is_amplify_first_chunk: bool = False, + ): + """ + Setting the timesteps and sigmas for each stage + """ + if self.config.scheduler_type == "dmd": + if is_amplify_first_chunk: + num_inference_steps = num_inference_steps * 2 + 1 + else: + num_inference_steps = num_inference_steps + 1 + + self.num_inference_steps = num_inference_steps + self.init_sigmas() + + if self.config.stages == 1: + if sigmas is None: + sigmas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)[:-1].astype( + np.float32 + ) + if self.config.shift != 1.0: + assert not self.config.use_dynamic_shifting + sigmas = self.time_shift(self.config.shift, 1.0, sigmas) + timesteps = (sigmas * self.config.num_train_timesteps).copy() + sigmas = torch.from_numpy(sigmas) + else: + stage_timesteps = self.timesteps_per_stage[stage_index] + timesteps = np.linspace( + stage_timesteps[0].item(), + stage_timesteps[-1].item(), + num_inference_steps, + ) + + stage_sigmas = self.sigmas_per_stage[stage_index] + ratios = np.linspace(stage_sigmas[0].item(), stage_sigmas[-1].item(), num_inference_steps) + sigmas = torch.from_numpy(ratios) + + self.timesteps = torch.from_numpy(timesteps).to(device=device) + self.sigmas = torch.cat([sigmas, torch.zeros(1)]).to(device=device) + + self._step_index = None + self.reset_scheduler_history() + + if self.config.scheduler_type == "dmd": + self.timesteps = self.timesteps[:-1] + self.sigmas = torch.cat([self.sigmas[:-2], self.sigmas[-1:]]) + + if self.config.use_dynamic_shifting: + assert self.config.shift == 1.0 + self.sigmas = self.time_shift(mu, 1.0, self.sigmas) + if self.config.stages == 1: + self.timesteps = self.sigmas[:-1] * self.config.num_train_timesteps + else: + self.timesteps = self.timesteps_per_stage[stage_index].min() + self.sigmas[:-1] * ( + self.timesteps_per_stage[stage_index].max() - self.timesteps_per_stage[stage_index].min() + ) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.time_shift + def time_shift(self, mu: float, sigma: float, t: torch.Tensor): + """ + Apply time shifting to the sigmas. + + Args: + mu (`float`): + The mu parameter for the time shift. + sigma (`float`): + The sigma parameter for the time shift. + t (`torch.Tensor`): + The input timesteps. + + Returns: + `torch.Tensor`: + The time-shifted timesteps. + """ + if self.config.time_shift_type == "exponential": + return self._time_shift_exponential(mu, sigma, t) + elif self.config.time_shift_type == "linear": + return self._time_shift_linear(mu, sigma, t) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_exponential + def _time_shift_exponential(self, mu, sigma, t): + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_linear + def _time_shift_linear(self, mu, sigma, t): + return mu / (mu + (1 / t - 1) ** sigma) + + # ---------------------------------- Euler ---------------------------------- + def index_for_timestep(self, timestep, schedule_timesteps=None): + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def _init_step_index(self, timestep): + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step_euler( + self, + model_output: torch.FloatTensor, + timestep: float | torch.FloatTensor = None, + sample: torch.FloatTensor = None, + generator: torch.Generator | None = None, + sigma: torch.FloatTensor | None = None, + sigma_next: torch.FloatTensor | None = None, + return_dict: bool = True, + ) -> HeliosSchedulerOutput | tuple: + assert (sigma is None) == (sigma_next is None), "sigma and sigma_next must both be None or both be not None" + + if sigma is None and sigma_next is None: + if ( + isinstance(timestep, int) + or isinstance(timestep, torch.IntTensor) + or isinstance(timestep, torch.LongTensor) + ): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `EulerDiscreteScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` as a timestep." + ), + ) + + if self.step_index is None: + self._step_index = 0 + + # Upcast to avoid precision issues when computing prev_sample + sample = sample.to(torch.float32) + + if sigma is None and sigma_next is None: + sigma = self.sigmas[self.step_index] + sigma_next = self.sigmas[self.step_index + 1] + + prev_sample = sample + (sigma_next - sigma) * model_output + + # Cast sample back to model compatible dtype + prev_sample = prev_sample.to(model_output.dtype) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return HeliosSchedulerOutput(prev_sample=prev_sample) + + # ---------------------------------- UniPC ---------------------------------- + def _sigma_to_alpha_sigma_t(self, sigma): + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = torch.clamp(sigma, min=1e-8) + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + sigma: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + r""" + Convert the model output to the corresponding type the UniPC algorithm needs. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + flag = False + if sigma is None: + flag = True + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + + if self.predict_x0: + if self.config.prediction_type == "epsilon": + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + if flag: + sigma_t = self.sigmas[self.step_index] + else: + sigma_t = sigma + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the UniPCMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + else: + if self.config.prediction_type == "epsilon": + return model_output + elif self.config.prediction_type == "sample": + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + elif self.config.prediction_type == "v_prediction": + epsilon = alpha_t * model_output + sigma_t * sample + return epsilon + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the UniPCMultistepScheduler." + ) + + def multistep_uni_p_bh_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + order: int = None, + sigma: torch.Tensor = None, + sigma_next: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the UniP (B(h) version). Alternatively, `self.solver_p` is used if is specified. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model at the current timestep. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + order (`int`): + The order of UniP at this timestep (corresponds to the *p* in UniPC-p). + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + prev_timestep = args[0] if len(args) > 0 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if order is None: + if len(args) > 2: + order = args[2] + else: + raise ValueError("missing `order` as a required keyword argument") + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + model_output_list = self.model_outputs + + s0 = self.timestep_list[-1] + m0 = model_output_list[-1] + x = sample + + if self.solver_p: + x_t = self.solver_p.step(model_output, s0, x).prev_sample + return x_t + + if sigma_next is None and sigma is None: + sigma_t, sigma_s0 = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] + else: + sigma_t, sigma_s0 = sigma_next, sigma + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + h = lambda_t - lambda_s0 + device = sample.device + + rks = [] + D1s = [] + for i in range(1, order): + si = self.step_index - i + mi = model_output_list[-(i + 1)] + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + rk = (lambda_si - lambda_s0) / h + rks.append(rk) + D1s.append((mi - m0) / rk) + + rks.append(1.0) + rks = torch.tensor(rks, device=device) + + R = [] + b = [] + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.config.solver_type == "bh1": + B_h = hh + elif self.config.solver_type == "bh2": + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= i + 1 + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=device) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) # (B, K) + # for order 2, we use a simplified version + if order == 2: + rhos_p = torch.tensor([0.5], dtype=x.dtype, device=device) + else: + rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]).to(device).to(x.dtype) + else: + D1s = None + + if self.predict_x0: + x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - alpha_t * B_h * pred_res + else: + x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - sigma_t * B_h * pred_res + + x_t = x_t.to(x.dtype) + return x_t + + def multistep_uni_c_bh_update( + self, + this_model_output: torch.Tensor, + *args, + last_sample: torch.Tensor = None, + this_sample: torch.Tensor = None, + order: int = None, + sigma_before: torch.Tensor = None, + sigma: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the UniC (B(h) version). + + Args: + this_model_output (`torch.Tensor`): + The model outputs at `x_t`. + this_timestep (`int`): + The current timestep `t`. + last_sample (`torch.Tensor`): + The generated sample before the last predictor `x_{t-1}`. + this_sample (`torch.Tensor`): + The generated sample after the last predictor `x_{t}`. + order (`int`): + The `p` of UniC-p at this step. The effective order of accuracy should be `order + 1`. + + Returns: + `torch.Tensor`: + The corrected sample tensor at the current timestep. + """ + this_timestep = args[0] if len(args) > 0 else kwargs.pop("this_timestep", None) + if last_sample is None: + if len(args) > 1: + last_sample = args[1] + else: + raise ValueError("missing `last_sample` as a required keyword argument") + if this_sample is None: + if len(args) > 2: + this_sample = args[2] + else: + raise ValueError("missing `this_sample` as a required keyword argument") + if order is None: + if len(args) > 3: + order = args[3] + else: + raise ValueError("missing `order` as a required keyword argument") + if this_timestep is not None: + deprecate( + "this_timestep", + "1.0.0", + "Passing `this_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + model_output_list = self.model_outputs + + m0 = model_output_list[-1] + x = last_sample + x_t = this_sample + model_t = this_model_output + + if sigma_before is None and sigma is None: + sigma_t, sigma_s0 = self.sigmas[self.step_index], self.sigmas[self.step_index - 1] + else: + sigma_t, sigma_s0 = sigma, sigma_before + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + h = lambda_t - lambda_s0 + device = this_sample.device + + rks = [] + D1s = [] + for i in range(1, order): + si = self.step_index - (i + 1) + mi = model_output_list[-(i + 1)] + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + rk = (lambda_si - lambda_s0) / h + rks.append(rk) + D1s.append((mi - m0) / rk) + + rks.append(1.0) + rks = torch.tensor(rks, device=device) + + R = [] + b = [] + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.config.solver_type == "bh1": + B_h = hh + elif self.config.solver_type == "bh2": + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= i + 1 + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=device) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) + else: + D1s = None + + # for order 1, we use a simplified version + if order == 1: + rhos_c = torch.tensor([0.5], dtype=x.dtype, device=device) + else: + rhos_c = torch.linalg.solve(R, b).to(device).to(x.dtype) + + if self.predict_x0: + x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 + if D1s is not None: + corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s) + else: + corr_res = 0 + D1_t = model_t - m0 + x_t = x_t_ - alpha_t * B_h * (corr_res + rhos_c[-1] * D1_t) + else: + x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 + if D1s is not None: + corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s) + else: + corr_res = 0 + D1_t = model_t - m0 + x_t = x_t_ - sigma_t * B_h * (corr_res + rhos_c[-1] * D1_t) + x_t = x_t.to(x.dtype) + return x_t + + def step_unipc( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor = None, + sample: torch.Tensor = None, + return_dict: bool = True, + model_outputs: list = None, + timestep_list: list = None, + sigma_before: torch.Tensor = None, + sigma: torch.Tensor = None, + sigma_next: torch.Tensor = None, + cus_step_index: int = None, + cus_lower_order_num: int = None, + cus_this_order: int = None, + cus_last_sample: torch.Tensor = None, + ) -> HeliosSchedulerOutput | tuple: + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if cus_step_index is None: + if self.step_index is None: + self._step_index = 0 + else: + self._step_index = cus_step_index + + if cus_lower_order_num is not None: + self.lower_order_nums = cus_lower_order_num + + if cus_this_order is not None: + self.this_order = cus_this_order + + if cus_last_sample is not None: + self.last_sample = cus_last_sample + + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector and self.last_sample is not None + ) + + # Convert model output using the proper conversion method + model_output_convert = self.convert_model_output(model_output, sample=sample, sigma=sigma) + + if model_outputs is not None and timestep_list is not None: + self.model_outputs = model_outputs[:-1] + self.timestep_list = timestep_list[:-1] + + if use_corrector: + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + sigma_before=sigma_before, + sigma=sigma, + ) + + if model_outputs is not None and timestep_list is not None: + model_outputs[-1] = model_output_convert + self.model_outputs = model_outputs[1:] + self.timestep_list = timestep_list[1:] + else: + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + if self.config.lower_order_final: + this_order = min(self.config.solver_order, len(self.timesteps) - self.step_index) + else: + this_order = self.config.solver_order + self.this_order = min(this_order, self.lower_order_nums + 1) # warmup for multistep + assert self.this_order > 0 + + self.last_sample = sample + prev_sample = self.multistep_uni_p_bh_update( + model_output=model_output, # pass the original non-converted model output, in case solver-p is used + sample=sample, + order=self.this_order, + sigma=sigma, + sigma_next=sigma_next, + ) + + if cus_lower_order_num is None: + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + if cus_step_index is None: + self._step_index += 1 + + if not return_dict: + return (prev_sample, model_outputs, self.last_sample, self.this_order) + + return HeliosSchedulerOutput( + prev_sample=prev_sample, + model_outputs=model_outputs, + last_sample=self.last_sample, + this_order=self.this_order, + ) + + # ---------------------------------- Merge ---------------------------------- + def step( + self, + model_output: torch.FloatTensor, + timestep: float | torch.FloatTensor = None, + sample: torch.FloatTensor = None, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> HeliosSchedulerOutput | tuple: + if self.config.scheduler_type == "euler": + return self.step_euler( + model_output=model_output, + timestep=timestep, + sample=sample, + generator=generator, + return_dict=return_dict, + ) + elif self.config.scheduler_type == "unipc": + return self.step_unipc( + model_output=model_output, + timestep=timestep, + sample=sample, + return_dict=return_dict, + ) + else: + raise NotImplementedError + + def reset_scheduler_history(self): + self.model_outputs = [None] * self.config.solver_order + self.timestep_list = [None] * self.config.solver_order + self.lower_order_nums = 0 + self.disable_corrector = self.config.disable_corrector + self.solver_p = self.config.solver_p + self.last_sample = None + self._step_index = None + self._begin_index = None + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_helios_dmd.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_helios_dmd.py new file mode 100644 index 0000000000000000000000000000000000000000..1f4afa0e3128bee25bcc241e687d1a98f8d8fc09 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_helios_dmd.py @@ -0,0 +1,331 @@ +# Copyright 2025 The Helios Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..schedulers.scheduling_utils import SchedulerMixin +from ..utils import BaseOutput + + +@dataclass +class HeliosDMDSchedulerOutput(BaseOutput): + prev_sample: torch.FloatTensor + model_outputs: torch.FloatTensor | None = None + last_sample: torch.FloatTensor | None = None + this_order: int | None = None + + +class HeliosDMDScheduler(SchedulerMixin, ConfigMixin): + _compatibles = [] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + shift: float = 1.0, # Following Stable diffusion 3, + stages: int = 3, + stage_range: list = [0, 1 / 3, 2 / 3, 1], + gamma: float = 1 / 3, + prediction_type: str = "flow_prediction", + use_flow_sigmas: bool = True, + use_dynamic_shifting: bool = False, + time_shift_type: Literal["exponential", "linear"] = "linear", + ): + self.timestep_ratios = {} # The timestep ratio for each stage + self.timesteps_per_stage = {} # The detailed timesteps per stage (fix max and min per stage) + self.sigmas_per_stage = {} # always uniform [1000, 0] + self.start_sigmas = {} # for start point / upsample renoise + self.end_sigmas = {} # for end point + self.ori_start_sigmas = {} + + # self.init_sigmas() + self.init_sigmas_for_each_stage() + self.sigma_min = self.sigmas[-1].item() + self.sigma_max = self.sigmas[0].item() + self.gamma = gamma + + self.last_sample = None + self._step_index = None + self._begin_index = None + + def init_sigmas(self): + """ + initialize the global timesteps and sigmas + """ + num_train_timesteps = self.config.num_train_timesteps + shift = self.config.shift + + alphas = np.linspace(1, 1 / num_train_timesteps, num_train_timesteps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(shift * sigmas / (1 + (shift - 1) * sigmas))[:-1].copy() + sigmas = torch.from_numpy(sigmas) + timesteps = (sigmas * num_train_timesteps).clone() + + self._step_index = None + self._begin_index = None + self.timesteps = timesteps + self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication + + def init_sigmas_for_each_stage(self): + """ + Init the timesteps for each stage + """ + self.init_sigmas() + + stage_distance = [] + stages = self.config.stages + training_steps = self.config.num_train_timesteps + stage_range = self.config.stage_range + + # Init the start and end point of each stage + for i_s in range(stages): + # To decide the start and ends point + start_indice = int(stage_range[i_s] * training_steps) + start_indice = max(start_indice, 0) + end_indice = int(stage_range[i_s + 1] * training_steps) + end_indice = min(end_indice, training_steps) + start_sigma = self.sigmas[start_indice].item() + end_sigma = self.sigmas[end_indice].item() if end_indice < training_steps else 0.0 + self.ori_start_sigmas[i_s] = start_sigma + + if i_s != 0: + ori_sigma = 1 - start_sigma + gamma = self.config.gamma + corrected_sigma = (1 / (math.sqrt(1 + (1 / gamma)) * (1 - ori_sigma) + ori_sigma)) * ori_sigma + # corrected_sigma = 1 / (2 - ori_sigma) * ori_sigma + start_sigma = 1 - corrected_sigma + + stage_distance.append(start_sigma - end_sigma) + self.start_sigmas[i_s] = start_sigma + self.end_sigmas[i_s] = end_sigma + + # Determine the ratio of each stage according to flow length + tot_distance = sum(stage_distance) + for i_s in range(stages): + if i_s == 0: + start_ratio = 0.0 + else: + start_ratio = sum(stage_distance[:i_s]) / tot_distance + if i_s == stages - 1: + end_ratio = 0.9999999999999999 + else: + end_ratio = sum(stage_distance[: i_s + 1]) / tot_distance + + self.timestep_ratios[i_s] = (start_ratio, end_ratio) + + # Determine the timesteps and sigmas for each stage + for i_s in range(stages): + timestep_ratio = self.timestep_ratios[i_s] + # timestep_max = self.timesteps[int(timestep_ratio[0] * training_steps)] + timestep_max = min(self.timesteps[int(timestep_ratio[0] * training_steps)], 999) + timestep_min = self.timesteps[min(int(timestep_ratio[1] * training_steps), training_steps - 1)] + timesteps = np.linspace(timestep_max, timestep_min, training_steps + 1) + self.timesteps_per_stage[i_s] = ( + timesteps[:-1] if isinstance(timesteps, torch.Tensor) else torch.from_numpy(timesteps[:-1]) + ) + stage_sigmas = np.linspace(0.999, 0, training_steps + 1) + self.sigmas_per_stage[i_s] = torch.from_numpy(stage_sigmas[:-1]) + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def _sigma_to_t(self, sigma): + return sigma * self.config.num_train_timesteps + + def set_timesteps( + self, + num_inference_steps: int, + stage_index: int | None = None, + device: str | torch.device = None, + sigmas: bool | None = None, + mu: bool | None = None, + is_amplify_first_chunk: bool = False, + ): + """ + Setting the timesteps and sigmas for each stage + """ + if is_amplify_first_chunk: + num_inference_steps = num_inference_steps * 2 + 1 + else: + num_inference_steps = num_inference_steps + 1 + + self.num_inference_steps = num_inference_steps + self.init_sigmas() + + if self.config.stages == 1: + if sigmas is None: + sigmas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)[:-1].astype( + np.float32 + ) + if self.config.shift != 1.0: + assert not self.config.use_dynamic_shifting + sigmas = self.time_shift(self.config.shift, 1.0, sigmas) + timesteps = (sigmas * self.config.num_train_timesteps).copy() + sigmas = torch.from_numpy(sigmas) + else: + stage_timesteps = self.timesteps_per_stage[stage_index] + timesteps = np.linspace( + stage_timesteps[0].item(), + stage_timesteps[-1].item(), + num_inference_steps, + ) + + stage_sigmas = self.sigmas_per_stage[stage_index] + ratios = np.linspace(stage_sigmas[0].item(), stage_sigmas[-1].item(), num_inference_steps) + sigmas = torch.from_numpy(ratios) + + self.timesteps = torch.from_numpy(timesteps).to(device=device) + self.sigmas = torch.cat([sigmas, torch.zeros(1)]).to(device=device) + + self._step_index = None + self.reset_scheduler_history() + + self.timesteps = self.timesteps[:-1] + self.sigmas = torch.cat([self.sigmas[:-2], self.sigmas[-1:]]) + + if self.config.use_dynamic_shifting: + assert self.config.shift == 1.0 + self.sigmas = self.time_shift(mu, 1.0, self.sigmas) + if self.config.stages == 1: + self.timesteps = self.sigmas[:-1] * self.config.num_train_timesteps + else: + self.timesteps = self.timesteps_per_stage[stage_index].min() + self.sigmas[:-1] * ( + self.timesteps_per_stage[stage_index].max() - self.timesteps_per_stage[stage_index].min() + ) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.time_shift + def time_shift(self, mu: float, sigma: float, t: torch.Tensor): + """ + Apply time shifting to the sigmas. + + Args: + mu (`float`): + The mu parameter for the time shift. + sigma (`float`): + The sigma parameter for the time shift. + t (`torch.Tensor`): + The input timesteps. + + Returns: + `torch.Tensor`: + The time-shifted timesteps. + """ + if self.config.time_shift_type == "exponential": + return self._time_shift_exponential(mu, sigma, t) + elif self.config.time_shift_type == "linear": + return self._time_shift_linear(mu, sigma, t) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_exponential + def _time_shift_exponential(self, mu, sigma, t): + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_linear + def _time_shift_linear(self, mu, sigma, t): + return mu / (mu + (1 / t - 1) ** sigma) + + # ---------------------------------- For DMD ---------------------------------- + def add_noise(self, original_samples, noise, timestep, sigmas, timesteps): + sigmas = sigmas.to(noise.device) + timesteps = timesteps.to(noise.device) + timestep_id = torch.argmin((timesteps.unsqueeze(0) - timestep.unsqueeze(1)).abs(), dim=1) + sigma = sigmas[timestep_id].reshape(-1, 1, 1, 1, 1) + sample = (1 - sigma) * original_samples + sigma * noise + return sample.type_as(noise) + + def convert_flow_pred_to_x0(self, flow_pred, xt, timestep, sigmas, timesteps): + # use higher precision for calculations + original_dtype = flow_pred.dtype + device = flow_pred.device + flow_pred, xt, sigmas, timesteps = (x.double().to(device) for x in (flow_pred, xt, sigmas, timesteps)) + + timestep_id = torch.argmin((timesteps.unsqueeze(0) - timestep.unsqueeze(1)).abs(), dim=1) + sigma_t = sigmas[timestep_id].reshape(-1, 1, 1, 1, 1) + x0_pred = xt - sigma_t * flow_pred + return x0_pred.to(original_dtype) + + def step( + self, + model_output: torch.FloatTensor, + timestep: float | torch.FloatTensor = None, + sample: torch.FloatTensor = None, + generator: torch.Generator | None = None, + return_dict: bool = True, + cur_sampling_step: int = 0, + dmd_noisy_tensor: torch.FloatTensor | None = None, + dmd_sigmas: torch.FloatTensor | None = None, + dmd_timesteps: torch.FloatTensor | None = None, + all_timesteps: torch.FloatTensor | None = None, + ) -> HeliosDMDSchedulerOutput | tuple: + pred_image_or_video = self.convert_flow_pred_to_x0( + flow_pred=model_output, + xt=sample, + timestep=torch.full((model_output.shape[0],), timestep, dtype=torch.long, device=model_output.device), + sigmas=dmd_sigmas, + timesteps=dmd_timesteps, + ) + if cur_sampling_step < len(all_timesteps) - 1: + prev_sample = self.add_noise( + pred_image_or_video, + dmd_noisy_tensor, + torch.full( + (model_output.shape[0],), + all_timesteps[cur_sampling_step + 1], + dtype=torch.long, + device=model_output.device, + ), + sigmas=dmd_sigmas, + timesteps=dmd_timesteps, + ) + else: + prev_sample = pred_image_or_video + + if not return_dict: + return (prev_sample,) + + return HeliosDMDSchedulerOutput(prev_sample=prev_sample) + + def reset_scheduler_history(self): + self._step_index = None + self._begin_index = None + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_heun_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_heun_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..df2593493aeb33868910ae8d7c28b2506cd08cab --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_heun_discrete.py @@ -0,0 +1,711 @@ +# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->HeunDiscrete +class HeunDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + Scheduler with Heun steps for discrete beta schedules. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, `"squaredcos_cap_v2"`, or `"exp"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, `squaredcos_cap_v2`, or `exp`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + prediction_type (`"epsilon"`, `"sample"`, or `"v_prediction"`, defaults to `"epsilon"`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 2 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, # sensible defaults + beta_end: float = 0.012, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + prediction_type: str = "epsilon", + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + clip_sample: bool = False, + clip_sample_range: float = 1.0, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + ) -> None: + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="cosine") + elif beta_schedule == "exp": + self.betas = betas_for_alpha_bar(num_train_timesteps, alpha_transform_type="exp") + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # set all values + self.set_timesteps(num_train_timesteps, None, num_train_timesteps) + self.use_karras_sigmas = use_karras_sigmas + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + @property + def init_noise_sigma(self): + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + return self.sigmas.max() + + return (self.sigmas.max() ** 2 + 1) ** 0.5 + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input( + self, + sample: torch.Tensor, + timestep: float | torch.Tensor, + ) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = sample / ((sigma**2 + 1) ** 0.5) + return sample + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + num_train_timesteps: int | None = None, + timesteps: list[int] | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*, defaults to `None`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str`, `torch.device`, *optional*, defaults to `None`): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + num_train_timesteps (`int`, *optional*, defaults to `None`): + The number of diffusion steps used when training the model. If `None`, the default + `num_train_timesteps` attribute is used. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, timesteps will be + generated based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` + must be `None`, and `timestep_spacing` attribute will be ignored. + """ + if num_inference_steps is None and timesteps is None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `custom_timesteps`.") + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.") + if timesteps is not None and self.config.use_karras_sigmas: + raise ValueError("Cannot use `timesteps` with `config.use_karras_sigmas = True`") + if timesteps is not None and self.config.use_exponential_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_exponential_sigmas = True`.") + if timesteps is not None and self.config.use_beta_sigmas: + raise ValueError("Cannot set `timesteps` with `config.use_beta_sigmas = True`.") + + num_inference_steps = num_inference_steps or len(timesteps) + self.num_inference_steps = num_inference_steps + num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps + + if timesteps is not None: + timesteps = np.array(timesteps, dtype=np.float32) + else: + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.float32) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(np.float32) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=self.num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + sigmas = torch.from_numpy(sigmas).to(device=device) + self.sigmas = torch.cat([sigmas[:1], sigmas[1:-1].repeat_interleave(2), sigmas[-1:]]) + + timesteps = torch.from_numpy(timesteps) + timesteps = torch.cat([timesteps[:1], timesteps[1:].repeat_interleave(2)]) + + self.timesteps = timesteps.to(device=device, dtype=torch.float32) + + # empty dt and derivative + self.prev_derivative = None + self.dt = None + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + @property + def state_in_first_order(self): + return self.dt is None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor | np.ndarray, + timestep: float | torch.Tensor, + sample: torch.Tensor | np.ndarray, + return_dict: bool = True, + ) -> HeunDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_heun_discrete.HeunDiscreteSchedulerOutput`] or + tuple. + + Returns: + [`~schedulers.scheduling_heun_discrete.HeunDiscreteSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_heun_discrete.HeunDiscreteSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + if self.step_index is None: + self._init_step_index(timestep) + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + sigma_next = self.sigmas[self.step_index + 1] + else: + # 2nd order / Heun's method + sigma = self.sigmas[self.step_index - 1] + sigma_next = self.sigmas[self.step_index] + + # currently only gamma=0 is supported. This usually works best anyways. + # We can support gamma in the future but then need to scale the timestep before + # passing it to the model which requires a change in API + gamma = 0 + sigma_hat = sigma * (gamma + 1) # Note: sigma_hat == sigma for now + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + sigma_input = sigma_hat if self.state_in_first_order else sigma_next + pred_original_sample = sample - sigma_input * model_output + elif self.config.prediction_type == "v_prediction": + sigma_input = sigma_hat if self.state_in_first_order else sigma_next + pred_original_sample = model_output * (-sigma_input / (sigma_input**2 + 1) ** 0.5) + ( + sample / (sigma_input**2 + 1) + ) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + if self.config.clip_sample: + pred_original_sample = pred_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + if self.state_in_first_order: + # 2. Convert to an ODE derivative for 1st order + derivative = (sample - pred_original_sample) / sigma_hat + # 3. delta timestep + dt = sigma_next - sigma_hat + + # store for 2nd order step + self.prev_derivative = derivative + self.dt = dt + self.sample = sample + else: + # 2. 2nd order / Heun's method + derivative = (sample - pred_original_sample) / sigma_next + derivative = (self.prev_derivative + derivative) / 2 + + # 3. take prev timestep & sample + dt = self.dt + sample = self.sample + + # free dt and derivative + # Note, this puts the scheduler in "first order mode" + self.prev_derivative = None + self.dt = None + self.sample = None + + prev_sample = sample + derivative * dt + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return HeunDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ipndm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ipndm.py new file mode 100644 index 0000000000000000000000000000000000000000..f2e744e0f2afd29b49b6d06c961a9cb69b9cb74c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ipndm.py @@ -0,0 +1,279 @@ +# Copyright 2025 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from .scheduling_utils import SchedulerMixin, SchedulerOutput + + +class IPNDMScheduler(SchedulerMixin, ConfigMixin): + """ + A fourth-order Improved Pseudo Linear Multistep scheduler. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + trained_betas (`np.ndarray` or `List[float]`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + trained_betas: np.ndarray | list[float] | None = None, + ): + # set `betas`, `alphas`, `timesteps` + self.set_timesteps(num_train_timesteps) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://huggingface.co/papers/2202.09778 + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + # running values + self.ets = [] + self._step_index = None + self._begin_index = None + + @property + def step_index(self) -> int | None: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + + Returns: + `int` or `None`: + The index counter for current timestep. + """ + return self._step_index + + @property + def begin_index(self) -> int | None: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + + Returns: + `int` or `None`: + The index for the first timestep. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device | None = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + steps = torch.linspace(1, 0, num_inference_steps + 1)[:-1] + steps = torch.cat([steps, torch.tensor([0.0])]) + + if self.config.trained_betas is not None: + self.betas = torch.tensor(self.config.trained_betas, dtype=torch.float32) + else: + self.betas = torch.sin(steps * math.pi / 2) ** 2 + + self.alphas = (1.0 - self.betas**2) ** 0.5 + + timesteps = (torch.atan2(self.betas, self.alphas) / math.pi * 2)[:-1] + self.timesteps = timesteps.to(device) + + self.ets = [] + self._step_index = None + self._begin_index = None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the linear multistep method. It performs one forward pass multiple times to approximate the solution. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + if self.step_index is None: + self._init_step_index(timestep) + + timestep_index = self.step_index + prev_timestep_index = self.step_index + 1 + + ets = sample * self.betas[timestep_index] + model_output * self.alphas[timestep_index] + self.ets.append(ets) + + if len(self.ets) == 1: + ets = self.ets[-1] + elif len(self.ets) == 2: + ets = (3 * self.ets[-1] - self.ets[-2]) / 2 + elif len(self.ets) == 3: + ets = (23 * self.ets[-1] - 16 * self.ets[-2] + 5 * self.ets[-3]) / 12 + else: + ets = (1 / 24) * (55 * self.ets[-1] - 59 * self.ets[-2] + 37 * self.ets[-3] - 9 * self.ets[-4]) + + prev_sample = self._get_prev_sample(sample, timestep_index, prev_timestep_index, ets) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def _get_prev_sample( + self, + sample: torch.Tensor, + timestep_index: int, + prev_timestep_index: int, + ets: torch.Tensor, + ) -> torch.Tensor: + """ + Predicts the previous sample based on the current sample, timestep indices, and running model outputs. + + Args: + sample (`torch.Tensor`): + The current sample. + timestep_index (`int`): + Index of the current timestep in the schedule. + prev_timestep_index (`int`): + Index of the previous timestep in the schedule. + ets (`torch.Tensor`): + The running sequence of model outputs. + + Returns: + `torch.Tensor`: + The predicted previous sample. + """ + alpha = self.alphas[timestep_index] + sigma = self.betas[timestep_index] + + next_alpha = self.alphas[prev_timestep_index] + next_sigma = self.betas[prev_timestep_index] + + pred = (sample - sigma * ets) / max(alpha, 1e-8) + prev_sample = next_alpha * pred + ets * next_sigma + + return prev_sample + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..706190e291a479ee849bdbc73eba809c7dd03e4c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py @@ -0,0 +1,718 @@ +# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->KDPM2AncestralDiscrete +class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + KDPM2DiscreteScheduler with ancestral sampling is inspired by the DPMSolver2 and Algorithm 2 from the [Elucidating + the Design Space of Diffusion-Based Generative Models](https://huggingface.co/papers/2206.00364) paper. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.00085): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.012): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear` or `scaled_linear`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 2 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, # sensible defaults + beta_end: float = 0.012, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + prediction_type: str = "epsilon", + timestep_spacing: str = "linspace", + steps_offset: int = 0, + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # set all values + self.set_timesteps(num_train_timesteps, None, num_train_timesteps) + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self): + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + return self.sigmas.max() + + return (self.sigmas.max() ** 2 + 1) ** 0.5 + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input( + self, + sample: torch.Tensor, + timestep: float | torch.Tensor, + ) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + else: + sigma = self.sigmas_interpol[self.step_index - 1] + + sample = sample / ((sigma**2 + 1) ** 0.5) + return sample + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device | None = None, + num_train_timesteps: int | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + + num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.float32) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(np.float32) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + self.log_sigmas = torch.from_numpy(log_sigmas).to(device) + sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + sigmas = torch.from_numpy(sigmas).to(device=device) + + # compute up and down sigmas + sigmas_next = sigmas.roll(-1) + sigmas_next[-1] = 0.0 + sigmas_up = (sigmas_next**2 * (sigmas**2 - sigmas_next**2) / sigmas**2) ** 0.5 + sigmas_down = (sigmas_next**2 - sigmas_up**2) ** 0.5 + sigmas_down[-1] = 0.0 + + # compute interpolated sigmas + sigmas_interpol = sigmas.log().lerp(sigmas_down.log(), 0.5).exp() + sigmas_interpol[-2:] = 0.0 + + # set sigmas + self.sigmas = torch.cat([sigmas[:1], sigmas[1:].repeat_interleave(2), sigmas[-1:]]) + self.sigmas_interpol = torch.cat( + [sigmas_interpol[:1], sigmas_interpol[1:].repeat_interleave(2), sigmas_interpol[-1:]] + ) + self.sigmas_up = torch.cat([sigmas_up[:1], sigmas_up[1:].repeat_interleave(2), sigmas_up[-1:]]) + self.sigmas_down = torch.cat([sigmas_down[:1], sigmas_down[1:].repeat_interleave(2), sigmas_down[-1:]]) + + if str(device).startswith("mps"): + timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + timesteps = torch.from_numpy(timesteps).to(device) + + sigmas_interpol = sigmas_interpol.cpu() + log_sigmas = self.log_sigmas.cpu() + timesteps_interpol = np.array( + [self._sigma_to_t(sigma_interpol, log_sigmas) for sigma_interpol in sigmas_interpol] + ) + + timesteps_interpol = torch.from_numpy(timesteps_interpol).to(device, dtype=timesteps.dtype) + interleaved_timesteps = torch.stack((timesteps_interpol[:-2, None], timesteps[1:, None]), dim=-1).flatten() + + self.timesteps = torch.cat([timesteps[:1], interleaved_timesteps]) + + self.sample = None + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma, log_sigmas): + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + @property + def state_in_first_order(self): + return self.sample is None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor | np.ndarray, + timestep: float | torch.Tensor, + sample: torch.Tensor | np.ndarray, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> KDPM2AncestralDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`): + Whether or not to return a + [`~schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteSchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteSchedulerOutput`] or `tuple`: + If return_dict is `True`, + [`~schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + if self.step_index is None: + self._init_step_index(timestep) + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + sigma_interpol = self.sigmas_interpol[self.step_index] + sigma_up = self.sigmas_up[self.step_index] + sigma_down = self.sigmas_down[self.step_index - 1] + else: + # 2nd order / KPDM2's method + sigma = self.sigmas[self.step_index - 1] + sigma_interpol = self.sigmas_interpol[self.step_index - 1] + sigma_up = self.sigmas_up[self.step_index - 1] + sigma_down = self.sigmas_down[self.step_index - 1] + + # currently only gamma=0 is supported. This usually works best anyways. + # We can support gamma in the future but then need to scale the timestep before + # passing it to the model which requires a change in API + gamma = 0 + sigma_hat = sigma * (gamma + 1) # Note: sigma_hat == sigma for now + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol + pred_original_sample = sample - sigma_input * model_output + elif self.config.prediction_type == "v_prediction": + sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol + pred_original_sample = model_output * (-sigma_input / (sigma_input**2 + 1) ** 0.5) + ( + sample / (sigma_input**2 + 1) + ) + elif self.config.prediction_type == "sample": + raise NotImplementedError("prediction_type not implemented yet: sample") + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + if self.state_in_first_order: + # 2. Convert to an ODE derivative for 1st order + derivative = (sample - pred_original_sample) / sigma_hat + # 3. delta timestep + dt = sigma_interpol - sigma_hat + + # store for 2nd order step + self.sample = sample + self.dt = dt + prev_sample = sample + derivative * dt + else: + # DPM-Solver-2 + # 2. Convert to an ODE derivative for 2nd order + derivative = (sample - pred_original_sample) / sigma_interpol + # 3. delta timestep + dt = sigma_down - sigma_hat + + sample = self.sample + self.sample = None + + prev_sample = sample + derivative * dt + noise = randn_tensor( + model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator + ) + prev_sample = prev_sample + noise * sigma_up + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return KDPM2AncestralDiscreteSchedulerOutput( + prev_sample=prev_sample, pred_original_sample=pred_original_sample + ) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_k_dpm_2_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_k_dpm_2_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..3092c610b46dee31b17bade4e4f1dd760a99e734 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_k_dpm_2_discrete.py @@ -0,0 +1,690 @@ +# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->KDPM2Discrete +class KDPM2DiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + KDPM2DiscreteScheduler is inspired by the DPMSolver2 and Algorithm 2 from the [Elucidating the Design Space of + Diffusion-Based Generative Models](https://huggingface.co/papers/2206.00364) paper. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.00085): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.012): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear` or `scaled_linear`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 2 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, # sensible defaults + beta_end: float = 0.012, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + prediction_type: str = "epsilon", + timestep_spacing: str = "linspace", + steps_offset: int = 0, + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # set all values + self.set_timesteps(num_train_timesteps, None, num_train_timesteps) + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self): + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + return self.sigmas.max() + + return (self.sigmas.max() ** 2 + 1) ** 0.5 + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input( + self, + sample: torch.Tensor, + timestep: float | torch.Tensor, + ) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + if self.step_index is None: + self._init_step_index(timestep) + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + else: + sigma = self.sigmas_interpol[self.step_index] + + sample = sample / ((sigma**2 + 1) ** 0.5) + return sample + + def set_timesteps( + self, + num_inference_steps: int, + device: str | torch.device = None, + num_train_timesteps: int = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + + num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.float32) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(num_train_timesteps, 0, -step_ratio)).round().copy().astype(np.float32) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + self.log_sigmas = torch.from_numpy(log_sigmas).to(device=device) + sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + sigmas = torch.from_numpy(sigmas).to(device=device) + + # interpolate sigmas + sigmas_interpol = sigmas.log().lerp(sigmas.roll(1).log(), 0.5).exp() + + self.sigmas = torch.cat([sigmas[:1], sigmas[1:].repeat_interleave(2), sigmas[-1:]]) + self.sigmas_interpol = torch.cat( + [sigmas_interpol[:1], sigmas_interpol[1:].repeat_interleave(2), sigmas_interpol[-1:]] + ) + + timesteps = torch.from_numpy(timesteps).to(device) + + # interpolate timesteps + sigmas_interpol = sigmas_interpol.cpu() + log_sigmas = self.log_sigmas.cpu() + timesteps_interpol = np.array( + [self._sigma_to_t(sigma_interpol, log_sigmas) for sigma_interpol in sigmas_interpol] + ) + timesteps_interpol = torch.from_numpy(timesteps_interpol).to(device, dtype=timesteps.dtype) + interleaved_timesteps = torch.stack((timesteps_interpol[1:-1, None], timesteps[1:, None]), dim=-1).flatten() + + self.timesteps = torch.cat([timesteps[:1], interleaved_timesteps]) + + self.sample = None + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def state_in_first_order(self): + return self.sample is None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma, log_sigmas): + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def step( + self, + model_output: torch.Tensor | np.ndarray, + timestep: float | torch.Tensor, + sample: torch.Tensor | np.ndarray, + return_dict: bool = True, + ) -> KDPM2DiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] or + tuple. + + Returns: + [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + if self.step_index is None: + self._init_step_index(timestep) + + if self.state_in_first_order: + sigma = self.sigmas[self.step_index] + sigma_interpol = self.sigmas_interpol[self.step_index + 1] + sigma_next = self.sigmas[self.step_index + 1] + else: + # 2nd order / KDPM2's method + sigma = self.sigmas[self.step_index - 1] + sigma_interpol = self.sigmas_interpol[self.step_index] + sigma_next = self.sigmas[self.step_index] + + # currently only gamma=0 is supported. This usually works best anyways. + # We can support gamma in the future but then need to scale the timestep before + # passing it to the model which requires a change in API + gamma = 0 + sigma_hat = sigma * (gamma + 1) # Note: sigma_hat == sigma for now + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol + pred_original_sample = sample - sigma_input * model_output + elif self.config.prediction_type == "v_prediction": + sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol + pred_original_sample = model_output * (-sigma_input / (sigma_input**2 + 1) ** 0.5) + ( + sample / (sigma_input**2 + 1) + ) + elif self.config.prediction_type == "sample": + raise NotImplementedError("prediction_type not implemented yet: sample") + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + if self.state_in_first_order: + # 2. Convert to an ODE derivative for 1st order + derivative = (sample - pred_original_sample) / sigma_hat + # 3. delta timestep + dt = sigma_interpol - sigma_hat + + # store for 2nd order step + self.sample = sample + else: + # DPM-Solver-2 + # 2. Convert to an ODE derivative for 2nd order + derivative = (sample - pred_original_sample) / sigma_interpol + + # 3. delta timestep + dt = sigma_next - sigma_hat + + sample = self.sample + self.sample = None + + # upon completion increase step index by one + self._step_index += 1 + + prev_sample = sample + derivative * dt + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return KDPM2DiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_karras_ve_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_karras_ve_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..04c46220fccadd7240771e327cf1370e5a5b0063 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_karras_ve_flax.py @@ -0,0 +1,243 @@ +# Copyright 2025 NVIDIA and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from dataclasses import dataclass + +import flax +import jax +import jax.numpy as jnp +from jax import random + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from .scheduling_utils_flax import FlaxSchedulerMixin + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class KarrasVeSchedulerState: + # setable values + num_inference_steps: int = None + timesteps: jnp.ndarray | None = None + schedule: jnp.ndarray | None = None # sigma(t_i) + + @classmethod + def create(cls): + return cls() + + +@dataclass +class FlaxKarrasVeOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + derivative (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): + Derivative of predicted original image sample (x_0). + state (`KarrasVeSchedulerState`): the `FlaxKarrasVeScheduler` state data class. + """ + + prev_sample: jnp.ndarray + derivative: jnp.ndarray + state: KarrasVeSchedulerState + + +class FlaxKarrasVeScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and + the VE column of Table 1 from [1] for reference. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://huggingface.co/papers/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic + differential equations." https://huggingface.co/papers/2011.13456 + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of + Diffusion-Based Generative Models." https://huggingface.co/papers/2206.00364. The grid search values used to find + the optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper. + + Args: + sigma_min (`float`): minimum noise magnitude + sigma_max (`float`): maximum noise magnitude + s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling. + A reasonable range is [1.000, 1.011]. + s_churn (`float`): the parameter controlling the overall amount of stochasticity. + A reasonable range is [0, 100]. + s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity). + A reasonable range is [0, 10]. + s_max (`float`): the end value of the sigma range where we add noise. + A reasonable range is [0.2, 80]. + """ + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + sigma_min: float = 0.02, + sigma_max: float = 100, + s_noise: float = 1.007, + s_churn: float = 80, + s_min: float = 0.05, + s_max: float = 50, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + def create_state(self): + return KarrasVeSchedulerState.create() + + def set_timesteps( + self, state: KarrasVeSchedulerState, num_inference_steps: int, shape: tuple = () + ) -> KarrasVeSchedulerState: + """ + Sets the continuous timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`KarrasVeSchedulerState`): + the `FlaxKarrasVeScheduler` state data class. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + + """ + timesteps = jnp.arange(0, num_inference_steps)[::-1].copy() + schedule = [ + ( + self.config.sigma_max**2 + * (self.config.sigma_min**2 / self.config.sigma_max**2) ** (i / (num_inference_steps - 1)) + ) + for i in timesteps + ] + + return state.replace( + num_inference_steps=num_inference_steps, + schedule=jnp.array(schedule, dtype=jnp.float32), + timesteps=timesteps, + ) + + def add_noise_to_input( + self, + state: KarrasVeSchedulerState, + sample: jnp.ndarray, + sigma: float, + key: jax.Array, + ) -> tuple[jnp.ndarray, float]: + """ + Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a + higher noise level sigma_hat = sigma_i + gamma_i*sigma_i. + + TODO Args: + """ + if self.config.s_min <= sigma <= self.config.s_max: + gamma = min(self.config.s_churn / state.num_inference_steps, 2**0.5 - 1) + else: + gamma = 0 + + # sample eps ~ N(0, S_noise^2 * I) + key = random.split(key, num=1) + eps = self.config.s_noise * random.normal(key=key, shape=sample.shape) + sigma_hat = sigma + gamma * sigma + sample_hat = sample + ((sigma_hat**2 - sigma**2) ** 0.5 * eps) + + return sample_hat, sigma_hat + + def step( + self, + state: KarrasVeSchedulerState, + model_output: jnp.ndarray, + sigma_hat: float, + sigma_prev: float, + sample_hat: jnp.ndarray, + return_dict: bool = True, + ) -> FlaxKarrasVeOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + state (`KarrasVeSchedulerState`): the `FlaxKarrasVeScheduler` state data class. + model_output (`torch.Tensor` or `np.ndarray`): direct output from learned diffusion model. + sigma_hat (`float`): TODO + sigma_prev (`float`): TODO + sample_hat (`torch.Tensor` or `np.ndarray`): TODO + return_dict (`bool`): option for returning tuple rather than FlaxKarrasVeOutput class + + Returns: + [`~schedulers.scheduling_karras_ve_flax.FlaxKarrasVeOutput`] or `tuple`: Updated sample in the diffusion + chain and derivative. [`~schedulers.scheduling_karras_ve_flax.FlaxKarrasVeOutput`] if `return_dict` is + True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. + """ + + pred_original_sample = sample_hat + sigma_hat * model_output + derivative = (sample_hat - pred_original_sample) / sigma_hat + sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative + + if not return_dict: + return (sample_prev, derivative, state) + + return FlaxKarrasVeOutput(prev_sample=sample_prev, derivative=derivative, state=state) + + def step_correct( + self, + state: KarrasVeSchedulerState, + model_output: jnp.ndarray, + sigma_hat: float, + sigma_prev: float, + sample_hat: jnp.ndarray, + sample_prev: jnp.ndarray, + derivative: jnp.ndarray, + return_dict: bool = True, + ) -> FlaxKarrasVeOutput | tuple: + """ + Correct the predicted sample based on the output model_output of the network. TODO complete description + + Args: + state (`KarrasVeSchedulerState`): the `FlaxKarrasVeScheduler` state data class. + model_output (`torch.Tensor` or `np.ndarray`): direct output from learned diffusion model. + sigma_hat (`float`): TODO + sigma_prev (`float`): TODO + sample_hat (`torch.Tensor` or `np.ndarray`): TODO + sample_prev (`torch.Tensor` or `np.ndarray`): TODO + derivative (`torch.Tensor` or `np.ndarray`): TODO + return_dict (`bool`): option for returning tuple rather than FlaxKarrasVeOutput class + + Returns: + prev_sample (TODO): updated sample in the diffusion chain. derivative (TODO): TODO + + """ + pred_original_sample = sample_prev + sigma_prev * model_output + derivative_corr = (sample_prev - pred_original_sample) / sigma_prev + sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr) + + if not return_dict: + return (sample_prev, derivative, state) + + return FlaxKarrasVeOutput(prev_sample=sample_prev, derivative=derivative, state=state) + + def add_noise(self, state: KarrasVeSchedulerState, original_samples, noise, timesteps): + raise NotImplementedError() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lcm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lcm.py new file mode 100644 index 0000000000000000000000000000000000000000..e70bc4745a24b975efe9dc74e4c6ba9e832db3a2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lcm.py @@ -0,0 +1,736 @@ +# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class LCMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + denoised: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class LCMScheduler(SchedulerMixin, ConfigMixin): + """ + `LCMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with + non-Markovian guidance. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. [`~ConfigMixin`] takes care of storing all config + attributes that are passed in the scheduler's `__init__` function, such as `num_train_timesteps`. They can be + accessed via `scheduler.config.num_train_timesteps`. [`SchedulerMixin`] provides general loading and saving + functionality via the [`SchedulerMixin.save_pretrained`] and [`~SchedulerMixin.from_pretrained`] functions. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + original_inference_steps (`int`, *optional*, defaults to 50): + The default number of inference steps used to generate a linearly-spaced timestep schedule, from which we + will ultimately take `num_inference_steps` evenly spaced timesteps to form the final timestep schedule. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, defaults to `True`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the alpha value at step 0. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`str`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + timestep_scaling (`float`, defaults to 10.0): + The factor the timesteps will be multiplied by when calculating the consistency model boundary conditions + `c_skip` and `c_out`. Increasing this will decrease the approximation error (although the approximation + error at the default of `10.0` is already pretty small). + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, + beta_end: float = 0.012, + beta_schedule: str = "scaled_linear", + trained_betas: np.ndarray | list[float] | None = None, + original_inference_steps: int = 50, + clip_sample: bool = False, + clip_sample_range: float = 1.0, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: str = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + timestep_spacing: str = "leading", + timestep_scaling: float = 10.0, + rescale_betas_zero_snr: bool = False, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + self.custom_timesteps = False + + self._step_index = None + self._begin_index = None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + @property + def step_index(self): + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + original_inference_steps: int | None = None, + timesteps: list[int] | None = None, + strength: int = 1.0, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. If used, + `timesteps` must be `None`. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + original_inference_steps (`int`, *optional*): + The original number of inference steps, which will be used to generate a linearly-spaced timestep + schedule (which is different from the standard `diffusers` implementation). We will then take + `num_inference_steps` timesteps from this schedule, evenly spaced in terms of indices, and use that as + our final timestep schedule. If not set, this will default to the `original_inference_steps` attribute. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default + timestep spacing strategy of equal spacing between timesteps on the training/distillation timestep + schedule is used. If `timesteps` is passed, `num_inference_steps` must be `None`. + """ + # 0. Check inputs + if num_inference_steps is None and timesteps is None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `custom_timesteps`.") + + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.") + + # 1. Calculate the LCM original training/distillation timestep schedule. + original_steps = ( + original_inference_steps if original_inference_steps is not None else self.config.original_inference_steps + ) + + if original_steps > self.config.num_train_timesteps: + raise ValueError( + f"`original_steps`: {original_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + # LCM Timesteps Setting + # The skipping step parameter k from the paper. + k = self.config.num_train_timesteps // original_steps + # LCM Training/Distillation Steps Schedule + # Currently, only a linearly-spaced schedule is supported (same as in the LCM distillation scripts). + lcm_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * k - 1 + + # 2. Calculate the LCM inference timestep schedule. + if timesteps is not None: + # 2.1 Handle custom timestep schedules. + train_timesteps = set(lcm_origin_timesteps) + non_train_timesteps = [] + for i in range(1, len(timesteps)): + if timesteps[i] >= timesteps[i - 1]: + raise ValueError("`custom_timesteps` must be in descending order.") + + if timesteps[i] not in train_timesteps: + non_train_timesteps.append(timesteps[i]) + + if timesteps[0] >= self.config.num_train_timesteps: + raise ValueError( + f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}." + ) + + # Raise warning if timestep schedule does not start with self.config.num_train_timesteps - 1 + if strength == 1.0 and timesteps[0] != self.config.num_train_timesteps - 1: + logger.warning( + f"The first timestep on the custom timestep schedule is {timesteps[0]}, not" + f" `self.config.num_train_timesteps - 1`: {self.config.num_train_timesteps - 1}. You may get" + f" unexpected results when using this timestep schedule." + ) + + # Raise warning if custom timestep schedule contains timesteps not on original timestep schedule + if non_train_timesteps: + logger.warning( + f"The custom timestep schedule contains the following timesteps which are not on the original" + f" training/distillation timestep schedule: {non_train_timesteps}. You may get unexpected results" + f" when using this timestep schedule." + ) + + # Raise warning if custom timestep schedule is longer than original_steps + if len(timesteps) > original_steps: + logger.warning( + f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds the" + f" the length of the timestep schedule used for training: {original_steps}. You may get some" + f" unexpected results when using this timestep schedule." + ) + + timesteps = np.array(timesteps, dtype=np.int64) + self.num_inference_steps = len(timesteps) + self.custom_timesteps = True + + # Apply strength (e.g. for img2img pipelines) (see StableDiffusionImg2ImgPipeline.get_timesteps) + init_timestep = min(int(self.num_inference_steps * strength), self.num_inference_steps) + t_start = max(self.num_inference_steps - init_timestep, 0) + timesteps = timesteps[t_start * self.order :] + # TODO: also reset self.num_inference_steps? + else: + # 2.2 Create the "standard" LCM inference timestep schedule. + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + skipping_step = len(lcm_origin_timesteps) // num_inference_steps + + if skipping_step < 1: + raise ValueError( + f"The combination of `original_steps x strength`: {original_steps} x {strength} is smaller than `num_inference_steps`: {num_inference_steps}. Make sure to either reduce `num_inference_steps` to a value smaller than {int(original_steps * strength)} or increase `strength` to a value higher than {float(num_inference_steps / original_steps)}." + ) + + self.num_inference_steps = num_inference_steps + + if num_inference_steps > original_steps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:" + f" {original_steps} because the final timestep schedule will be a subset of the" + f" `original_inference_steps`-sized initial timestep schedule." + ) + + # LCM Inference Steps Schedule + lcm_origin_timesteps = lcm_origin_timesteps[::-1].copy() + # Select (approximately) evenly spaced indices from lcm_origin_timesteps. + inference_indices = np.linspace(0, len(lcm_origin_timesteps), num=num_inference_steps, endpoint=False) + inference_indices = np.floor(inference_indices).astype(np.int64) + timesteps = lcm_origin_timesteps[inference_indices] + + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long) + + self._step_index = None + self._begin_index = None + + def get_scalings_for_boundary_condition_discrete(self, timestep): + self.sigma_data = 0.5 # Default: 0.5 + scaled_timestep = timestep * self.config.timestep_scaling + + c_skip = self.sigma_data**2 / (scaled_timestep**2 + self.sigma_data**2) + c_out = scaled_timestep / (scaled_timestep**2 + self.sigma_data**2) ** 0.5 + return c_skip, c_out + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> LCMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] or `tuple`. + Returns: + [`~schedulers.scheduling_utils.LCMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_lcm.LCMSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # 1. get previous step value + prev_step_index = self.step_index + 1 + if prev_step_index < len(self.timesteps): + prev_timestep = self.timesteps[prev_step_index] + else: + prev_timestep = timestep + + # 2. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 3. Get scalings for boundary conditions + c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep) + + # 4. Compute the predicted original sample x_0 based on the model parameterization + if self.config.prediction_type == "epsilon": # noise-prediction + predicted_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt() + elif self.config.prediction_type == "sample": # x-prediction + predicted_original_sample = model_output + elif self.config.prediction_type == "v_prediction": # v-prediction + predicted_original_sample = alpha_prod_t.sqrt() * sample - beta_prod_t.sqrt() * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" + " `v_prediction` for `LCMScheduler`." + ) + + # 5. Clip or threshold "predicted x_0" + if self.config.thresholding: + predicted_original_sample = self._threshold_sample(predicted_original_sample) + elif self.config.clip_sample: + predicted_original_sample = predicted_original_sample.clamp( + -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 6. Denoise model output using boundary conditions + denoised = c_out * predicted_original_sample + c_skip * sample + + # 7. Sample and inject noise z ~ N(0, I) for MultiStep Inference + # Noise is not used on the final timestep of the timestep schedule. + # This also means that noise is not used for one-step sampling. + if self.step_index != self.num_inference_steps - 1: + noise = randn_tensor( + model_output.shape, generator=generator, device=model_output.device, dtype=denoised.dtype + ) + prev_sample = alpha_prod_t_prev.sqrt() * denoised + beta_prod_t_prev.sqrt() * noise + else: + prev_sample = denoised + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample, denoised) + + return LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self): + return self.config.num_train_timesteps + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep + def previous_timestep(self, timestep): + """ + Compute the previous timestep in the diffusion chain. + + Args: + timestep (`int`): + The current timestep. + + Returns: + `int` or `torch.Tensor`: + The previous timestep. + """ + if self.custom_timesteps or self.num_inference_steps: + index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] + if index == self.timesteps.shape[0] - 1: + prev_t = torch.tensor(-1) + else: + prev_t = self.timesteps[index + 1] + else: + prev_t = timestep - 1 + return prev_t diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lms_discrete.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lms_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..e917adda9516e840f4997f03afdef47c37082fbf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lms_discrete.py @@ -0,0 +1,675 @@ +# Copyright 2025 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import warnings +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, is_scipy_available +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin + + +if is_scipy_available(): + import scipy.stats + from scipy import integrate + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->LMSDiscrete +class LMSDiscreteSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): + """ + A linear multistep scheduler for discrete beta schedules. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to `1000`): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to `0.0001`): + The starting `beta` value of inference. + beta_end (`float`, defaults to `0.02`): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + prediction_type (`"epsilon"`, `"sample"`, or `"v_prediction"`, defaults to `"epsilon"`): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to `0`): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + ): + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + sigmas = np.concatenate([sigmas[::-1], [0.0]]).astype(np.float32) + self.sigmas = torch.from_numpy(sigmas) + + # setable values + self.num_inference_steps = None + self.use_karras_sigmas = use_karras_sigmas + self.set_timesteps(num_train_timesteps, None) + self.derivatives = [] + self.is_scale_input_called = False + + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def init_noise_sigma(self) -> float | torch.Tensor: + """ + The standard deviation of the initial noise distribution. + + Returns: + `float` or `torch.Tensor`: + The standard deviation of the initial noise distribution, computed based on the maximum sigma value and + the timestep spacing configuration. + """ + # standard deviation of the initial noise distribution + if self.config.timestep_spacing in ["linspace", "trailing"]: + return self.sigmas.max() + + return (self.sigmas.max() ** 2 + 1) ** 0.5 + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase by 1 after each scheduler step. + + Returns: + `int` or `None`: + The current step index, or `None` if not initialized. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + + Returns: + `int` or `None`: + The begin index for the scheduler, or `None` if not set. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input(self, sample: torch.Tensor, timestep: float | torch.Tensor) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`float` or `torch.Tensor`): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + sample = sample / ((sigma**2 + 1) ** 0.5) + self.is_scale_input_called = True + return sample + + def get_lms_coefficient(self, order: int, t: int, current_order: int) -> float: + """ + Compute the linear multistep coefficient. + + Args: + order (`int`): + The order of the linear multistep method. + t (`int`): + The current timestep index. + current_order (`int`): + The current order for which to compute the coefficient. + + Returns: + `float`: + The computed linear multistep coefficient. + """ + + def lms_derivative(tau): + prod = 1.0 + for k in range(order): + if current_order == k: + continue + prod *= (tau - self.sigmas[t - k]) / (self.sigmas[t - current_order] - self.sigmas[t - k]) + return prod + + integrated_coeff = integrate.quad(lms_derivative, self.sigmas[t], self.sigmas[t + 1], epsrel=1e-4)[0] + + return integrated_coeff + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[ + ::-1 + ].copy() + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.float32) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(self.config.num_train_timesteps, 0, -step_ratio)).round().copy().astype(np.float32) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + + if self.config.use_karras_sigmas: + sigmas = self._convert_to_karras(in_sigmas=sigmas) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_exponential_sigmas: + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + elif self.config.use_beta_sigmas: + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + + sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas).to(device=device) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.float32) + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + self.derivatives = [] + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + def _convert_to_karras(self, in_sigmas: torch.Tensor) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + sigma_min: float = in_sigmas[-1].item() + sigma_max: float = in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, self.num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def step( + self, + model_output: torch.Tensor, + timestep: float | torch.Tensor, + sample: torch.Tensor, + order: int = 4, + return_dict: bool = True, + ) -> LMSDiscreteSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`float` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + order (`int`, defaults to 4): + The order of the linear multistep method. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if not self.is_scale_input_called: + warnings.warn( + "The `scale_model_input` function should be called before `step` to ensure correct denoising. " + "See `StableDiffusionPipeline` for a usage example." + ) + + if self.step_index is None: + self._init_step_index(timestep) + + sigma = self.sigmas[self.step_index] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + pred_original_sample = sample - sigma * model_output + elif self.config.prediction_type == "v_prediction": + # * c_out + input * c_skip + pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1)) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + # 2. Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + self.derivatives.append(derivative) + if len(self.derivatives) > order: + self.derivatives.pop(0) + + # 3. Compute linear multistep coefficients + order = min(self.step_index + 1, order) + lms_coeffs = [self.get_lms_coefficient(order, self.step_index, curr_order) for curr_order in range(order)] + + # 4. Compute previous sample based on the derivatives path + prev_sample = sample + sum( + coeff * derivative for coeff, derivative in zip(lms_coeffs, reversed(self.derivatives)) + ) + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return ( + prev_sample, + pred_original_sample, + ) + + return LMSDiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise tensor to add to the original samples. + timesteps (`torch.Tensor`): + The timesteps at which to add noise, determining the noise level from the schedule. + + Returns: + `torch.Tensor`: + The noisy samples with added noise scaled according to the timestep schedule. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + noisy_samples = original_samples + noise * sigma + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lms_discrete_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lms_discrete_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..65902678e1d9fff1061358e413a9043c9ffe0ac5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_lms_discrete_flax.py @@ -0,0 +1,307 @@ +# Copyright 2025 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import flax +import jax.numpy as jnp +from scipy import integrate + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + CommonSchedulerState, + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + broadcast_to_shape_from_left, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class LMSDiscreteSchedulerState: + common: CommonSchedulerState + + # setable values + init_noise_sigma: jnp.ndarray + timesteps: jnp.ndarray + sigmas: jnp.ndarray + num_inference_steps: int = None + + # running values + derivatives: jnp.ndarray | None = None + + @classmethod + def create( + cls, + common: CommonSchedulerState, + init_noise_sigma: jnp.ndarray, + timesteps: jnp.ndarray, + sigmas: jnp.ndarray, + ): + return cls( + common=common, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + sigmas=sigmas, + ) + + +@dataclass +class FlaxLMSSchedulerOutput(FlaxSchedulerOutput): + state: LMSDiscreteSchedulerState + + +class FlaxLMSDiscreteScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by + Katherine Crowson: + https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181 + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear` or `scaled_linear`. + trained_betas (`jnp.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + prediction_type (`str`, default `epsilon`, optional): + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4 + https://huggingface.co/papers/2210.02303) + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + the `dtype` used for params and computation. + """ + + _compatibles = [e.name for e in FlaxKarrasDiffusionSchedulers] + + dtype: jnp.dtype + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: jnp.ndarray | None = None, + prediction_type: str = "epsilon", + dtype: jnp.dtype = jnp.float32, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.dtype = dtype + + def create_state(self, common: CommonSchedulerState | None = None) -> LMSDiscreteSchedulerState: + if common is None: + common = CommonSchedulerState.create(self) + + timesteps = jnp.arange(0, self.config.num_train_timesteps).round()[::-1] + sigmas = ((1 - common.alphas_cumprod) / common.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + init_noise_sigma = sigmas.max() + + return LMSDiscreteSchedulerState.create( + common=common, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + sigmas=sigmas, + ) + + def scale_model_input(self, state: LMSDiscreteSchedulerState, sample: jnp.ndarray, timestep: int) -> jnp.ndarray: + """ + Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the K-LMS algorithm. + + Args: + state (`LMSDiscreteSchedulerState`): + the `FlaxLMSDiscreteScheduler` state data class instance. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + timestep (`int`): + current discrete timestep in the diffusion chain. + + Returns: + `jnp.ndarray`: scaled input sample + """ + (step_index,) = jnp.where(state.timesteps == timestep, size=1) + step_index = step_index[0] + + sigma = state.sigmas[step_index] + sample = sample / ((sigma**2 + 1) ** 0.5) + return sample + + def get_lms_coefficient(self, state: LMSDiscreteSchedulerState, order, t, current_order): + """ + Compute a linear multistep coefficient. + + Args: + order (TODO): + t (TODO): + current_order (TODO): + """ + + def lms_derivative(tau): + prod = 1.0 + for k in range(order): + if current_order == k: + continue + prod *= (tau - state.sigmas[t - k]) / (state.sigmas[t - current_order] - state.sigmas[t - k]) + return prod + + integrated_coeff = integrate.quad(lms_derivative, state.sigmas[t], state.sigmas[t + 1], epsrel=1e-4)[0] + + return integrated_coeff + + def set_timesteps( + self, + state: LMSDiscreteSchedulerState, + num_inference_steps: int, + shape: tuple = (), + ) -> LMSDiscreteSchedulerState: + """ + Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`LMSDiscreteSchedulerState`): + the `FlaxLMSDiscreteScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + """ + + timesteps = jnp.linspace( + self.config.num_train_timesteps - 1, + 0, + num_inference_steps, + dtype=self.dtype, + ) + + low_idx = jnp.floor(timesteps).astype(jnp.int32) + high_idx = jnp.ceil(timesteps).astype(jnp.int32) + + frac = jnp.mod(timesteps, 1.0) + + sigmas = ((1 - state.common.alphas_cumprod) / state.common.alphas_cumprod) ** 0.5 + sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] + sigmas = jnp.concatenate([sigmas, jnp.array([0.0], dtype=self.dtype)]) + + timesteps = timesteps.astype(jnp.int32) + + # initial running values + derivatives = jnp.zeros((0,) + shape, dtype=self.dtype) + + return state.replace( + timesteps=timesteps, + sigmas=sigmas, + num_inference_steps=num_inference_steps, + derivatives=derivatives, + ) + + def step( + self, + state: LMSDiscreteSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + order: int = 4, + return_dict: bool = True, + ) -> FlaxLMSSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + state (`LMSDiscreteSchedulerState`): the `FlaxLMSDiscreteScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + order: coefficient for multi-step inference. + return_dict (`bool`): option for returning tuple rather than FlaxLMSSchedulerOutput class + + Returns: + [`FlaxLMSSchedulerOutput`] or `tuple`: [`FlaxLMSSchedulerOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + sigma = state.sigmas[timestep] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + if self.config.prediction_type == "epsilon": + pred_original_sample = sample - sigma * model_output + elif self.config.prediction_type == "v_prediction": + # * c_out + input * c_skip + pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1)) + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`" + ) + + # 2. Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + state = state.replace(derivatives=jnp.append(state.derivatives, derivative)) + if len(state.derivatives) > order: + state = state.replace(derivatives=jnp.delete(state.derivatives, 0)) + + # 3. Compute linear multistep coefficients + order = min(timestep + 1, order) + lms_coeffs = [self.get_lms_coefficient(state, order, timestep, curr_order) for curr_order in range(order)] + + # 4. Compute previous sample based on the derivatives path + prev_sample = sample + sum( + coeff * derivative for coeff, derivative in zip(lms_coeffs, reversed(state.derivatives)) + ) + + if not return_dict: + return (prev_sample, state) + + return FlaxLMSSchedulerOutput(prev_sample=prev_sample, state=state) + + def add_noise( + self, + state: LMSDiscreteSchedulerState, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + sigma = state.sigmas[timesteps].flatten() + sigma = broadcast_to_shape_from_left(sigma, noise.shape) + + noisy_samples = original_samples + noise * sigma + + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ltx_euler_ancestral_rf.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ltx_euler_ancestral_rf.py new file mode 100644 index 0000000000000000000000000000000000000000..453c8515c301843f6efb490306cce95cf55e3872 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_ltx_euler_ancestral_rf.py @@ -0,0 +1,385 @@ +# Copyright 2025 Lightricks and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +LTXEulerAncestralRFScheduler + +This scheduler implements a K-diffusion style Euler-Ancestral sampler specialized for flow / CONST parameterization, +closely mirroring ComfyUI's `sample_euler_ancestral_RF` implementation used for LTX-Video. + +Reference implementation (ComfyUI): + comfy.k_diffusion.sampling.sample_euler_ancestral_RF +""" + +from dataclasses import dataclass + +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class LTXEulerAncestralRFSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.FloatTensor`): + Updated sample for the next step in the denoising process. + """ + + prev_sample: torch.FloatTensor + + +class LTXEulerAncestralRFScheduler(SchedulerMixin, ConfigMixin): + """ + Euler-Ancestral scheduler for LTX-Video (RF / CONST parametrization). + + This scheduler is intended for models where the network is trained with a CONST-like parameterization (as in LTXV / + FLUX). It approximates ComfyUI's `sample_euler_ancestral_RF` sampler and is useful when reproducing ComfyUI + workflows inside diffusers. + + The scheduler can either: + - reuse the [`FlowMatchEulerDiscreteScheduler`] sigma / timestep logic when only `num_inference_steps` is provided + (default diffusers-style usage), or + - follow an explicit ComfyUI-style sigma schedule when `sigmas` (or `timesteps`) are passed to [`set_timesteps`]. + + Args: + num_train_timesteps (`int`, defaults to 1000): + Included for config compatibility; not used to build the schedule. + eta (`float`, defaults to 1.0): + Stochasticity parameter. `eta=0.0` yields deterministic DDIM-like sampling; `eta=1.0` matches ComfyUI's + default RF behavior. + s_noise (`float`, defaults to 1.0): + Global scaling factor for the stochastic noise term. + """ + + # Allow config migration from the flow-match scheduler and back. + _compatibles = ["FlowMatchEulerDiscreteScheduler"] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + eta: float = 1.0, + s_noise: float = 1.0, + ): + # Note: num_train_timesteps is kept only for config compatibility. + self.num_inference_steps: int = None + self.sigmas: torch.Tensor | None = None + self.timesteps: torch.Tensor | None = None + self._step_index: int = None + self._begin_index: int = None + + @property + def step_index(self) -> int: + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It can be set from a pipeline with `set_begin_index` to support + image-to-image like workflows that start denoising part-way through the schedule. + """ + return self._begin_index + + def set_begin_index(self, begin_index: int = 0): + """ + Included for API compatibility; not strictly needed here but kept to allow pipelines that call + `set_begin_index`. + """ + self._begin_index = begin_index + + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Map a (continuous) `timestep` value to an index into `self.timesteps`. + + This follows the convention used in other discrete schedulers: if the same timestep value appears multiple + times in the schedule (which can happen when starting in the middle of the schedule), the *second* occurrence + is used for the first `step` call so that no sigma is accidentally skipped. + """ + if schedule_timesteps is None: + if self.timesteps is None: + raise ValueError("Timesteps have not been set. Call `set_timesteps` first.") + schedule_timesteps = self.timesteps + + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(schedule_timesteps.device) + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + if len(indices) == 0: + raise ValueError( + "Passed `timestep` is not in `self.timesteps`. Make sure to use values from `scheduler.timesteps`." + ) + + return indices[pos].item() + + def _init_step_index(self, timestep: float | torch.Tensor): + """ + Initialize the internal step index based on a given timestep. + """ + if self.timesteps is None: + raise ValueError("Timesteps have not been set. Call `set_timesteps` first.") + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device | None = None, + sigmas: list[float] | torch.Tensor | None = None, + timesteps: list[float] | torch.Tensor | None = None, + mu: float | None = None, + **kwargs, + ): + """ + Set the sigma / timestep schedule for sampling. + + When `sigmas` or `timesteps` are provided explicitly, they are used as the RF sigma schedule (ComfyUI-style) + and are expected to include the terminal 0.0. When both are `None`, the scheduler reuses the + [`FlowMatchEulerDiscreteScheduler`] logic to generate sigmas from `num_inference_steps` and the stored config + (including any resolution-dependent shifting, Karras/beta schedules, etc.). + + Args: + num_inference_steps (`int`, *optional*): + Number of denoising steps. If provided together with explicit `sigmas`/`timesteps`, they are expected + to be consistent and are otherwise ignored with a warning. + device (`str` or `torch.device`, *optional*): + Device to move the internal tensors to. + sigmas (`list[float]` or `torch.Tensor`, *optional*): + Explicit sigma schedule, e.g. `[1.0, 0.99, ..., 0.0]`. + timesteps (`list[float]` or `torch.Tensor`, *optional*): + Optional alias for `sigmas`. If `sigmas` is None and `timesteps` is provided, timesteps are treated as + sigmas. + mu (`float`, *optional*): + Optional shift parameter used when delegating to [`FlowMatchEulerDiscreteScheduler.set_timesteps`] and + `config.use_dynamic_shifting` is `True`. + """ + # 1. Auto-generate schedule (FlowMatch-style) when no explicit sigmas/timesteps are given + if sigmas is None and timesteps is None: + if num_inference_steps is None: + raise ValueError( + "LTXEulerAncestralRFScheduler.set_timesteps requires either explicit `sigmas`/`timesteps` " + "or a `num_inference_steps` value." + ) + + # We reuse FlowMatchEulerDiscreteScheduler to construct a sigma schedule that is + # consistent with the original LTX training setup (including optional time shifting, + # Karras / exponential / beta schedules, etc.). + from .scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler + + base_scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.config) + base_scheduler.set_timesteps( + num_inference_steps=num_inference_steps, + device=device, + sigmas=None, + mu=mu, + timesteps=None, + ) + + self.num_inference_steps = base_scheduler.num_inference_steps + # Keep sigmas / timesteps on the requested device so step() can operate on-device without + # extra transfers. + self.sigmas = base_scheduler.sigmas.to(device=device) + self.timesteps = base_scheduler.timesteps.to(device=device) + self._step_index = None + self._begin_index = None + return + + # 2. Explicit sigma schedule (ComfyUI-style path) + if sigmas is None: + # `timesteps` is treated as sigmas in RF / flow-matching setups. + sigmas = timesteps + + if isinstance(sigmas, list): + sigmas_tensor = torch.tensor(sigmas, dtype=torch.float32) + elif isinstance(sigmas, torch.Tensor): + sigmas_tensor = sigmas.to(dtype=torch.float32) + else: + raise TypeError(f"`sigmas` must be a list or torch.Tensor, got {type(sigmas)}.") + + if sigmas_tensor.ndim != 1: + raise ValueError(f"`sigmas` must be a 1D tensor, got shape {tuple(sigmas_tensor.shape)}.") + + if sigmas_tensor[-1].abs().item() > 1e-6: + logger.warning( + "The last sigma in the schedule is not zero (%.6f). " + "For best compatibility with ComfyUI's RF sampler, the terminal sigma " + "should be 0.0.", + sigmas_tensor[-1].item(), + ) + + # Move to device once, then derive timesteps. + if device is not None: + sigmas_tensor = sigmas_tensor.to(device) + + # Internal sigma schedule stays in [0, 1] (as provided). + self.sigmas = sigmas_tensor + # Timesteps are scaled to match the training setup of LTX (FlowMatch-style), + # where the network expects timesteps on [0, num_train_timesteps]. + # This keeps the transformer conditioning in the expected range while the RF + # scheduler still operates on the raw sigma values. + num_train = float(getattr(self.config, "num_train_timesteps", 1000)) + self.timesteps = sigmas_tensor * num_train + + if num_inference_steps is not None and num_inference_steps != len(sigmas) - 1: + logger.warning( + "Provided `num_inference_steps=%d` does not match `len(sigmas)-1=%d`. " + "Overriding `num_inference_steps` with `len(sigmas)-1`.", + num_inference_steps, + len(sigmas) - 1, + ) + + self.num_inference_steps = len(sigmas) - 1 + self._step_index = None + self._begin_index = None + + def _sigma_broadcast(self, sigma: torch.Tensor, sample: torch.Tensor) -> torch.Tensor: + """ + Helper to broadcast a scalar sigma to the shape of `sample`. + """ + while sigma.ndim < sample.ndim: + sigma = sigma.view(*sigma.shape, 1) + return sigma + + def step( + self, + model_output: torch.FloatTensor, + timestep: float | torch.Tensor, + sample: torch.FloatTensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> LTXEulerAncestralRFSchedulerOutput | tuple[torch.FloatTensor]: + """ + Perform a single Euler-Ancestral RF update step. + + Args: + model_output (`torch.FloatTensor`): + Raw model output at the current step. Interpreted under the CONST parametrization as `v_t`, with + denoised state reconstructed as `x0 = x_t - sigma_t * v_t`. + timestep (`float` or `torch.Tensor`): + The current sigma value (must match one entry in `self.timesteps`). + sample (`torch.FloatTensor`): + Current latent sample `x_t`. + generator (`torch.Generator`, *optional*): + Optional generator for reproducible noise. + return_dict (`bool`): + If `True`, return a `LTXEulerAncestralRFSchedulerOutput`; otherwise return a tuple where the first + element is the updated sample. + """ + + if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)): + raise ValueError( + ( + "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" + " `LTXEulerAncestralRFScheduler.step()` is not supported. Make sure to pass" + " one of the `scheduler.timesteps` values as `timestep`." + ), + ) + + if self.sigmas is None or self.timesteps is None: + raise ValueError("Scheduler has not been initialized. Call `set_timesteps` before `step`.") + + if self._step_index is None: + self._init_step_index(timestep) + + i = self._step_index + if i >= len(self.sigmas) - 1: + # Already at the end; simply return the current sample. + prev_sample = sample + else: + # Work in float32 for numerical stability + sample_f = sample.to(torch.float32) + model_output_f = model_output.to(torch.float32) + + sigma = self.sigmas[i] + sigma_next = self.sigmas[i + 1] + + sigma_b = self._sigma_broadcast(sigma.view(1), sample_f) + sigma_next_b = self._sigma_broadcast(sigma_next.view(1), sample_f) + + # Approximate denoised x0 under CONST parametrization: + # x0 = x_t - sigma_t * v_t + denoised = sample_f - sigma_b * model_output_f + + if sigma_next.abs().item() < 1e-8: + # Final denoising step + x = denoised + else: + eta = float(self.config.eta) + s_noise = float(self.config.s_noise) + + # Downstep computation (ComfyUI RF variant) + downstep_ratio = 1.0 + (sigma_next / sigma - 1.0) * eta + sigma_down = sigma_next * downstep_ratio + + alpha_ip1 = 1.0 - sigma_next + alpha_down = 1.0 - sigma_down + + # Deterministic part (Euler step in (x, x0)-space) + sigma_down_b = self._sigma_broadcast(sigma_down.view(1), sample_f) + alpha_ip1_b = self._sigma_broadcast(alpha_ip1.view(1), sample_f) + alpha_down_b = self._sigma_broadcast(alpha_down.view(1), sample_f) + + sigma_ratio = sigma_down_b / sigma_b + x = sigma_ratio * sample_f + (1.0 - sigma_ratio) * denoised + + # Stochastic ancestral noise + if eta > 0.0 and s_noise > 0.0: + renoise_coeff = ( + (sigma_next_b**2 - sigma_down_b**2 * alpha_ip1_b**2 / (alpha_down_b**2 + 1e-12)) + .clamp(min=0.0) + .sqrt() + ) + + noise = randn_tensor( + sample_f.shape, generator=generator, device=sample_f.device, dtype=sample_f.dtype + ) + x = (alpha_ip1_b / (alpha_down_b + 1e-12)) * x + noise * renoise_coeff * s_noise + + prev_sample = x.to(sample.dtype) + + # Advance internal step index + self._step_index = min(self._step_index + 1, len(self.sigmas) - 1) + + if not return_dict: + return (prev_sample,) + + return LTXEulerAncestralRFSchedulerOutput(prev_sample=prev_sample) + + def __len__(self) -> int: + # For compatibility with other schedulers; used e.g. in some training + # utilities to infer the maximum number of training timesteps. + return int(getattr(self.config, "num_train_timesteps", 1000)) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_pndm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_pndm.py new file mode 100644 index 0000000000000000000000000000000000000000..f87dff14375df852d0cdffe769f5c71586ef67b7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_pndm.py @@ -0,0 +1,504 @@ +# Copyright 2025 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class PNDMScheduler(SchedulerMixin, ConfigMixin): + """ + `PNDMScheduler` uses pseudo numerical methods for diffusion models such as the Runge-Kutta and linear multi-step + method. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to `1000`): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to `0.0001`): + The starting `beta` value of inference. + beta_end (`float`, defaults to `0.02`): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + skip_prk_steps (`bool`, defaults to `False`): + Allows the scheduler to skip the Runge-Kutta steps defined in the original paper as being required before + PLMS steps. + set_alpha_to_one (`bool`, defaults to `False`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the alpha value at step 0. + prediction_type (`"epsilon"` or `"v_prediction"`, defaults to `"epsilon"`): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process) + or `v_prediction` (see section 2.4 of [Imagen Video](https://huggingface.co/papers/2210.02303) paper). + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to `0`): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + skip_prk_steps: bool = False, + set_alpha_to_one: bool = False, + prediction_type: Literal["epsilon", "v_prediction"] = "epsilon", + timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading", + steps_offset: int = 0, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://huggingface.co/papers/2202.09778 + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + # running values + self.cur_model_output = 0 + self.counter = 0 + self.cur_sample = None + self.ets = [] + + # setable values + self.num_inference_steps = None + self._timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self.prk_timesteps = None + self.plms_timesteps = None + self.timesteps = None + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + + self.num_inference_steps = num_inference_steps + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + self._timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps).round().astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + self._timesteps = (np.arange(0, num_inference_steps) * step_ratio).round() + self._timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / self.num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + self._timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio))[::-1].astype( + np.int64 + ) + self._timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + if self.config.skip_prk_steps: + # for some models like stable diffusion the prk steps can/should be skipped to + # produce better results. When using PNDM with `self.config.skip_prk_steps` the implementation + # is based on crowsonkb's PLMS sampler implementation: https://github.com/CompVis/latent-diffusion/pull/51 + self.prk_timesteps = np.array([]) + self.plms_timesteps = np.concatenate([self._timesteps[:-1], self._timesteps[-2:-1], self._timesteps[-1:]])[ + ::-1 + ].copy() + else: + prk_timesteps = np.array(self._timesteps[-self.pndm_order :]).repeat(2) + np.tile( + np.array([0, self.config.num_train_timesteps // num_inference_steps // 2]), self.pndm_order + ) + self.prk_timesteps = (prk_timesteps[:-1].repeat(2)[1:-1])[::-1].copy() + self.plms_timesteps = self._timesteps[:-3][ + ::-1 + ].copy() # we copy to avoid having negative strides which are not supported by torch.from_numpy + + timesteps = np.concatenate([self.prk_timesteps, self.plms_timesteps]).astype(np.int64) + self.timesteps = torch.from_numpy(timesteps).to(device) + + self.ets = [] + self.counter = 0 + self.cur_model_output = 0 + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise), and calls [`~PNDMScheduler.step_prk`] + or [`~PNDMScheduler.step_plms`] depending on the internal variable `counter`. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps: + return self.step_prk(model_output=model_output, timestep=timestep, sample=sample, return_dict=return_dict) + else: + return self.step_plms(model_output=model_output, timestep=timestep, sample=sample, return_dict=return_dict) + + def step_prk( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the Runge-Kutta method. It performs four forward passes to approximate the solution to the differential + equation. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + diff_to_prev = 0 if self.counter % 2 else self.config.num_train_timesteps // self.num_inference_steps // 2 + prev_timestep = timestep - diff_to_prev + timestep = self.prk_timesteps[self.counter // 4 * 4] + + if self.counter % 4 == 0: + self.cur_model_output += 1 / 6 * model_output + self.ets.append(model_output) + self.cur_sample = sample + elif (self.counter - 1) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 2) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 3) % 4 == 0: + model_output = self.cur_model_output + 1 / 6 * model_output + self.cur_model_output = 0 + + # cur_sample should not be `None` + cur_sample = self.cur_sample if self.cur_sample is not None else sample + + prev_sample = self._get_prev_sample(cur_sample, timestep, prev_timestep, model_output) + self.counter += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def step_plms( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the linear multistep method. It performs one forward pass multiple times to approximate the solution. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if not self.config.skip_prk_steps and len(self.ets) < 3: + raise ValueError( + f"{self.__class__} can only be run AFTER scheduler has been run " + "in 'prk' mode for at least 12 iterations " + "See: https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py " + "for more information." + ) + + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + if self.counter != 1: + self.ets = self.ets[-3:] + self.ets.append(model_output) + else: + prev_timestep = timestep + timestep = timestep + self.config.num_train_timesteps // self.num_inference_steps + + if len(self.ets) == 1 and self.counter == 0: + model_output = model_output + self.cur_sample = sample + elif len(self.ets) == 1 and self.counter == 1: + model_output = (model_output + self.ets[-1]) / 2 + sample = self.cur_sample + self.cur_sample = None + elif len(self.ets) == 2: + model_output = (3 * self.ets[-1] - self.ets[-2]) / 2 + elif len(self.ets) == 3: + model_output = (23 * self.ets[-1] - 16 * self.ets[-2] + 5 * self.ets[-3]) / 12 + else: + model_output = (1 / 24) * (55 * self.ets[-1] - 59 * self.ets[-2] + 37 * self.ets[-3] - 9 * self.ets[-4]) + + prev_sample = self._get_prev_sample(sample, timestep, prev_timestep, model_output) + self.counter += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def _get_prev_sample( + self, sample: torch.Tensor, timestep: int, prev_timestep: int, model_output: torch.Tensor + ) -> torch.Tensor: + """ + Compute the previous sample x_(t-δ) from the current sample x_t using formula (9) from the [PNDM + paper](https://huggingface.co/papers/2202.09778). + + Args: + sample (`torch.Tensor`): + The current sample x_t. + timestep (`int`): + The current timestep t. + prev_timestep (`int`): + The previous timestep (t-δ). + model_output (`torch.Tensor`): + The model output e_θ(x_t, t). + + Returns: + `torch.Tensor`: + The previous sample x_(t-δ). + """ + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + if self.config.prediction_type == "v_prediction": + model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + elif self.config.prediction_type != "epsilon": + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon` or `v_prediction`" + ) + + # corresponds to (α_(t−δ) - α_t) divided by + # denominator of x_t in formula (9) and plus 1 + # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) = + # sqrt(α_(t−δ)) / sqrt(α_t)) + sample_coeff = (alpha_prod_t_prev / alpha_prod_t) ** (0.5) + + # corresponds to denominator of e_θ(x_t, t) in formula (9) + model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev ** (0.5) + ( + alpha_prod_t * beta_prod_t * alpha_prod_t_prev + ) ** (0.5) + + # full formula (9) + prev_sample = ( + sample_coeff * sample - (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff + ) + + return prev_sample + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_pndm_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_pndm_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..377e2d35ba0af39a1543d32ef6772abf878cb8cd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_pndm_flax.py @@ -0,0 +1,528 @@ +# Copyright 2025 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim + +from dataclasses import dataclass + +import flax +import jax +import jax.numpy as jnp + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + CommonSchedulerState, + FlaxKarrasDiffusionSchedulers, + FlaxSchedulerMixin, + FlaxSchedulerOutput, + add_noise_common, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class PNDMSchedulerState: + common: CommonSchedulerState + final_alpha_cumprod: jnp.ndarray + + # setable values + init_noise_sigma: jnp.ndarray + timesteps: jnp.ndarray + num_inference_steps: int = None + prk_timesteps: jnp.ndarray | None = None + plms_timesteps: jnp.ndarray | None = None + + # running values + cur_model_output: jnp.ndarray | None = None + counter: jnp.int32 | None = None + cur_sample: jnp.ndarray | None = None + ets: jnp.ndarray | None = None + + @classmethod + def create( + cls, + common: CommonSchedulerState, + final_alpha_cumprod: jnp.ndarray, + init_noise_sigma: jnp.ndarray, + timesteps: jnp.ndarray, + ): + return cls( + common=common, + final_alpha_cumprod=final_alpha_cumprod, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + +@dataclass +class FlaxPNDMSchedulerOutput(FlaxSchedulerOutput): + state: PNDMSchedulerState + + +class FlaxPNDMScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + Pseudo numerical methods for diffusion models (PNDM) proposes using more advanced ODE integration techniques, + namely Runge-Kutta method and a linear multi-step method. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + For more details, see the original paper: https://huggingface.co/papers/2202.09778 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`jnp.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + skip_prk_steps (`bool`): + allows the scheduler to skip the Runge-Kutta steps that are defined in the original paper as being required + before plms steps; defaults to `False`. + set_alpha_to_one (`bool`, default `False`): + each diffusion step uses the value of alphas product at that step and at the previous one. For the final + step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the value of alpha at step 0. + steps_offset (`int`, default `0`): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, default `epsilon`, optional): + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion + process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4 + https://huggingface.co/papers/2210.02303) + dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`): + the `dtype` used for params and computation. + """ + + _compatibles = [e.name for e in FlaxKarrasDiffusionSchedulers] + + dtype: jnp.dtype + pndm_order: int + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: jnp.ndarray | None = None, + skip_prk_steps: bool = False, + set_alpha_to_one: bool = False, + steps_offset: int = 0, + prediction_type: str = "epsilon", + dtype: jnp.dtype = jnp.float32, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + self.dtype = dtype + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://huggingface.co/papers/2202.09778 + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + def create_state(self, common: CommonSchedulerState | None = None) -> PNDMSchedulerState: + if common is None: + common = CommonSchedulerState.create(self) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + final_alpha_cumprod = ( + jnp.array(1.0, dtype=self.dtype) if self.config.set_alpha_to_one else common.alphas_cumprod[0] + ) + + # standard deviation of the initial noise distribution + init_noise_sigma = jnp.array(1.0, dtype=self.dtype) + + timesteps = jnp.arange(0, self.config.num_train_timesteps).round()[::-1] + + return PNDMSchedulerState.create( + common=common, + final_alpha_cumprod=final_alpha_cumprod, + init_noise_sigma=init_noise_sigma, + timesteps=timesteps, + ) + + def set_timesteps(self, state: PNDMSchedulerState, num_inference_steps: int, shape: tuple) -> PNDMSchedulerState: + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`PNDMSchedulerState`): + the `FlaxPNDMScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + shape (`tuple`): + the shape of the samples to be generated. + """ + + step_ratio = self.config.num_train_timesteps // num_inference_steps + # creates integer timesteps by multiplying by ratio + # rounding to avoid issues when num_inference_step is power of 3 + _timesteps = (jnp.arange(0, num_inference_steps) * step_ratio).round() + self.config.steps_offset + + if self.config.skip_prk_steps: + # for some models like stable diffusion the prk steps can/should be skipped to + # produce better results. When using PNDM with `self.config.skip_prk_steps` the implementation + # is based on crowsonkb's PLMS sampler implementation: https://github.com/CompVis/latent-diffusion/pull/51 + + prk_timesteps = jnp.array([], dtype=jnp.int32) + plms_timesteps = jnp.concatenate([_timesteps[:-1], _timesteps[-2:-1], _timesteps[-1:]])[::-1] + + else: + prk_timesteps = _timesteps[-self.pndm_order :].repeat(2) + jnp.tile( + jnp.array( + [0, self.config.num_train_timesteps // num_inference_steps // 2], + dtype=jnp.int32, + ), + self.pndm_order, + ) + + prk_timesteps = (prk_timesteps[:-1].repeat(2)[1:-1])[::-1] + plms_timesteps = _timesteps[:-3][::-1] + + timesteps = jnp.concatenate([prk_timesteps, plms_timesteps]) + + # initial running values + + cur_model_output = jnp.zeros(shape, dtype=self.dtype) + counter = jnp.int32(0) + cur_sample = jnp.zeros(shape, dtype=self.dtype) + ets = jnp.zeros((4,) + shape, dtype=self.dtype) + + return state.replace( + timesteps=timesteps, + num_inference_steps=num_inference_steps, + prk_timesteps=prk_timesteps, + plms_timesteps=plms_timesteps, + cur_model_output=cur_model_output, + counter=counter, + cur_sample=cur_sample, + ets=ets, + ) + + def scale_model_input( + self, + state: PNDMSchedulerState, + sample: jnp.ndarray, + timestep: int | None = None, + ) -> jnp.ndarray: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + state (`PNDMSchedulerState`): the `FlaxPNDMScheduler` state data class instance. + sample (`jnp.ndarray`): input sample + timestep (`int`, optional): current timestep + + Returns: + `jnp.ndarray`: scaled input sample + """ + return sample + + def step( + self, + state: PNDMSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + return_dict: bool = True, + ) -> FlaxPNDMSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + This function calls `step_prk()` or `step_plms()` depending on the internal variable `counter`. + + Args: + state (`PNDMSchedulerState`): the `FlaxPNDMScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than FlaxPNDMSchedulerOutput class + + Returns: + [`FlaxPNDMSchedulerOutput`] or `tuple`: [`FlaxPNDMSchedulerOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.config.skip_prk_steps: + prev_sample, state = self.step_plms(state, model_output, timestep, sample) + else: + prk_prev_sample, prk_state = self.step_prk(state, model_output, timestep, sample) + plms_prev_sample, plms_state = self.step_plms(state, model_output, timestep, sample) + + cond = state.counter < len(state.prk_timesteps) + + prev_sample = jax.lax.select(cond, prk_prev_sample, plms_prev_sample) + + state = state.replace( + cur_model_output=jax.lax.select(cond, prk_state.cur_model_output, plms_state.cur_model_output), + ets=jax.lax.select(cond, prk_state.ets, plms_state.ets), + cur_sample=jax.lax.select(cond, prk_state.cur_sample, plms_state.cur_sample), + counter=jax.lax.select(cond, prk_state.counter, plms_state.counter), + ) + + if not return_dict: + return (prev_sample, state) + + return FlaxPNDMSchedulerOutput(prev_sample=prev_sample, state=state) + + def step_prk( + self, + state: PNDMSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + ) -> FlaxPNDMSchedulerOutput | tuple: + """ + Step function propagating the sample with the Runge-Kutta method. RK takes 4 forward passes to approximate the + solution to the differential equation. + + Args: + state (`PNDMSchedulerState`): the `FlaxPNDMScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than FlaxPNDMSchedulerOutput class + + Returns: + [`FlaxPNDMSchedulerOutput`] or `tuple`: [`FlaxPNDMSchedulerOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + diff_to_prev = jnp.where( + state.counter % 2, + 0, + self.config.num_train_timesteps // state.num_inference_steps // 2, + ) + prev_timestep = timestep - diff_to_prev + timestep = state.prk_timesteps[state.counter // 4 * 4] + + model_output = jax.lax.select( + (state.counter % 4) != 3, + model_output, # remainder 0, 1, 2 + state.cur_model_output + 1 / 6 * model_output, # remainder 3 + ) + + state = state.replace( + cur_model_output=jax.lax.select_n( + state.counter % 4, + state.cur_model_output + 1 / 6 * model_output, # remainder 0 + state.cur_model_output + 1 / 3 * model_output, # remainder 1 + state.cur_model_output + 1 / 3 * model_output, # remainder 2 + jnp.zeros_like(state.cur_model_output), # remainder 3 + ), + ets=jax.lax.select( + (state.counter % 4) == 0, + state.ets.at[0:3].set(state.ets[1:4]).at[3].set(model_output), # remainder 0 + state.ets, # remainder 1, 2, 3 + ), + cur_sample=jax.lax.select( + (state.counter % 4) == 0, + sample, # remainder 0 + state.cur_sample, # remainder 1, 2, 3 + ), + ) + + cur_sample = state.cur_sample + prev_sample = self._get_prev_sample(state, cur_sample, timestep, prev_timestep, model_output) + state = state.replace(counter=state.counter + 1) + + return (prev_sample, state) + + def step_plms( + self, + state: PNDMSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + ) -> FlaxPNDMSchedulerOutput | tuple: + """ + Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple + times to approximate the solution. + + Args: + state (`PNDMSchedulerState`): the `FlaxPNDMScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than FlaxPNDMSchedulerOutput class + + Returns: + [`FlaxPNDMSchedulerOutput`] or `tuple`: [`FlaxPNDMSchedulerOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is the sample tensor. + + """ + + if state.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + # NOTE: There is no way to check in the jitted runtime if the prk mode was ran before + + prev_timestep = timestep - self.config.num_train_timesteps // state.num_inference_steps + prev_timestep = jnp.where(prev_timestep > 0, prev_timestep, 0) + + # Reference: + # if state.counter != 1: + # state.ets.append(model_output) + # else: + # prev_timestep = timestep + # timestep = timestep + self.config.num_train_timesteps // state.num_inference_steps + + prev_timestep = jnp.where(state.counter == 1, timestep, prev_timestep) + timestep = jnp.where( + state.counter == 1, + timestep + self.config.num_train_timesteps // state.num_inference_steps, + timestep, + ) + + # Reference: + # if len(state.ets) == 1 and state.counter == 0: + # model_output = model_output + # state.cur_sample = sample + # elif len(state.ets) == 1 and state.counter == 1: + # model_output = (model_output + state.ets[-1]) / 2 + # sample = state.cur_sample + # state.cur_sample = None + # elif len(state.ets) == 2: + # model_output = (3 * state.ets[-1] - state.ets[-2]) / 2 + # elif len(state.ets) == 3: + # model_output = (23 * state.ets[-1] - 16 * state.ets[-2] + 5 * state.ets[-3]) / 12 + # else: + # model_output = (1 / 24) * (55 * state.ets[-1] - 59 * state.ets[-2] + 37 * state.ets[-3] - 9 * state.ets[-4]) + + state = state.replace( + ets=jax.lax.select( + state.counter != 1, + state.ets.at[0:3].set(state.ets[1:4]).at[3].set(model_output), # counter != 1 + state.ets, # counter 1 + ), + cur_sample=jax.lax.select( + state.counter != 1, + sample, # counter != 1 + state.cur_sample, # counter 1 + ), + ) + + state = state.replace( + cur_model_output=jax.lax.select_n( + jnp.clip(state.counter, 0, 4), + model_output, # counter 0 + (model_output + state.ets[-1]) / 2, # counter 1 + (3 * state.ets[-1] - state.ets[-2]) / 2, # counter 2 + (23 * state.ets[-1] - 16 * state.ets[-2] + 5 * state.ets[-3]) / 12, # counter 3 + (1 / 24) + * (55 * state.ets[-1] - 59 * state.ets[-2] + 37 * state.ets[-3] - 9 * state.ets[-4]), # counter >= 4 + ), + ) + + sample = state.cur_sample + model_output = state.cur_model_output + prev_sample = self._get_prev_sample(state, sample, timestep, prev_timestep, model_output) + state = state.replace(counter=state.counter + 1) + + return (prev_sample, state) + + def _get_prev_sample(self, state: PNDMSchedulerState, sample, timestep, prev_timestep, model_output): + # See formula (9) of PNDM paper https://huggingface.co/papers/2202.09778 + # this function computes x_(t−δ) using the formula of (9) + # Note that x_t needs to be added to both sides of the equation + + # Notation ( -> + # alpha_prod_t -> α_t + # alpha_prod_t_prev -> α_(t−δ) + # beta_prod_t -> (1 - α_t) + # beta_prod_t_prev -> (1 - α_(t−δ)) + # sample -> x_t + # model_output -> e_θ(x_t, t) + # prev_sample -> x_(t−δ) + alpha_prod_t = state.common.alphas_cumprod[timestep] + alpha_prod_t_prev = jnp.where( + prev_timestep >= 0, + state.common.alphas_cumprod[prev_timestep], + state.final_alpha_cumprod, + ) + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + if self.config.prediction_type == "v_prediction": + model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + elif self.config.prediction_type != "epsilon": + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon` or `v_prediction`" + ) + + # corresponds to (α_(t−δ) - α_t) divided by + # denominator of x_t in formula (9) and plus 1 + # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) = + # sqrt(α_(t−δ)) / sqrt(α_t)) + sample_coeff = (alpha_prod_t_prev / alpha_prod_t) ** (0.5) + + # corresponds to denominator of e_θ(x_t, t) in formula (9) + model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev ** (0.5) + ( + alpha_prod_t * beta_prod_t * alpha_prod_t_prev + ) ** (0.5) + + # full formula (9) + prev_sample = ( + sample_coeff * sample - (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff + ) + + return prev_sample + + def add_noise( + self, + state: PNDMSchedulerState, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + return add_noise_common(state.common, original_samples, noise, timesteps) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_repaint.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_repaint.py new file mode 100644 index 0000000000000000000000000000000000000000..43665f6c68e80679efe73b507a7514b40e3df225 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_repaint.py @@ -0,0 +1,373 @@ +# Copyright 2025 ETH Zurich Computer Vision Lab and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +@dataclass +class RePaintSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample (x_{0}) based on the model output from + the current timestep. `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class RePaintScheduler(SchedulerMixin, ConfigMixin): + """ + `RePaintScheduler` is a scheduler for DDPM inpainting inside a given mask. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, `squaredcos_cap_v2`, or `sigmoid`. + eta (`float`): + The weight of noise for added noise in diffusion step. If its value is between 0.0 and 1.0 it corresponds + to the DDIM scheduler, and if its value is between -0.0 and 1.0 it corresponds to the DDPM scheduler. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample between -1 and 1 for numerical stability. + + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + eta: float = 0.0, + trained_betas: np.ndarray | None = None, + clip_sample: bool = True, + ): + if trained_betas is not None: + self.betas = torch.from_numpy(trained_betas) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + elif beta_schedule == "sigmoid": + # GeoDiff sigmoid schedule + betas = torch.linspace(-6, 6, num_train_timesteps) + self.betas = torch.sigmoid(betas) * (beta_end - beta_start) + beta_start + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + self.one = torch.tensor(1.0) + + self.final_alpha_cumprod = torch.tensor(1.0) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + self.eta = eta + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int, + jump_length: int = 10, + jump_n_sample: int = 10, + device: str | torch.device = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. If used, + `timesteps` must be `None`. + jump_length (`int`, defaults to 10): + The number of steps taken forward in time before going backward in time for a single jump (“j” in + RePaint paper). Take a look at Figure 9 and 10 in the paper. + jump_n_sample (`int`, defaults to 10): + The number of times to make a forward time jump for a given chosen time sample. Take a look at Figure 9 + and 10 in the paper. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + + """ + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + + timesteps = [] + + jumps = {} + for j in range(0, num_inference_steps - jump_length, jump_length): + jumps[j] = jump_n_sample - 1 + + t = num_inference_steps + while t >= 1: + t = t - 1 + timesteps.append(t) + + if jumps.get(t, 0) > 0: + jumps[t] = jumps[t] - 1 + for _ in range(jump_length): + t = t + 1 + timesteps.append(t) + + timesteps = np.array(timesteps) * (self.config.num_train_timesteps // self.num_inference_steps) + self.timesteps = torch.from_numpy(timesteps).to(device) + + def _get_variance(self, t): + prev_timestep = t - self.config.num_train_timesteps // self.num_inference_steps + + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from + # https://huggingface.co/papers/2006.11239) and sample from it to get + # previous sample x_{t-1} ~ N(pred_prev_sample, variance) == add + # variance to pred_sample + # Is equivalent to formula (16) in https://huggingface.co/papers/2010.02502 + # without eta. + # variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + original_image: torch.Tensor, + mask: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> RePaintSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + original_image (`torch.Tensor`): + The original image to inpaint on. + mask (`torch.Tensor`): + The mask where a value of 0.0 indicates which part of the original image to inpaint. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_repaint.RePaintSchedulerOutput`] is returned, + otherwise a tuple is returned where the first element is the sample tensor. + + """ + t = timestep + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239 + pred_original_sample = (sample - beta_prod_t**0.5 * model_output) / alpha_prod_t**0.5 + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = torch.clamp(pred_original_sample, -1, 1) + + # We choose to follow RePaint Algorithm 1 to get x_{t-1}, however we + # substitute formula (7) in the algorithm coming from DDPM paper + # (formula (4) Algorithm 2 - Sampling) with formula (12) from DDIM paper. + # DDIM schedule gives the same results as DDPM with eta = 1.0 + # Noise is being reused in 7. and 8., but no impact on quality has + # been observed. + + # 5. Add noise + device = model_output.device + noise = randn_tensor(model_output.shape, generator=generator, device=device, dtype=model_output.dtype) + std_dev_t = self.eta * self._get_variance(timestep) ** 0.5 + + variance = 0 + if t > 0 and self.eta > 0: + variance = std_dev_t * noise + + # 6. compute "direction pointing to x_t" of formula (12) + # from https://huggingface.co/papers/2010.02502 + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** 0.5 * model_output + + # 7. compute x_{t-1} of formula (12) from https://huggingface.co/papers/2010.02502 + prev_unknown_part = alpha_prod_t_prev**0.5 * pred_original_sample + pred_sample_direction + variance + + # 8. Algorithm 1 Line 5 https://huggingface.co/papers/2201.09865 + # The computation reported in Algorithm 1 Line 5 is incorrect. Line 5 refers to formula (8a) of the same paper, + # which tells to sample from a Gaussian distribution with mean "(alpha_prod_t_prev**0.5) * original_image" + # and variance "(1 - alpha_prod_t_prev)". This means that the standard Gaussian distribution "noise" should be + # scaled by the square root of the variance (as it is done here), however Algorithm 1 Line 5 tells to scale by the variance. + prev_known_part = (alpha_prod_t_prev**0.5) * original_image + ((1 - alpha_prod_t_prev) ** 0.5) * noise + + # 9. Algorithm 1 Line 8 https://huggingface.co/papers/2201.09865 + pred_prev_sample = mask * prev_known_part + (1.0 - mask) * prev_unknown_part + + if not return_dict: + return ( + pred_prev_sample, + pred_original_sample, + ) + + return RePaintSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) + + def undo_step(self, sample, timestep, generator=None): + n = self.config.num_train_timesteps // self.num_inference_steps + + for i in range(n): + beta = self.betas[timestep + i] + if sample.device.type == "mps": + # randn does not work reproducibly on mps + noise = randn_tensor(sample.shape, dtype=sample.dtype, generator=generator) + noise = noise.to(sample.device) + else: + noise = randn_tensor(sample.shape, generator=generator, device=sample.device, dtype=sample.dtype) + + # 10. Algorithm 1 Line 10 https://huggingface.co/papers/2201.09865 + sample = (1 - beta) ** 0.5 * sample + beta**0.5 * noise + + return sample + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + raise NotImplementedError("Use `DDPMScheduler.add_noise()` to train for sampling with RePaint.") + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sasolver.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sasolver.py new file mode 100644 index 0000000000000000000000000000000000000000..28369e9e33841b3cf6dc9fb70e0e8304de5cebf1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sasolver.py @@ -0,0 +1,1338 @@ +# Copyright 2025 Shuchen Xue, etc. in University of Chinese Academy of Sciences Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: check https://huggingface.co/papers/2309.05019 +# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py + +import math +from typing import Callable, Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import deprecate, is_scipy_available +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +if is_scipy_available(): + import scipy.stats + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class SASolverScheduler(SchedulerMixin, ConfigMixin): + """ + `SASolverScheduler` is a fast dedicated high-order solver for diffusion SDEs. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + predictor_order (`int`, defaults to 2): + The predictor order which can be `1` or `2` or `3` or '4'. It is recommended to use `predictor_order=2` for + guided sampling, and `predictor_order=3` for unconditional sampling. + corrector_order (`int`, defaults to 2): + The corrector order which can be `1` or `2` or `3` or '4'. It is recommended to use `corrector_order=2` for + guided sampling, and `corrector_order=3` for unconditional sampling. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + tau_func (`Callable`, *optional*): + Stochasticity during the sampling. Default in init is `lambda t: 1 if t >= 200 and t <= 800 else 0`. + SA-Solver will sample from vanilla diffusion ODE if tau_func is set to `lambda t: 0`. SA-Solver will sample + from vanilla diffusion SDE if tau_func is set to `lambda t: 1`. For more details, please check + https://huggingface.co/papers/2309.05019 + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++"`. + algorithm_type (`str`, defaults to `data_prediction`): + Algorithm type for the solver; can be `data_prediction` or `noise_prediction`. It is recommended to use + `data_prediction` with `solver_order=2` for guided sampling like in Stable Diffusion. + lower_order_final (`bool`, defaults to `True`): + Whether to use lower-order solvers in the final steps. Default = True. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + lambda_min_clipped (`float`, defaults to `-inf`): + Clipping threshold for the minimum value of `lambda(t)` for numerical stability. This is critical for the + cosine (`squaredcos_cap_v2`) noise schedule. + variance_type (`str`, *optional*): + Set to "learned" or "learned_range" for diffusion models that predict variance. If set, the model's output + contains the predicted Gaussian variance. + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + predictor_order: int = 2, + corrector_order: int = 2, + prediction_type: str = "epsilon", + tau_func: Callable | None = None, + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: str = "data_prediction", + lower_order_final: bool = True, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + use_flow_sigmas: bool = False, + flow_shift: float = 1.0, + lambda_min_clipped: float = -float("inf"), + variance_type: str | None = None, + timestep_spacing: str = "linspace", + steps_offset: int = 0, + ): + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + if algorithm_type not in ["data_prediction", "noise_prediction"]: + raise NotImplementedError(f"{algorithm_type} is not implemented for {self.__class__}") + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.timestep_list = [None] * max(predictor_order, corrector_order - 1) + self.model_outputs = [None] * max(predictor_order, corrector_order - 1) + + if tau_func is None: + self.tau_func = lambda t: 1 if t >= 200 and t <= 800 else 0 + else: + self.tau_func = tau_func + self.predict_x0 = algorithm_type == "data_prediction" + self.lower_order_nums = 0 + self.last_sample = None + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self): + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps(self, num_inference_steps: int = None, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + # Clipping the minimum of all lambda(t) for numerical stability. + # This is critical for cosine (squaredcos_cap_v2) noise schedule. + clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped) + last_timestep = ((self.config.num_train_timesteps - clipped_idx).numpy()).item() + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, last_timestep - 1, num_inference_steps + 1).round()[::-1][:-1].copy().astype(np.int64) + ) + + elif self.config.timestep_spacing == "leading": + step_ratio = last_timestep // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(last_timestep, 0, -step_ratio).round().copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + if self.config.use_karras_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_exponential_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_beta_sigmas: + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + elif self.config.use_flow_sigmas: + alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1) + sigmas = 1.0 - alphas + sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy() + timesteps = (sigmas * self.config.num_train_timesteps).copy() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + self.model_outputs = [ + None, + ] * max(self.config.predictor_order, self.config.corrector_order - 1) + self.lower_order_nums = 0 + self.last_sample = None + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma, log_sigmas): + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma): + """ + Convert sigma values to alpha_t and sigma_t values. + + Args: + sigma (`torch.Tensor`): + The sigma value(s) to convert. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing (alpha_t, sigma_t) values. + """ + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = sigma + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + """ + Convert the model output to the corresponding type the data_prediction/noise_prediction algorithm needs. + Noise_prediction is designed to discretize an integral of the noise prediction model, and data_prediction is + designed to discretize an integral of the data prediction model. + + > [!TIP] > The algorithm and model type are decoupled. You can use either data_prediction or noise_prediction + for both > noise prediction and data prediction models. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + # SA-Solver_data_prediction needs to solve an integral of the data prediction model. + if self.config.algorithm_type in ["data_prediction"]: + if self.config.prediction_type == "epsilon": + # SA-Solver only needs the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + model_output = model_output[:, :3] + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + sigma_t = self.sigmas[self.step_index] + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the SASolverScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + + # SA-Solver_noise_prediction needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type in ["noise_prediction"]: + if self.config.prediction_type == "epsilon": + # SA-Solver only needs the "mean" output. + if self.config.variance_type in ["learned", "learned_range"]: + epsilon = model_output[:, :3] + else: + epsilon = model_output + elif self.config.prediction_type == "sample": + epsilon = (sample - alpha_t * model_output) / sigma_t + elif self.config.prediction_type == "v_prediction": + epsilon = alpha_t * model_output + sigma_t * sample + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the SASolverScheduler." + ) + + if self.config.thresholding: + alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep] + x0_pred = (sample - sigma_t * epsilon) / alpha_t + x0_pred = self._threshold_sample(x0_pred) + epsilon = (sample - alpha_t * x0_pred) / sigma_t + + return epsilon + + def get_coefficients_exponential_negative(self, order, interval_start, interval_end): + """ + Calculate the integral of exp(-x) * x^order dx from interval_start to interval_end + """ + assert order in [0, 1, 2, 3], "order is only supported for 0, 1, 2 and 3" + + if order == 0: + return torch.exp(-interval_end) * (torch.exp(interval_end - interval_start) - 1) + elif order == 1: + return torch.exp(-interval_end) * ( + (interval_start + 1) * torch.exp(interval_end - interval_start) - (interval_end + 1) + ) + elif order == 2: + return torch.exp(-interval_end) * ( + (interval_start**2 + 2 * interval_start + 2) * torch.exp(interval_end - interval_start) + - (interval_end**2 + 2 * interval_end + 2) + ) + elif order == 3: + return torch.exp(-interval_end) * ( + (interval_start**3 + 3 * interval_start**2 + 6 * interval_start + 6) + * torch.exp(interval_end - interval_start) + - (interval_end**3 + 3 * interval_end**2 + 6 * interval_end + 6) + ) + + def get_coefficients_exponential_positive(self, order, interval_start, interval_end, tau): + """ + Calculate the integral of exp(x(1+tau^2)) * x^order dx from interval_start to interval_end + """ + assert order in [0, 1, 2, 3], "order is only supported for 0, 1, 2 and 3" + + # after change of variable(cov) + interval_end_cov = (1 + tau**2) * interval_end + interval_start_cov = (1 + tau**2) * interval_start + + if order == 0: + return ( + torch.exp(interval_end_cov) * (1 - torch.exp(-(interval_end_cov - interval_start_cov))) / (1 + tau**2) + ) + elif order == 1: + return ( + torch.exp(interval_end_cov) + * ( + (interval_end_cov - 1) + - (interval_start_cov - 1) * torch.exp(-(interval_end_cov - interval_start_cov)) + ) + / ((1 + tau**2) ** 2) + ) + elif order == 2: + return ( + torch.exp(interval_end_cov) + * ( + (interval_end_cov**2 - 2 * interval_end_cov + 2) + - (interval_start_cov**2 - 2 * interval_start_cov + 2) + * torch.exp(-(interval_end_cov - interval_start_cov)) + ) + / ((1 + tau**2) ** 3) + ) + elif order == 3: + return ( + torch.exp(interval_end_cov) + * ( + (interval_end_cov**3 - 3 * interval_end_cov**2 + 6 * interval_end_cov - 6) + - (interval_start_cov**3 - 3 * interval_start_cov**2 + 6 * interval_start_cov - 6) + * torch.exp(-(interval_end_cov - interval_start_cov)) + ) + / ((1 + tau**2) ** 4) + ) + + def lagrange_polynomial_coefficient(self, order, lambda_list): + """ + Calculate the coefficient of lagrange polynomial + """ + + assert order in [0, 1, 2, 3] + assert order == len(lambda_list) - 1 + if order == 0: + return [[1]] + elif order == 1: + return [ + [ + 1 / (lambda_list[0] - lambda_list[1]), + -lambda_list[1] / (lambda_list[0] - lambda_list[1]), + ], + [ + 1 / (lambda_list[1] - lambda_list[0]), + -lambda_list[0] / (lambda_list[1] - lambda_list[0]), + ], + ] + elif order == 2: + denominator1 = (lambda_list[0] - lambda_list[1]) * (lambda_list[0] - lambda_list[2]) + denominator2 = (lambda_list[1] - lambda_list[0]) * (lambda_list[1] - lambda_list[2]) + denominator3 = (lambda_list[2] - lambda_list[0]) * (lambda_list[2] - lambda_list[1]) + return [ + [ + 1 / denominator1, + (-lambda_list[1] - lambda_list[2]) / denominator1, + lambda_list[1] * lambda_list[2] / denominator1, + ], + [ + 1 / denominator2, + (-lambda_list[0] - lambda_list[2]) / denominator2, + lambda_list[0] * lambda_list[2] / denominator2, + ], + [ + 1 / denominator3, + (-lambda_list[0] - lambda_list[1]) / denominator3, + lambda_list[0] * lambda_list[1] / denominator3, + ], + ] + elif order == 3: + denominator1 = ( + (lambda_list[0] - lambda_list[1]) + * (lambda_list[0] - lambda_list[2]) + * (lambda_list[0] - lambda_list[3]) + ) + denominator2 = ( + (lambda_list[1] - lambda_list[0]) + * (lambda_list[1] - lambda_list[2]) + * (lambda_list[1] - lambda_list[3]) + ) + denominator3 = ( + (lambda_list[2] - lambda_list[0]) + * (lambda_list[2] - lambda_list[1]) + * (lambda_list[2] - lambda_list[3]) + ) + denominator4 = ( + (lambda_list[3] - lambda_list[0]) + * (lambda_list[3] - lambda_list[1]) + * (lambda_list[3] - lambda_list[2]) + ) + return [ + [ + 1 / denominator1, + (-lambda_list[1] - lambda_list[2] - lambda_list[3]) / denominator1, + ( + lambda_list[1] * lambda_list[2] + + lambda_list[1] * lambda_list[3] + + lambda_list[2] * lambda_list[3] + ) + / denominator1, + (-lambda_list[1] * lambda_list[2] * lambda_list[3]) / denominator1, + ], + [ + 1 / denominator2, + (-lambda_list[0] - lambda_list[2] - lambda_list[3]) / denominator2, + ( + lambda_list[0] * lambda_list[2] + + lambda_list[0] * lambda_list[3] + + lambda_list[2] * lambda_list[3] + ) + / denominator2, + (-lambda_list[0] * lambda_list[2] * lambda_list[3]) / denominator2, + ], + [ + 1 / denominator3, + (-lambda_list[0] - lambda_list[1] - lambda_list[3]) / denominator3, + ( + lambda_list[0] * lambda_list[1] + + lambda_list[0] * lambda_list[3] + + lambda_list[1] * lambda_list[3] + ) + / denominator3, + (-lambda_list[0] * lambda_list[1] * lambda_list[3]) / denominator3, + ], + [ + 1 / denominator4, + (-lambda_list[0] - lambda_list[1] - lambda_list[2]) / denominator4, + ( + lambda_list[0] * lambda_list[1] + + lambda_list[0] * lambda_list[2] + + lambda_list[1] * lambda_list[2] + ) + / denominator4, + (-lambda_list[0] * lambda_list[1] * lambda_list[2]) / denominator4, + ], + ] + + def get_coefficients_fn(self, order, interval_start, interval_end, lambda_list, tau): + assert order in [1, 2, 3, 4] + assert order == len(lambda_list), "the length of lambda list must be equal to the order" + coefficients = [] + lagrange_coefficient = self.lagrange_polynomial_coefficient(order - 1, lambda_list) + for i in range(order): + coefficient = 0 + for j in range(order): + if self.predict_x0: + coefficient += lagrange_coefficient[i][j] * self.get_coefficients_exponential_positive( + order - 1 - j, interval_start, interval_end, tau + ) + else: + coefficient += lagrange_coefficient[i][j] * self.get_coefficients_exponential_negative( + order - 1 - j, interval_start, interval_end + ) + coefficients.append(coefficient) + assert len(coefficients) == order, "the length of coefficients does not match the order" + return coefficients + + def stochastic_adams_bashforth_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor, + noise: torch.Tensor, + order: int, + tau: torch.Tensor, + **kwargs, + ) -> torch.Tensor: + """ + One step for the SA-Predictor. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model at the current timestep. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + order (`int`): + The order of SA-Predictor at this timestep. + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + prev_timestep = args[0] if len(args) > 0 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if noise is None: + if len(args) > 2: + noise = args[2] + else: + raise ValueError("missing `noise` as a required keyword argument") + if order is None: + if len(args) > 3: + order = args[3] + else: + raise ValueError("missing `order` as a required keyword argument") + if tau is None: + if len(args) > 4: + tau = args[4] + else: + raise ValueError("missing `tau` as a required keyword argument") + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + model_output_list = self.model_outputs + sigma_t, sigma_s0 = ( + self.sigmas[self.step_index + 1], + self.sigmas[self.step_index], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + gradient_part = torch.zeros_like(sample) + h = lambda_t - lambda_s0 + lambda_list = [] + + for i in range(order): + si = self.step_index - i + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + lambda_list.append(lambda_si) + + gradient_coefficients = self.get_coefficients_fn(order, lambda_s0, lambda_t, lambda_list, tau) + + x = sample + + if self.predict_x0: + if ( + order == 2 + ): ## if order = 2 we do a modification that does not influence the convergence order similar to unipc. Note: This is used only for few steps sampling. + # The added term is O(h^3). Empirically we find it will slightly improve the image quality. + # ODE case + # gradient_coefficients[0] += 1.0 * torch.exp(lambda_t) * (h ** 2 / 2 - (h - 1 + torch.exp(-h))) / (ns.marginal_lambda(t_prev_list[-1]) - ns.marginal_lambda(t_prev_list[-2])) + # gradient_coefficients[1] -= 1.0 * torch.exp(lambda_t) * (h ** 2 / 2 - (h - 1 + torch.exp(-h))) / (ns.marginal_lambda(t_prev_list[-1]) - ns.marginal_lambda(t_prev_list[-2])) + temp_sigma = self.sigmas[self.step_index - 1] + temp_alpha_s, temp_sigma_s = self._sigma_to_alpha_sigma_t(temp_sigma) + temp_lambda_s = torch.log(temp_alpha_s) - torch.log(temp_sigma_s) + gradient_coefficients[0] += ( + 1.0 + * torch.exp((1 + tau**2) * lambda_t) + * (h**2 / 2 - (h * (1 + tau**2) - 1 + torch.exp((1 + tau**2) * (-h))) / ((1 + tau**2) ** 2)) + / (lambda_s0 - temp_lambda_s) + ) + gradient_coefficients[1] -= ( + 1.0 + * torch.exp((1 + tau**2) * lambda_t) + * (h**2 / 2 - (h * (1 + tau**2) - 1 + torch.exp((1 + tau**2) * (-h))) / ((1 + tau**2) ** 2)) + / (lambda_s0 - temp_lambda_s) + ) + + for i in range(order): + if self.predict_x0: + gradient_part += ( + (1 + tau**2) + * sigma_t + * torch.exp(-(tau**2) * lambda_t) + * gradient_coefficients[i] + * model_output_list[-(i + 1)] + ) + else: + gradient_part += -(1 + tau**2) * alpha_t * gradient_coefficients[i] * model_output_list[-(i + 1)] + + if self.predict_x0: + noise_part = sigma_t * torch.sqrt(1 - torch.exp(-2 * tau**2 * h)) * noise + else: + noise_part = tau * sigma_t * torch.sqrt(torch.exp(2 * h) - 1) * noise + + if self.predict_x0: + x_t = torch.exp(-(tau**2) * h) * (sigma_t / sigma_s0) * x + gradient_part + noise_part + else: + x_t = (alpha_t / alpha_s0) * x + gradient_part + noise_part + + x_t = x_t.to(x.dtype) + return x_t + + def stochastic_adams_moulton_update( + self, + this_model_output: torch.Tensor, + *args, + last_sample: torch.Tensor, + last_noise: torch.Tensor, + this_sample: torch.Tensor, + order: int, + tau: torch.Tensor, + **kwargs, + ) -> torch.Tensor: + """ + One step for the SA-Corrector. + + Args: + this_model_output (`torch.Tensor`): + The model outputs at `x_t`. + this_timestep (`int`): + The current timestep `t`. + last_sample (`torch.Tensor`): + The generated sample before the last predictor `x_{t-1}`. + this_sample (`torch.Tensor`): + The generated sample after the last predictor `x_{t}`. + order (`int`): + The order of SA-Corrector at this step. + + Returns: + `torch.Tensor`: + The corrected sample tensor at the current timestep. + """ + + this_timestep = args[0] if len(args) > 0 else kwargs.pop("this_timestep", None) + if last_sample is None: + if len(args) > 1: + last_sample = args[1] + else: + raise ValueError("missing `last_sample` as a required keyword argument") + if last_noise is None: + if len(args) > 2: + last_noise = args[2] + else: + raise ValueError("missing `last_noise` as a required keyword argument") + if this_sample is None: + if len(args) > 3: + this_sample = args[3] + else: + raise ValueError("missing `this_sample` as a required keyword argument") + if order is None: + if len(args) > 4: + order = args[4] + else: + raise ValueError("missing `order` as a required keyword argument") + if tau is None: + if len(args) > 5: + tau = args[5] + else: + raise ValueError("missing `tau` as a required keyword argument") + if this_timestep is not None: + deprecate( + "this_timestep", + "1.0.0", + "Passing `this_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + model_output_list = self.model_outputs + sigma_t, sigma_s0 = ( + self.sigmas[self.step_index], + self.sigmas[self.step_index - 1], + ) + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + gradient_part = torch.zeros_like(this_sample) + h = lambda_t - lambda_s0 + lambda_list = [] + for i in range(order): + si = self.step_index - i + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + lambda_list.append(lambda_si) + + model_prev_list = model_output_list + [this_model_output] + + gradient_coefficients = self.get_coefficients_fn(order, lambda_s0, lambda_t, lambda_list, tau) + + x = last_sample + + if self.predict_x0: + if ( + order == 2 + ): ## if order = 2 we do a modification that does not influence the convergence order similar to UniPC. Note: This is used only for few steps sampling. + # The added term is O(h^3). Empirically we find it will slightly improve the image quality. + # ODE case + # gradient_coefficients[0] += 1.0 * torch.exp(lambda_t) * (h / 2 - (h - 1 + torch.exp(-h)) / h) + # gradient_coefficients[1] -= 1.0 * torch.exp(lambda_t) * (h / 2 - (h - 1 + torch.exp(-h)) / h) + gradient_coefficients[0] += ( + 1.0 + * torch.exp((1 + tau**2) * lambda_t) + * (h / 2 - (h * (1 + tau**2) - 1 + torch.exp((1 + tau**2) * (-h))) / ((1 + tau**2) ** 2 * h)) + ) + gradient_coefficients[1] -= ( + 1.0 + * torch.exp((1 + tau**2) * lambda_t) + * (h / 2 - (h * (1 + tau**2) - 1 + torch.exp((1 + tau**2) * (-h))) / ((1 + tau**2) ** 2 * h)) + ) + + for i in range(order): + if self.predict_x0: + gradient_part += ( + (1 + tau**2) + * sigma_t + * torch.exp(-(tau**2) * lambda_t) + * gradient_coefficients[i] + * model_prev_list[-(i + 1)] + ) + else: + gradient_part += -(1 + tau**2) * alpha_t * gradient_coefficients[i] * model_prev_list[-(i + 1)] + + if self.predict_x0: + noise_part = sigma_t * torch.sqrt(1 - torch.exp(-2 * tau**2 * h)) * last_noise + else: + noise_part = tau * sigma_t * torch.sqrt(torch.exp(2 * h) - 1) * last_noise + + if self.predict_x0: + x_t = torch.exp(-(tau**2) * h) * (sigma_t / sigma_s0) * x + gradient_part + noise_part + else: + x_t = (alpha_t / alpha_s0) * x + gradient_part + noise_part + + x_t = x_t.to(x.dtype) + return x_t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._init_step_index + def _init_step_index(self, timestep): + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + generator=None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the SA-Solver. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + use_corrector = self.step_index > 0 and self.last_sample is not None + + model_output_convert = self.convert_model_output(model_output, sample=sample) + + if use_corrector: + current_tau = self.tau_func(self.timestep_list[-1]) + sample = self.stochastic_adams_moulton_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + last_noise=self.last_noise, + this_sample=sample, + order=self.this_corrector_order, + tau=current_tau, + ) + + for i in range(max(self.config.predictor_order, self.config.corrector_order - 1) - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + noise = randn_tensor( + model_output.shape, + generator=generator, + device=model_output.device, + dtype=model_output.dtype, + ) + + if self.config.lower_order_final: + this_predictor_order = min(self.config.predictor_order, len(self.timesteps) - self.step_index) + this_corrector_order = min(self.config.corrector_order, len(self.timesteps) - self.step_index + 1) + else: + this_predictor_order = self.config.predictor_order + this_corrector_order = self.config.corrector_order + + self.this_predictor_order = min(this_predictor_order, self.lower_order_nums + 1) # warmup for multistep + self.this_corrector_order = min(this_corrector_order, self.lower_order_nums + 2) # warmup for multistep + assert self.this_predictor_order > 0 + assert self.this_corrector_order > 0 + + self.last_sample = sample + self.last_noise = noise + + current_tau = self.tau_func(self.timestep_list[-1]) + prev_sample = self.stochastic_adams_bashforth_update( + model_output=model_output_convert, + sample=sample, + noise=noise, + order=self.this_predictor_order, + tau=current_tau, + ) + + if self.lower_order_nums < max(self.config.predictor_order, self.config.corrector_order - 1): + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_scm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_scm.py new file mode 100644 index 0000000000000000000000000000000000000000..fb3bbc40d726cf7f824dcbd5fb14e9493649fb7e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_scm.py @@ -0,0 +1,286 @@ +# # Copyright 2025 Sana-Sprint Authors and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +from dataclasses import dataclass + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..schedulers.scheduling_utils import SchedulerMixin +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->SCM +class SCMSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +class SCMScheduler(SchedulerMixin, ConfigMixin): + """ + `SCMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with + non-Markovian guidance. This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass + documentation for the generic methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + prediction_type (`str`, defaults to `trigflow`): + Prediction type of the scheduler function. Currently only supports "trigflow". + sigma_data (`float`, defaults to 0.5): + The standard deviation of the noise added during multi-step inference. + """ + + # _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + prediction_type: str = "trigflow", + sigma_data: float = 0.5, + ): + """ + Initialize the SCM scheduler. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + prediction_type (`str`, defaults to `trigflow`): + Prediction type of the scheduler function. Currently only supports "trigflow". + sigma_data (`float`, defaults to 0.5): + The standard deviation of the noise added during multi-step inference. + """ + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + + self._step_index = None + self._begin_index = None + + @property + def step_index(self): + return self._step_index + + @property + def begin_index(self): + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps( + self, + num_inference_steps: int, + timesteps: torch.Tensor = None, + device: str | torch.device = None, + max_timesteps: float = 1.57080, + intermediate_timesteps: float = 1.3, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + timesteps (`torch.Tensor`, *optional*): + Custom timesteps to use for the denoising process. + max_timesteps (`float`, defaults to 1.57080): + The maximum timestep value used in the SCM scheduler. + intermediate_timesteps (`float`, *optional*, defaults to 1.3): + The intermediate timestep value used in SCM scheduler (only used when num_inference_steps=2). + """ + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + if timesteps is not None and len(timesteps) != num_inference_steps + 1: + raise ValueError("If providing custom timesteps, `timesteps` must be of length `num_inference_steps + 1`.") + + if timesteps is not None and max_timesteps is not None: + raise ValueError("If providing custom timesteps, `max_timesteps` should not be provided.") + + if timesteps is None and max_timesteps is None: + raise ValueError("Should provide either `timesteps` or `max_timesteps`.") + + if intermediate_timesteps is not None and num_inference_steps != 2: + raise ValueError("Intermediate timesteps for SCM is not supported when num_inference_steps != 2.") + + self.num_inference_steps = num_inference_steps + + if timesteps is not None: + if isinstance(timesteps, list): + self.timesteps = torch.tensor(timesteps, device=device).float() + elif isinstance(timesteps, torch.Tensor): + self.timesteps = timesteps.to(device).float() + else: + raise ValueError(f"Unsupported timesteps type: {type(timesteps)}") + elif intermediate_timesteps is not None: + self.timesteps = torch.tensor([max_timesteps, intermediate_timesteps, 0], device=device).float() + else: + # max_timesteps=arctan(80/0.5)=1.56454 is the default from sCM paper, we choose a different value here + self.timesteps = torch.linspace(max_timesteps, 0, num_inference_steps + 1, device=device).float() + + self._step_index = None + self._begin_index = None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + def step( + self, + model_output: torch.FloatTensor, + timestep: float, + sample: torch.FloatTensor, + generator: torch.Generator = None, + return_dict: bool = True, + ) -> SCMSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`float`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_scm.SCMSchedulerOutput`] or `tuple`. + Returns: + [`~schedulers.scheduling_utils.SCMSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_scm.SCMSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + # 2. compute alphas, betas + t = self.timesteps[self.step_index + 1] + s = self.timesteps[self.step_index] + + # 4. Different Parameterization: + parameterization = self.config.prediction_type + + if parameterization == "trigflow": + pred_x0 = torch.cos(s) * sample - torch.sin(s) * model_output + else: + raise ValueError(f"Unsupported parameterization: {parameterization}") + + # 5. Sample z ~ N(0, I), For MultiStep Inference + # Noise is not used for one-step sampling. + if len(self.timesteps) > 1: + noise = ( + randn_tensor(model_output.shape, device=model_output.device, generator=generator) + * self.config.sigma_data + ) + prev_sample = torch.cos(t) * pred_x0 + torch.sin(t) * noise + else: + prev_sample = pred_x0 + + self._step_index += 1 + + if not return_dict: + return (prev_sample, pred_x0) + + return SCMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_x0) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sde_ve.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sde_ve.py new file mode 100644 index 0000000000000000000000000000000000000000..e4e37666c60d2cb598b401c4fa8578aa2d9ec68e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sde_ve.py @@ -0,0 +1,298 @@ +# Copyright 2025 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch + +import math +from dataclasses import dataclass + +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin, SchedulerOutput + + +@dataclass +class SdeVeOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + prev_sample_mean (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Mean averaged `prev_sample` over previous timesteps. + """ + + prev_sample: torch.Tensor + prev_sample_mean: torch.Tensor + + +class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): + """ + `ScoreSdeVeScheduler` is a variance exploding stochastic differential equation (SDE) scheduler. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + snr (`float`, defaults to 0.15): + A coefficient weighting the step from the `model_output` sample (from the network) to the random noise. + sigma_min (`float`, defaults to 0.01): + The initial noise scale for the sigma sequence in the sampling procedure. The minimum sigma should mirror + the distribution of the data. + sigma_max (`float`, defaults to 1348.0): + The maximum value used for the range of continuous timesteps passed into the model. + sampling_eps (`float`, defaults to 1e-5): + The end value of sampling where timesteps decrease progressively from 1 to epsilon. + correct_steps (`int`, defaults to 1): + The number of correction steps performed on a produced sample. + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 2000, + snr: float = 0.15, + sigma_min: float = 0.01, + sigma_max: float = 1348.0, + sampling_eps: float = 1e-5, + correct_steps: int = 1, + ): + # standard deviation of the initial noise distribution + self.init_noise_sigma = sigma_max + + # setable values + self.timesteps = None + + self.set_sigmas(num_train_timesteps, sigma_min, sigma_max, sampling_eps) + + def scale_model_input(self, sample: torch.Tensor, timestep: int = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + def set_timesteps(self, num_inference_steps: int, sampling_eps: float = None, device: str | torch.device = None): + """ + Sets the continuous timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + sampling_eps (`float`, *optional*): + The final timestep value (overrides value given during scheduler instantiation). + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + + """ + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + + self.timesteps = torch.linspace(1, sampling_eps, num_inference_steps, device=device) + + def set_sigmas( + self, num_inference_steps: int, sigma_min: float = None, sigma_max: float = None, sampling_eps: float = None + ): + """ + Sets the noise scales used for the diffusion chain (to be run before inference). The sigmas control the weight + of the `drift` and `diffusion` components of the sample update. + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + sigma_min (`float`, optional): + The initial noise scale value (overrides value given during scheduler instantiation). + sigma_max (`float`, optional): + The final noise scale value (overrides value given during scheduler instantiation). + sampling_eps (`float`, optional): + The final timestep value (overrides value given during scheduler instantiation). + + """ + sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min + sigma_max = sigma_max if sigma_max is not None else self.config.sigma_max + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + if self.timesteps is None: + self.set_timesteps(num_inference_steps, sampling_eps) + + self.sigmas = sigma_min * (sigma_max / sigma_min) ** (self.timesteps / sampling_eps) + self.discrete_sigmas = torch.exp(torch.linspace(math.log(sigma_min), math.log(sigma_max), num_inference_steps)) + self.sigmas = torch.tensor([sigma_min * (sigma_max / sigma_min) ** t for t in self.timesteps]) + + def get_adjacent_sigma(self, timesteps, t): + return torch.where( + timesteps == 0, + torch.zeros_like(t.to(timesteps.device)), + self.discrete_sigmas[timesteps - 1].to(timesteps.device), + ) + + def step_pred( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> SdeVeOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_sde_ve.SdeVeOutput`] is returned, otherwise a tuple + is returned where the first element is the sample tensor. + + """ + if self.timesteps is None: + raise ValueError( + "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" + ) + + timestep = timestep * torch.ones( + sample.shape[0], device=sample.device + ) # torch.repeat_interleave(timestep, sample.shape[0]) + timesteps = (timestep * (len(self.timesteps) - 1)).long() + + # mps requires indices to be in the same device, so we use cpu as is the default with cuda + timesteps = timesteps.to(self.discrete_sigmas.device) + + sigma = self.discrete_sigmas[timesteps].to(sample.device) + adjacent_sigma = self.get_adjacent_sigma(timesteps, timestep).to(sample.device) + drift = torch.zeros_like(sample) + diffusion = (sigma**2 - adjacent_sigma**2) ** 0.5 + + # equation 6 in the paper: the model_output modeled by the network is grad_x log pt(x) + # also equation 47 shows the analog from SDE models to ancestral sampling methods + diffusion = diffusion.flatten() + while len(diffusion.shape) < len(sample.shape): + diffusion = diffusion.unsqueeze(-1) + drift = drift - diffusion**2 * model_output + + # equation 6: sample noise for the diffusion term of + noise = randn_tensor( + sample.shape, layout=sample.layout, generator=generator, device=sample.device, dtype=sample.dtype + ) + prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep + # TODO is the variable diffusion the correct scaling term for the noise? + prev_sample = prev_sample_mean + diffusion * noise # add impact of diffusion field g + + if not return_dict: + return (prev_sample, prev_sample_mean) + + return SdeVeOutput(prev_sample=prev_sample, prev_sample_mean=prev_sample_mean) + + def step_correct( + self, + model_output: torch.Tensor, + sample: torch.Tensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Correct the predicted sample based on the `model_output` of the network. This is often run repeatedly after + making the prediction for the previous timestep. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_sde_ve.SdeVeOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_sde_ve.SdeVeOutput`] is returned, otherwise a tuple + is returned where the first element is the sample tensor. + + """ + if self.timesteps is None: + raise ValueError( + "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" + ) + + # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. of z" + # sample noise for correction + noise = randn_tensor(sample.shape, layout=sample.layout, generator=generator).to(sample.device) + + # compute step size from the model_output, the noise, and the snr + grad_norm = torch.norm(model_output.reshape(model_output.shape[0], -1), dim=-1).mean() + noise_norm = torch.norm(noise.reshape(noise.shape[0], -1), dim=-1).mean() + step_size = (self.config.snr * noise_norm / grad_norm) ** 2 * 2 + step_size = step_size * torch.ones(sample.shape[0]).to(sample.device) + # self.repeat_scalar(step_size, sample.shape[0]) + + # compute corrected sample: model_output term and noise term + step_size = step_size.flatten() + while len(step_size.shape) < len(sample.shape): + step_size = step_size.unsqueeze(-1) + prev_sample_mean = sample + step_size * model_output + prev_sample = prev_sample_mean + ((step_size * 2) ** 0.5) * noise + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.Tensor, + ) -> torch.Tensor: + # Make sure sigmas and timesteps have the same device and dtype as original_samples + timesteps = timesteps.to(original_samples.device) + sigmas = self.discrete_sigmas.to(original_samples.device)[timesteps] + noise = ( + noise * sigmas[:, None, None, None] + if noise is not None + else torch.randn_like(original_samples) * sigmas[:, None, None, None] + ) + noisy_samples = noise + original_samples + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sde_ve_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sde_ve_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..62a54c7dc948061dd0512e04d2b33230b9abcb05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_sde_ve_flax.py @@ -0,0 +1,294 @@ +# Copyright 2025 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch + +from dataclasses import dataclass + +import flax +import jax +import jax.numpy as jnp +from jax import random + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging +from .scheduling_utils_flax import ( + FlaxSchedulerMixin, + FlaxSchedulerOutput, + broadcast_to_shape_from_left, +) + + +logger = logging.get_logger(__name__) + + +@flax.struct.dataclass +class ScoreSdeVeSchedulerState: + # setable values + timesteps: jnp.ndarray | None = None + discrete_sigmas: jnp.ndarray | None = None + sigmas: jnp.ndarray | None = None + + @classmethod + def create(cls): + return cls() + + +@dataclass +class FlaxSdeVeOutput(FlaxSchedulerOutput): + """ + Output class for the ScoreSdeVeScheduler's step function output. + + Args: + state (`ScoreSdeVeSchedulerState`): + prev_sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + prev_sample_mean (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): + Mean averaged `prev_sample`. Same as `prev_sample`, only mean-averaged over previous timesteps. + """ + + state: ScoreSdeVeSchedulerState + prev_sample: jnp.ndarray + prev_sample_mean: jnp.ndarray | None = None + + +class FlaxScoreSdeVeScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + The variance exploding stochastic differential equation (SDE) scheduler. + + For more information, see the original paper: https://huggingface.co/papers/2011.13456 + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and + [`~SchedulerMixin.from_pretrained`] functions. + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + snr (`float`): + coefficient weighting the step from the model_output sample (from the network) to the random noise. + sigma_min (`float`): + initial noise scale for sigma sequence in sampling procedure. The minimum sigma should mirror the + distribution of the data. + sigma_max (`float`): maximum value used for the range of continuous timesteps passed into the model. + sampling_eps (`float`): the end value of sampling, where timesteps decrease progressively from 1 to + epsilon. + correct_steps (`int`): number of correction steps performed on a produced sample. + """ + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 2000, + snr: float = 0.15, + sigma_min: float = 0.01, + sigma_max: float = 1348.0, + sampling_eps: float = 1e-5, + correct_steps: int = 1, + ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + + def create_state(self): + state = ScoreSdeVeSchedulerState.create() + return self.set_sigmas( + state, + self.config.num_train_timesteps, + self.config.sigma_min, + self.config.sigma_max, + self.config.sampling_eps, + ) + + def set_timesteps( + self, + state: ScoreSdeVeSchedulerState, + num_inference_steps: int, + shape: tuple = (), + sampling_eps: float = None, + ) -> ScoreSdeVeSchedulerState: + """ + Sets the continuous timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`ScoreSdeVeSchedulerState`): the `FlaxScoreSdeVeScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + sampling_eps (`float`, optional): + final timestep value (overrides value given at Scheduler instantiation). + + """ + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + + timesteps = jnp.linspace(1, sampling_eps, num_inference_steps) + return state.replace(timesteps=timesteps) + + def set_sigmas( + self, + state: ScoreSdeVeSchedulerState, + num_inference_steps: int, + sigma_min: float = None, + sigma_max: float = None, + sampling_eps: float = None, + ) -> ScoreSdeVeSchedulerState: + """ + Sets the noise scales used for the diffusion chain. Supporting function to be run before inference. + + The sigmas control the weight of the `drift` and `diffusion` components of sample update. + + Args: + state (`ScoreSdeVeSchedulerState`): the `FlaxScoreSdeVeScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + sigma_min (`float`, optional): + initial noise scale value (overrides value given at Scheduler instantiation). + sigma_max (`float`, optional): + final noise scale value (overrides value given at Scheduler instantiation). + sampling_eps (`float`, optional): + final timestep value (overrides value given at Scheduler instantiation). + """ + sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min + sigma_max = sigma_max if sigma_max is not None else self.config.sigma_max + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + if state.timesteps is None: + state = self.set_timesteps(state, num_inference_steps, sampling_eps) + + discrete_sigmas = jnp.exp(jnp.linspace(jnp.log(sigma_min), jnp.log(sigma_max), num_inference_steps)) + sigmas = jnp.array([sigma_min * (sigma_max / sigma_min) ** t for t in state.timesteps]) + + return state.replace(discrete_sigmas=discrete_sigmas, sigmas=sigmas) + + def get_adjacent_sigma(self, state, timesteps, t): + return jnp.where(timesteps == 0, jnp.zeros_like(t), state.discrete_sigmas[timesteps - 1]) + + def step_pred( + self, + state: ScoreSdeVeSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + key: jax.Array, + return_dict: bool = True, + ) -> FlaxSdeVeOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + state (`ScoreSdeVeSchedulerState`): the `FlaxScoreSdeVeScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + generator: random number generator. + return_dict (`bool`): option for returning tuple rather than FlaxSdeVeOutput class + + Returns: + [`FlaxSdeVeOutput`] or `tuple`: [`FlaxSdeVeOutput`] if `return_dict` is True, otherwise a `tuple`. When + returning a tuple, the first element is the sample tensor. + + """ + if state.timesteps is None: + raise ValueError( + "`state.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" + ) + + timestep = timestep * jnp.ones( + sample.shape[0], + ) + timesteps = (timestep * (len(state.timesteps) - 1)).long() + + sigma = state.discrete_sigmas[timesteps] + adjacent_sigma = self.get_adjacent_sigma(state, timesteps, timestep) + drift = jnp.zeros_like(sample) + diffusion = (sigma**2 - adjacent_sigma**2) ** 0.5 + + # equation 6 in the paper: the model_output modeled by the network is grad_x log pt(x) + # also equation 47 shows the analog from SDE models to ancestral sampling methods + diffusion = diffusion.flatten() + diffusion = broadcast_to_shape_from_left(diffusion, sample.shape) + drift = drift - diffusion**2 * model_output + + # equation 6: sample noise for the diffusion term of + key = random.split(key, num=1) + noise = random.normal(key=key, shape=sample.shape) + prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep + # TODO is the variable diffusion the correct scaling term for the noise? + prev_sample = prev_sample_mean + diffusion * noise # add impact of diffusion field g + + if not return_dict: + return (prev_sample, prev_sample_mean, state) + + return FlaxSdeVeOutput(prev_sample=prev_sample, prev_sample_mean=prev_sample_mean, state=state) + + def step_correct( + self, + state: ScoreSdeVeSchedulerState, + model_output: jnp.ndarray, + sample: jnp.ndarray, + key: jax.Array, + return_dict: bool = True, + ) -> FlaxSdeVeOutput | tuple: + """ + Correct the predicted sample based on the output model_output of the network. This is often run repeatedly + after making the prediction for the previous timestep. + + Args: + state (`ScoreSdeVeSchedulerState`): the `FlaxScoreSdeVeScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + generator: random number generator. + return_dict (`bool`): option for returning tuple rather than FlaxSdeVeOutput class + + Returns: + [`FlaxSdeVeOutput`] or `tuple`: [`FlaxSdeVeOutput`] if `return_dict` is True, otherwise a `tuple`. When + returning a tuple, the first element is the sample tensor. + + """ + if state.timesteps is None: + raise ValueError( + "`state.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" + ) + + # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. of z" + # sample noise for correction + key = random.split(key, num=1) + noise = random.normal(key=key, shape=sample.shape) + + # compute step size from the model_output, the noise, and the snr + grad_norm = jnp.linalg.norm(model_output) + noise_norm = jnp.linalg.norm(noise) + step_size = (self.config.snr * noise_norm / grad_norm) ** 2 * 2 + step_size = step_size * jnp.ones(sample.shape[0]) + + # compute corrected sample: model_output term and noise term + step_size = step_size.flatten() + step_size = broadcast_to_shape_from_left(step_size, sample.shape) + prev_sample_mean = sample + step_size * model_output + prev_sample = prev_sample_mean + ((step_size * 2) ** 0.5) * noise + + if not return_dict: + return (prev_sample, state) + + return FlaxSdeVeOutput(prev_sample=prev_sample, state=state) + + def __len__(self): + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_tcd.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_tcd.py new file mode 100644 index 0000000000000000000000000000000000000000..838d040c04a618eb54e4950bed0c9b2ff4d2c73e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_tcd.py @@ -0,0 +1,791 @@ +# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion +# and https://github.com/hojonathanho/diffusion + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..schedulers.scheduling_utils import SchedulerMixin +from ..utils import BaseOutput, logging +from ..utils.torch_utils import randn_tensor + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +@dataclass +class TCDSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_noised_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted noised sample `(x_{s})` based on the model output from the current timestep. + """ + + prev_sample: torch.Tensor + pred_noised_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class TCDScheduler(SchedulerMixin, ConfigMixin): + """ + `TCDScheduler` incorporates the `Strategic Stochastic Sampling` introduced by the paper `Trajectory Consistency + Distillation`, extending the original Multistep Consistency Sampling to enable unrestricted trajectory traversal. + + This code is based on the official repo of TCD(https://github.com/jabir-zheng/TCD). + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. [`~ConfigMixin`] takes care of storing all config + attributes that are passed in the scheduler's `__init__` function, such as `num_train_timesteps`. They can be + accessed via `scheduler.config.num_train_timesteps`. [`SchedulerMixin`] provides general loading and saving + functionality via the [`SchedulerMixin.save_pretrained`] and [`~SchedulerMixin.from_pretrained`] functions. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + original_inference_steps (`int`, *optional*, defaults to 50): + The default number of inference steps used to generate a linearly-spaced timestep schedule, from which we + will ultimately take `num_inference_steps` evenly spaced timesteps to form the final timestep schedule. + clip_sample (`bool`, defaults to `True`): + Clip the predicted sample for numerical stability. + clip_sample_range (`float`, defaults to 1.0): + The maximum magnitude for sample clipping. Valid only when `clip_sample=True`. + set_alpha_to_one (`bool`, defaults to `True`): + Each diffusion step uses the alphas product value at that step and at the previous one. For the final step + there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`, + otherwise it uses the alpha value at step 0. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True`. + timestep_spacing (`str`, defaults to `"leading"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + timestep_scaling (`float`, defaults to 10.0): + The factor the timesteps will be multiplied by when calculating the consistency model boundary conditions + `c_skip` and `c_out`. Increasing this will decrease the approximation error (although the approximation + error at the default of `10.0` is already pretty small). + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.00085, + beta_end: float = 0.012, + beta_schedule: str = "scaled_linear", + trained_betas: np.ndarray | list[float] | None = None, + original_inference_steps: int = 50, + clip_sample: bool = False, + clip_sample_range: float = 1.0, + set_alpha_to_one: bool = True, + steps_offset: int = 0, + prediction_type: str = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + timestep_spacing: str = "leading", + timestep_scaling: float = 10.0, + rescale_betas_zero_snr: bool = False, + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + # Rescale for zero SNR + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this parameter simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) + self.custom_timesteps = False + + self._step_index = None + self._begin_index = None + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep + def index_for_timestep( + self, timestep: float | torch.Tensor, schedule_timesteps: torch.Tensor | None = None + ) -> int: + """ + Find the index of a given timestep in the timestep schedule. + + Args: + timestep (`float` or `torch.Tensor`): + The timestep value to find in the schedule. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. For the very first step, returns the second index if + multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image). + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + indices = (schedule_timesteps == timestep).nonzero() + + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + pos = 1 if len(indices) > 1 else 0 + + return indices[pos].item() + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index + def _init_step_index(self, timestep: float | torch.Tensor) -> None: + """ + Initialize the step index for the scheduler based on the given timestep. + + Args: + timestep (`float` or `torch.Tensor`): + The current timestep to initialize the step index from. + """ + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + @property + def step_index(self): + return self._step_index + + @property + def begin_index(self): + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0): + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + timestep (`int`, *optional*): + The current timestep in the diffusion chain. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler._get_variance + def _get_variance(self, timestep, prev_timestep): + """ + Computes the variance of the noise added at a given diffusion step. + + For a given `timestep` and its previous step, this method calculates the variance as defined in DDIM/DDPM + literature: + var_t = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + where alpha_prod and beta_prod are cumulative products of alphas and betas, respectively. + + Args: + timestep (`int`): + The current timestep in the diffusion process. + prev_timestep (`int`): + The previous timestep in the diffusion process. If negative, uses `final_alpha_cumprod`. + + Returns: + `torch.Tensor`: + The variance for the current timestep. + """ + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + original_inference_steps: int | None = None, + timesteps: list[int] | None = None, + strength: float = 1.0, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`, *optional*): + The number of diffusion steps used when generating samples with a pre-trained model. If used, + `timesteps` must be `None`. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + original_inference_steps (`int`, *optional*): + The original number of inference steps, which will be used to generate a linearly-spaced timestep + schedule (which is different from the standard `diffusers` implementation). We will then take + `num_inference_steps` timesteps from this schedule, evenly spaced in terms of indices, and use that as + our final timestep schedule. If not set, this will default to the `original_inference_steps` attribute. + timesteps (`list[int]`, *optional*): + Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default + timestep spacing strategy of equal spacing between timesteps on the training/distillation timestep + schedule is used. If `timesteps` is passed, `num_inference_steps` must be `None`. + strength (`float`, *optional*, defaults to 1.0): + Used to determine the number of timesteps used for inference when using img2img, inpaint, etc. + """ + # 0. Check inputs + if num_inference_steps is None and timesteps is None: + raise ValueError("Must pass exactly one of `num_inference_steps` or `custom_timesteps`.") + + if num_inference_steps is not None and timesteps is not None: + raise ValueError("Can only pass one of `num_inference_steps` or `custom_timesteps`.") + + # 1. Calculate the TCD original training/distillation timestep schedule. + original_steps = ( + original_inference_steps if original_inference_steps is not None else self.config.original_inference_steps + ) + + if original_inference_steps is None: + # default option, timesteps align with discrete inference steps + if original_steps > self.config.num_train_timesteps: + raise ValueError( + f"`original_steps`: {original_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + # TCD Timesteps Setting + # The skipping step parameter k from the paper. + k = self.config.num_train_timesteps // original_steps + # TCD Training/Distillation Steps Schedule + tcd_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * k - 1 + else: + # customised option, sampled timesteps can be any arbitrary value + tcd_origin_timesteps = np.asarray(list(range(0, int(self.config.num_train_timesteps * strength)))) + + # 2. Calculate the TCD inference timestep schedule. + if timesteps is not None: + # 2.1 Handle custom timestep schedules. + train_timesteps = set(tcd_origin_timesteps) + non_train_timesteps = [] + for i in range(1, len(timesteps)): + if timesteps[i] >= timesteps[i - 1]: + raise ValueError("`custom_timesteps` must be in descending order.") + + if timesteps[i] not in train_timesteps: + non_train_timesteps.append(timesteps[i]) + + if timesteps[0] >= self.config.num_train_timesteps: + raise ValueError( + f"`timesteps` must start before `self.config.train_timesteps`: {self.config.num_train_timesteps}." + ) + + # Raise warning if timestep schedule does not start with self.config.num_train_timesteps - 1 + if strength == 1.0 and timesteps[0] != self.config.num_train_timesteps - 1: + logger.warning( + f"The first timestep on the custom timestep schedule is {timesteps[0]}, not" + f" `self.config.num_train_timesteps - 1`: {self.config.num_train_timesteps - 1}. You may get" + f" unexpected results when using this timestep schedule." + ) + + # Raise warning if custom timestep schedule contains timesteps not on original timestep schedule + if non_train_timesteps: + logger.warning( + f"The custom timestep schedule contains the following timesteps which are not on the original" + f" training/distillation timestep schedule: {non_train_timesteps}. You may get unexpected results" + f" when using this timestep schedule." + ) + + # Raise warning if custom timestep schedule is longer than original_steps + if original_steps is not None: + if len(timesteps) > original_steps: + logger.warning( + f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds the" + f" the length of the timestep schedule used for training: {original_steps}. You may get some" + f" unexpected results when using this timestep schedule." + ) + else: + if len(timesteps) > self.config.num_train_timesteps: + logger.warning( + f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds the" + f" the length of the timestep schedule used for training: {self.config.num_train_timesteps}. You may get some" + f" unexpected results when using this timestep schedule." + ) + + timesteps = np.array(timesteps, dtype=np.int64) + self.num_inference_steps = len(timesteps) + self.custom_timesteps = True + + # Apply strength (e.g. for img2img pipelines) (see StableDiffusionImg2ImgPipeline.get_timesteps) + init_timestep = min(int(self.num_inference_steps * strength), self.num_inference_steps) + t_start = max(self.num_inference_steps - init_timestep, 0) + timesteps = timesteps[t_start * self.order :] + # TODO: also reset self.num_inference_steps? + else: + # 2.2 Create the "standard" TCD inference timestep schedule. + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:" + f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle" + f" maximal {self.config.num_train_timesteps} timesteps." + ) + + if original_steps is not None: + skipping_step = len(tcd_origin_timesteps) // num_inference_steps + + if skipping_step < 1: + raise ValueError( + f"The combination of `original_steps x strength`: {original_steps} x {strength} is smaller than `num_inference_steps`: {num_inference_steps}. Make sure to either reduce `num_inference_steps` to a value smaller than {int(original_steps * strength)} or increase `strength` to a value higher than {float(num_inference_steps / original_steps)}." + ) + + self.num_inference_steps = num_inference_steps + + if original_steps is not None: + if num_inference_steps > original_steps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:" + f" {original_steps} because the final timestep schedule will be a subset of the" + f" `original_inference_steps`-sized initial timestep schedule." + ) + else: + if num_inference_steps > self.config.num_train_timesteps: + raise ValueError( + f"`num_inference_steps`: {num_inference_steps} cannot be larger than `num_train_timesteps`:" + f" {self.config.num_train_timesteps} because the final timestep schedule will be a subset of the" + f" `num_train_timesteps`-sized initial timestep schedule." + ) + + # TCD Inference Steps Schedule + tcd_origin_timesteps = tcd_origin_timesteps[::-1].copy() + # Select (approximately) evenly spaced indices from tcd_origin_timesteps. + inference_indices = np.linspace(0, len(tcd_origin_timesteps), num=num_inference_steps, endpoint=False) + inference_indices = np.floor(inference_indices).astype(np.int64) + timesteps = tcd_origin_timesteps[inference_indices] + + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long) + + self._step_index = None + self._begin_index = None + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + eta: float = 0.3, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> TCDSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + eta (`float`): + A stochastic parameter (referred to as `gamma` in the paper) used to control the stochasticity in every + step. When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic + sampling. + generator (`torch.Generator`, *optional*): + A random number generator. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] or `tuple`. + Returns: + [`~schedulers.scheduling_utils.TCDSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + assert 0 <= eta <= 1.0, "gamma must be less than or equal to 1.0" + + # 1. get previous step value + prev_step_index = self.step_index + 1 + if prev_step_index < len(self.timesteps): + prev_timestep = self.timesteps[prev_step_index] + else: + prev_timestep = torch.tensor(0) + + timestep_s = torch.floor((1 - eta) * prev_timestep).to(dtype=torch.long) + + # 2. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + beta_prod_t = 1 - alpha_prod_t + + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + + alpha_prod_s = self.alphas_cumprod[timestep_s] + beta_prod_s = 1 - alpha_prod_s + + # 3. Compute the predicted noised sample x_s based on the model parameterization + if self.config.prediction_type == "epsilon": # noise-prediction + pred_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt() + pred_epsilon = model_output + pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon + elif self.config.prediction_type == "sample": # x-prediction + pred_original_sample = model_output + pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5) + pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon + elif self.config.prediction_type == "v_prediction": # v-prediction + pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output + pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample + pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or" + " `v_prediction` for `TCDScheduler`." + ) + + # 4. Sample and inject noise z ~ N(0, I) for MultiStep Inference + # Noise is not used on the final timestep of the timestep schedule. + # This also means that noise is not used for one-step sampling. + # Eta (referred to as "gamma" in the paper) was introduced to control the stochasticity in every step. + # When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic sampling. + if eta > 0: + if self.step_index != self.num_inference_steps - 1: + noise = randn_tensor( + model_output.shape, generator=generator, device=model_output.device, dtype=pred_noised_sample.dtype + ) + prev_sample = (alpha_prod_t_prev / alpha_prod_s).sqrt() * pred_noised_sample + ( + 1 - alpha_prod_t_prev / alpha_prod_s + ).sqrt() * noise + else: + prev_sample = pred_noised_sample + else: + prev_sample = pred_noised_sample + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample, pred_noised_sample) + + return TCDSchedulerOutput(prev_sample=prev_sample, pred_noised_sample=pred_noised_sample) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity + def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: + """ + Compute the velocity prediction from the sample and noise according to the velocity formula. + + Args: + sample (`torch.Tensor`): + The input sample. + noise (`torch.Tensor`): + The noise tensor. + timesteps (`torch.IntTensor`): + The timesteps for velocity computation. + + Returns: + `torch.Tensor`: + The computed velocity. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as sample + self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) + timesteps = timesteps.to(sample.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(sample.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity + + def __len__(self): + return self.config.num_train_timesteps + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep + def previous_timestep(self, timestep): + """ + Compute the previous timestep in the diffusion chain. + + Args: + timestep (`int`): + The current timestep. + + Returns: + `int` or `torch.Tensor`: + The previous timestep. + """ + if self.custom_timesteps or self.num_inference_steps: + index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] + if index == self.timesteps.shape[0] - 1: + prev_t = torch.tensor(-1) + else: + prev_t = self.timesteps[index + 1] + else: + prev_t = timestep - 1 + return prev_t diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_unclip.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_unclip.py new file mode 100644 index 0000000000000000000000000000000000000000..26226b4eb35b42cffd32b3036176e45362b3b81b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_unclip.py @@ -0,0 +1,379 @@ +# Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from ..utils.torch_utils import randn_tensor +from .scheduling_utils import SchedulerMixin + + +@dataclass +# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->UnCLIP +class UnCLIPSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's `step` function output. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample `(x_{0})` based on the model output from the current timestep. + `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.Tensor + pred_original_sample: torch.Tensor | None = None + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class UnCLIPScheduler(SchedulerMixin, ConfigMixin): + """ + NOTE: do not use this scheduler. The DDPM scheduler has been updated to support the changes made here. This + scheduler will be removed and replaced with DDPM. + + This is a modified DDPM Scheduler specifically for the karlo unCLIP model. + + This scheduler has some minor variations in how it calculates the learned range variance and dynamically + re-calculates betas based off the timesteps it is skipping. + + The scheduler also uses a slightly different step ratio when computing timesteps to use for inference. + + See [`~DDPMScheduler`] for more information on DDPM scheduling + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + variance_type (`str`): + options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small_log` + or `learned_range`. + clip_sample (`bool`, default `True`): + option to clip predicted sample between `-clip_sample_range` and `clip_sample_range` for numerical + stability. + clip_sample_range (`float`, default `1.0`): + The range to clip the sample between. See `clip_sample`. + prediction_type (`str`, default `epsilon`, optional): + prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion process) + or `sample` (directly predicting the noisy sample`) + """ + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + variance_type: str = "fixed_small_log", + clip_sample: bool = True, + clip_sample_range: float = 1.0, + prediction_type: str = "epsilon", + beta_schedule: str = "squaredcos_cap_v2", + ): + if beta_schedule != "squaredcos_cap_v2": + raise ValueError("UnCLIPScheduler only supports `beta_schedule`: 'squaredcos_cap_v2'") + + self.betas = betas_for_alpha_bar(num_train_timesteps) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + self.one = torch.tensor(1.0) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + self.variance_type = variance_type + + def scale_model_input(self, sample: torch.Tensor, timestep: int | None = None) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): input sample + timestep (`int`, optional): current timestep + + Returns: + `torch.Tensor`: scaled input sample + """ + return sample + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Note that this scheduler uses a slightly different step ratio than the other diffusers schedulers. The + different step ratio is to mimic the original karlo implementation and does not affect the quality or accuracy + of the results. + + Args: + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + """ + self.num_inference_steps = num_inference_steps + step_ratio = (self.config.num_train_timesteps - 1) / (self.num_inference_steps - 1) + timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64) + self.timesteps = torch.from_numpy(timesteps).to(device) + + def _get_variance(self, t, prev_timestep=None, predicted_variance=None, variance_type=None): + if prev_timestep is None: + prev_timestep = t - 1 + + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + if prev_timestep == t - 1: + beta = self.betas[t] + else: + beta = 1 - alpha_prod_t / alpha_prod_t_prev + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = beta_prod_t_prev / beta_prod_t * beta + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probably added for training stability + if variance_type == "fixed_small_log": + variance = torch.log(torch.clamp(variance, min=1e-20)) + variance = torch.exp(0.5 * variance) + elif variance_type == "learned_range": + # NOTE difference with DDPM scheduler + min_log = variance.log() + max_log = beta.log() + + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + model_output: torch.Tensor, + timestep: int, + sample: torch.Tensor, + prev_timestep: int | None = None, + generator=None, + return_dict: bool = True, + ) -> UnCLIPSchedulerOutput | tuple: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.Tensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + current instance of sample being created by diffusion process. + prev_timestep (`int`, *optional*): The previous timestep to predict the previous sample at. + Used to dynamically compute beta. If not given, `t-1` is used and the pre-computed beta is used. + generator: random number generator. + return_dict (`bool`): option for returning tuple rather than UnCLIPSchedulerOutput class + + Returns: + [`~schedulers.scheduling_utils.UnCLIPSchedulerOutput`] or `tuple`: + [`~schedulers.scheduling_utils.UnCLIPSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When + returning a tuple, the first element is the sample tensor. + + """ + t = timestep + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type == "learned_range": + model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + if prev_timestep is None: + prev_timestep = t - 1 + + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + if prev_timestep == t - 1: + beta = self.betas[t] + alpha = self.alphas[t] + else: + beta = 1 - alpha_prod_t / alpha_prod_t_prev + alpha = 1 - beta + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239 + if self.config.prediction_type == "epsilon": + pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) + elif self.config.prediction_type == "sample": + pred_original_sample = model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon` or `sample`" + " for the UnCLIPScheduler." + ) + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = torch.clamp( + pred_original_sample, -self.config.clip_sample_range, self.config.clip_sample_range + ) + + # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * beta) / beta_prod_t + current_sample_coeff = alpha ** (0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://huggingface.co/papers/2006.11239 + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + variance_noise = randn_tensor( + model_output.shape, dtype=model_output.dtype, generator=generator, device=model_output.device + ) + + variance = self._get_variance( + t, + predicted_variance=predicted_variance, + prev_timestep=prev_timestep, + ) + + if self.variance_type == "fixed_small_log": + variance = variance + elif self.variance_type == "learned_range": + variance = (0.5 * variance).exp() + else: + raise ValueError( + f"variance_type given as {self.variance_type} must be one of `fixed_small_log` or `learned_range`" + " for the UnCLIPScheduler." + ) + + variance = variance * variance_noise + + pred_prev_sample = pred_prev_sample + variance + + if not return_dict: + return ( + pred_prev_sample, + pred_original_sample, + ) + + return UnCLIPSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise magnitude at each timestep (this is the forward + diffusion process). + + Args: + original_samples (`torch.Tensor`): + The original samples to which noise will be added. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps indicating the noise level for each sample. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement + # for the subsequent add_noise calls + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device) + alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_unipc_multistep.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_unipc_multistep.py new file mode 100644 index 0000000000000000000000000000000000000000..71a5444491ed5d4f8f78e0a93d8189bfcb9abd7d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_unipc_multistep.py @@ -0,0 +1,1300 @@ +# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: check https://huggingface.co/papers/2302.04867 and https://github.com/wl-zhao/UniPC for more info +# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py + +import math +from typing import Literal + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import deprecate, is_scipy_available +from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +if is_scipy_available(): + import scipy.stats + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps: int, + max_beta: float = 0.999, + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", +) -> torch.Tensor: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + Args: + num_diffusion_timesteps (`int`): + The number of betas to produce. + max_beta (`float`, defaults to `0.999`): + The maximum beta to use; use values lower than 1 to avoid numerical instability. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. + + Returns: + `torch.Tensor`: + The betas used by the scheduler to step the model outputs. + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "laplace": + + def alpha_bar_fn(t): + lmb = -0.5 * math.copysign(1, 0.5 - t) * math.log(1 - 2 * math.fabs(0.5 - t) + 1e-6) + snr = math.exp(lmb) + return math.sqrt(snr / (1 + snr)) + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: + """ + Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) + + Args: + betas (`torch.Tensor`): + The betas that the scheduler is being initialized with. + + Returns: + `torch.Tensor`: + Rescaled betas with zero terminal SNR. + """ + # Convert betas to alphas_bar_sqrt + alphas = 1.0 - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_bar_sqrt = alphas_cumprod.sqrt() + + # Store old values. + alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone() + alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone() + + # Shift so the last timestep is zero. + alphas_bar_sqrt -= alphas_bar_sqrt_T + + # Scale so the first timestep is back to the old value. + alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T) + + # Convert alphas_bar_sqrt to betas + alphas_bar = alphas_bar_sqrt**2 # Revert sqrt + alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod + alphas = torch.cat([alphas_bar[0:1], alphas]) + betas = 1 - alphas + + return betas + + +class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + `UniPCMultistepScheduler` is a training-free framework designed for the fast sampling of diffusion models. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, defaults to `2`): + The UniPC order which can be any positive integer. The effective order of accuracy is `solver_order + 1` + due to the UniC. It is recommended to use `solver_order=2` for guided sampling, and `solver_order=3` for + unconditional sampling. + prediction_type (`"epsilon"`, `"sample"`, `"v_prediction"`, or `"flow_prediction"`, defaults to `"epsilon"`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`), `v_prediction` (see section 2.4 of [Imagen + Video](https://huggingface.co/papers/2210.02303) paper), or `flow_prediction`. + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and `predict_x0=True`. + predict_x0 (`bool`, defaults to `True`): + Whether to use the updating algorithm on the predicted x0. + solver_type (`"bh1"` or `"bh2"`, defaults to `"bh2"`): + Solver type for UniPC. It is recommended to use `bh1` for unconditional sampling when steps < 10, and `bh2` + otherwise. + lower_order_final (`bool`, default `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + disable_corrector (`list`, default `[]`): + Decides which step to disable the corrector to mitigate the misalignment between `epsilon_theta(x_t, c)` + and `epsilon_theta(x_t^c, c)` which can influence convergence for a large guidance scale. Corrector is + usually disabled during the first few steps. + solver_p (`SchedulerMixin`, default `None`): + Any other scheduler that if specified, the algorithm becomes `solver_p + UniC`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + use_exponential_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process. + use_beta_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + use_flow_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use flow sigmas for step sizes in the noise schedule during the sampling process. + timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps, as required by some model families. + final_sigmas_type (`"zero"` or `"sigma_min"`, defaults to `"zero"`): + The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final + sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0. + rescale_betas_zero_snr (`bool`, defaults to `False`): + Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and + dark samples instead of limiting it to samples with medium brightness. Loosely related to + [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: np.ndarray | list[float] | None = None, + solver_order: int = 2, + prediction_type: Literal["epsilon", "sample", "v_prediction", "flow_prediction"] = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + predict_x0: bool = True, + solver_type: Literal["bh1", "bh2"] = "bh2", + lower_order_final: bool = True, + disable_corrector: list[int] = [], + solver_p: SchedulerMixin = None, + use_karras_sigmas: bool = False, + use_exponential_sigmas: bool = False, + use_beta_sigmas: bool = False, + use_flow_sigmas: bool = False, + flow_shift: float = 1.0, + timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace", + steps_offset: int = 0, + final_sigmas_type: Literal["zero", "sigma_min"] = "zero", + rescale_betas_zero_snr: bool = False, + use_dynamic_shifting: bool = False, + time_shift_type: Literal["exponential"] = "exponential", + sigma_min: float | None = None, + sigma_max: bool | None = None, + shift_terminal: bool | None = None, + ) -> None: + if self.config.use_beta_sigmas and not is_scipy_available(): + raise ImportError("Make sure to install scipy if you want to use beta sigmas.") + if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1: + raise ValueError( + "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used." + ) + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + if shift_terminal is not None and not use_flow_sigmas: + raise ValueError("`shift_terminal` is only supported when `use_flow_sigmas=True`.") + + if rescale_betas_zero_snr: + self.betas = rescale_zero_terminal_snr(self.betas) + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + + if rescale_betas_zero_snr: + # Close to 0 without being 0 so first sigma is not inf + # FP16 smallest positive subnormal works well here + self.alphas_cumprod[-1] = 2**-24 + + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + if solver_type not in ["bh1", "bh2"]: + if solver_type in ["midpoint", "heun", "logrho"]: + self.register_to_config(solver_type="bh2") + else: + raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + + self.predict_x0 = predict_x0 + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.model_outputs = [None] * solver_order + self.timestep_list = [None] * solver_order + self.lower_order_nums = 0 + self.disable_corrector = disable_corrector + self.solver_p = solver_p + self.last_sample = None + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + @property + def step_index(self) -> int: + """ + The index counter for current timestep. It will increase 1 after each scheduler step. + """ + return self._step_index + + @property + def begin_index(self) -> int: + """ + The index for the first timestep. It should be set from pipeline with `set_begin_index` method. + """ + return self._begin_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index + def set_begin_index(self, begin_index: int = 0) -> None: + """ + Sets the begin index for the scheduler. This function should be run from pipeline before the inference. + + Args: + begin_index (`int`, defaults to `0`): + The begin index for the scheduler. + """ + self._begin_index = begin_index + + def set_timesteps( + self, + num_inference_steps: int | None = None, + device: str | torch.device = None, + sigmas: list[float] | None = None, + mu: bool | None = None, + ): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + sigmas (`List[float]`, *optional*): + Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed + automatically. + mu (`float`, *optional*): + Optional mu parameter for dynamic shifting when using exponential time shift type. + """ + if self.config.use_dynamic_shifting and mu is None: + raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`") + + if sigmas is not None: + if not self.config.use_flow_sigmas: + raise ValueError( + "Passing `sigmas` is only supported when `use_flow_sigmas=True`. " + "Please set `use_flow_sigmas=True` during scheduler initialization." + ) + num_inference_steps = len(sigmas) + + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + if self.config.use_karras_sigmas: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + if self.config.use_flow_sigmas: + sigmas = sigmas / (sigmas + 1) + timesteps = (sigmas * self.config.num_train_timesteps).copy() + else: + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + + if self.config.final_sigmas_type == "sigma_min": + sigma_last = sigmas[-1] + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + elif self.config.use_exponential_sigmas: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + if self.config.final_sigmas_type == "sigma_min": + sigma_last = sigmas[-1] + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + elif self.config.use_beta_sigmas: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + log_sigmas = np.log(sigmas) + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]) + if self.config.final_sigmas_type == "sigma_min": + sigma_last = sigmas[-1] + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + elif self.config.use_flow_sigmas: + if sigmas is None: + sigmas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)[:-1] + if self.config.use_dynamic_shifting: + sigmas = self.time_shift(mu, 1.0, sigmas) + else: + sigmas = self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas) + if self.config.shift_terminal: + sigmas = self.stretch_shift_to_terminal(sigmas) + eps = 1e-6 + if np.fabs(sigmas[0] - 1) < eps: + # to avoid inf torch.log(alpha_si) in multistep_uni_p_bh_update during first/second update + sigmas[0] -= eps + timesteps = (sigmas * self.config.num_train_timesteps).copy() + if self.config.final_sigmas_type == "sigma_min": + sigma_last = sigmas[-1] + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + else: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + if self.config.final_sigmas_type == "sigma_min": + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + elif self.config.final_sigmas_type == "zero": + sigma_last = 0 + else: + raise ValueError( + f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}" + ) + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + self.last_sample = None + if self.solver_p: + self.solver_p.set_timesteps(self.num_inference_steps, device=device) + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + self._begin_index = None + self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.time_shift + def time_shift(self, mu: float, sigma: float, t: torch.Tensor): + """ + Apply time shifting to the sigmas. + + Args: + mu (`float`): + The mu parameter for the time shift. + sigma (`float`): + The sigma parameter for the time shift. + t (`torch.Tensor`): + The input timesteps. + + Returns: + `torch.Tensor`: + The time-shifted timesteps. + """ + if self.config.time_shift_type == "exponential": + return self._time_shift_exponential(mu, sigma, t) + elif self.config.time_shift_type == "linear": + return self._time_shift_linear(mu, sigma, t) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.stretch_shift_to_terminal + def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor: + r""" + Stretches and shifts the timestep schedule to ensure it terminates at the configured `shift_terminal` config + value. + + Reference: + https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51 + + Args: + t (`torch.Tensor`): + A tensor of timesteps to be stretched and shifted. + + Returns: + `torch.Tensor`: + A tensor of adjusted timesteps such that the final value equals `self.config.shift_terminal`. + """ + one_minus_z = 1 - t + scale_factor = one_minus_z[-1] / (1 - self.config.shift_terminal) + stretched_t = 1 - (one_minus_z / scale_factor) + return stretched_t + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_exponential + def _time_shift_exponential(self, mu, sigma, t): + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_linear + def _time_shift_linear(self, mu, sigma, t): + return mu / (mu + (1 / t - 1) ** sigma) + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: + """ + Apply dynamic thresholding to the predicted sample. + + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." + + https://huggingface.co/papers/2205.11487 + + Args: + sample (`torch.Tensor`): + The predicted sample to be thresholded. + + Returns: + `torch.Tensor`: + The thresholded sample. + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray: + """ + Convert sigma values to corresponding timestep values through interpolation. + + Args: + sigma (`np.ndarray`): + The sigma value(s) to convert to timestep(s). + log_sigmas (`np.ndarray`): + The logarithm of the sigma schedule used for interpolation. + + Returns: + `np.ndarray`: + The interpolated timestep value(s) corresponding to the input sigma(s). + """ + # get log sigma + log_sigma = np.log(np.maximum(sigma, 1e-10)) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Convert sigma values to alpha_t and sigma_t values. + + Args: + sigma (`torch.Tensor`): + The sigma value(s) to convert. + + Returns: + `tuple[torch.Tensor, torch.Tensor]`: + A tuple containing (alpha_t, sigma_t) values. + """ + if self.config.use_flow_sigmas: + alpha_t = 1 - sigma + sigma_t = sigma + else: + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative + Models](https://huggingface.co/papers/2206.00364). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following the Karras noise schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential + def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor: + """ + Construct an exponential noise schedule. + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + + Returns: + `torch.Tensor`: + The converted sigma values following an exponential schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps)) + return sigmas + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta( + self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6 + ) -> torch.Tensor: + """ + Construct a beta noise schedule as proposed in [Beta Sampling is All You + Need](https://huggingface.co/papers/2407.12173). + + Args: + in_sigmas (`torch.Tensor`): + The input sigma values to be converted. + num_inference_steps (`int`): + The number of inference steps to generate the noise schedule for. + alpha (`float`, *optional*, defaults to `0.6`): + The alpha parameter for the beta distribution. + beta (`float`, *optional*, defaults to `0.6`): + The beta parameter for the beta distribution. + + Returns: + `torch.Tensor`: + The converted sigma values following a beta distribution schedule. + """ + + # Hack to make sure that other schedulers which copy this function don't break + # TODO: Add this logic to the other schedulers + if hasattr(self.config, "sigma_min"): + sigma_min = self.config.sigma_min + else: + sigma_min = None + + if hasattr(self.config, "sigma_max"): + sigma_max = self.config.sigma_max + else: + sigma_max = None + + sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item() + sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item() + + sigmas = np.array( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps) + ] + ] + ) + return sigmas + + def convert_model_output( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + **kwargs, + ) -> torch.Tensor: + r""" + Convert the model output to the corresponding type the UniPC algorithm needs. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.Tensor`: + The converted model output. + """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + + if self.predict_x0: + if self.config.prediction_type == "epsilon": + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + x0_pred = alpha_t * sample - sigma_t * model_output + elif self.config.prediction_type == "flow_prediction": + sigma_t = self.sigmas[self.step_index] + x0_pred = sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, " + "`v_prediction`, or `flow_prediction` for the UniPCMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + else: + if self.config.prediction_type == "epsilon": + return model_output + elif self.config.prediction_type == "sample": + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + elif self.config.prediction_type == "v_prediction": + epsilon = alpha_t * model_output + sigma_t * sample + return epsilon + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the UniPCMultistepScheduler." + ) + + def multistep_uni_p_bh_update( + self, + model_output: torch.Tensor, + *args, + sample: torch.Tensor = None, + order: int = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the UniP (B(h) version). Alternatively, `self.solver_p` is used if is specified. + + Args: + model_output (`torch.Tensor`): + The direct output from the learned diffusion model at the current timestep. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + order (`int`): + The order of UniP at this timestep (corresponds to the *p* in UniPC-p). + + Returns: + `torch.Tensor`: + The sample tensor at the previous timestep. + """ + prev_timestep = args[0] if len(args) > 0 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyword argument") + if order is None: + if len(args) > 2: + order = args[2] + else: + raise ValueError("missing `order` as a required keyword argument") + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + model_output_list = self.model_outputs + + s0 = self.timestep_list[-1] + m0 = model_output_list[-1] + x = sample + + if self.solver_p: + x_t = self.solver_p.step(model_output, s0, x).prev_sample + return x_t + + sigma_t, sigma_s0 = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + h = lambda_t - lambda_s0 + device = sample.device + + rks = [] + D1s = [] + for i in range(1, order): + si = self.step_index - i + mi = model_output_list[-(i + 1)] + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + rk = (lambda_si - lambda_s0) / h + rks.append(rk) + D1s.append((mi - m0) / rk) + + rks.append(1.0) + rks = torch.tensor(rks, device=device) + + R = [] + b = [] + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.config.solver_type == "bh1": + B_h = hh + elif self.config.solver_type == "bh2": + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= i + 1 + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=device) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) # (B, K) + # for order 2, we use a simplified version + if order == 2: + rhos_p = torch.tensor([0.5], dtype=x.dtype, device=device) + else: + rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]).to(device).to(x.dtype) + else: + D1s = None + + if self.predict_x0: + x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - alpha_t * B_h * pred_res + else: + x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - sigma_t * B_h * pred_res + + x_t = x_t.to(x.dtype) + return x_t + + def multistep_uni_c_bh_update( + self, + this_model_output: torch.Tensor, + *args, + last_sample: torch.Tensor = None, + this_sample: torch.Tensor = None, + order: int = None, + **kwargs, + ) -> torch.Tensor: + """ + One step for the UniC (B(h) version). + + Args: + this_model_output (`torch.Tensor`): + The model outputs at `x_t`. + this_timestep (`int`): + The current timestep `t`. + last_sample (`torch.Tensor`): + The generated sample before the last predictor `x_{t-1}`. + this_sample (`torch.Tensor`): + The generated sample after the last predictor `x_{t}`. + order (`int`): + The `p` of UniC-p at this step. The effective order of accuracy should be `order + 1`. + + Returns: + `torch.Tensor`: + The corrected sample tensor at the current timestep. + """ + this_timestep = args[0] if len(args) > 0 else kwargs.pop("this_timestep", None) + if last_sample is None: + if len(args) > 1: + last_sample = args[1] + else: + raise ValueError("missing `last_sample` as a required keyword argument") + if this_sample is None: + if len(args) > 2: + this_sample = args[2] + else: + raise ValueError("missing `this_sample` as a required keyword argument") + if order is None: + if len(args) > 3: + order = args[3] + else: + raise ValueError("missing `order` as a required keyword argument") + if this_timestep is not None: + deprecate( + "this_timestep", + "1.0.0", + "Passing `this_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + model_output_list = self.model_outputs + + m0 = model_output_list[-1] + x = last_sample + x_t = this_sample + model_t = this_model_output + + sigma_t, sigma_s0 = self.sigmas[self.step_index], self.sigmas[self.step_index - 1] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + h = lambda_t - lambda_s0 + device = this_sample.device + + rks = [] + D1s = [] + for i in range(1, order): + si = self.step_index - (i + 1) + mi = model_output_list[-(i + 1)] + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + rk = (lambda_si - lambda_s0) / h + rks.append(rk) + D1s.append((mi - m0) / rk) + + rks.append(1.0) + rks = torch.tensor(rks, device=device) + + R = [] + b = [] + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.config.solver_type == "bh1": + B_h = hh + elif self.config.solver_type == "bh2": + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= i + 1 + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=device) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) + else: + D1s = None + + # for order 1, we use a simplified version + if order == 1: + rhos_c = torch.tensor([0.5], dtype=x.dtype, device=device) + else: + rhos_c = torch.linalg.solve(R, b).to(device).to(x.dtype) + + if self.predict_x0: + x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 + if D1s is not None: + corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s) + else: + corr_res = 0 + D1_t = model_t - m0 + x_t = x_t_ - alpha_t * B_h * (corr_res + rhos_c[-1] * D1_t) + else: + x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 + if D1s is not None: + corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s) + else: + corr_res = 0 + D1_t = model_t - m0 + x_t = x_t_ - sigma_t * B_h * (corr_res + rhos_c[-1] * D1_t) + x_t = x_t.to(x.dtype) + return x_t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.index_for_timestep + def index_for_timestep( + self, + timestep: int | torch.Tensor, + schedule_timesteps: torch.Tensor | None = None, + ) -> int: + """ + Find the index for a given timestep in the schedule. + + Args: + timestep (`int` or `torch.Tensor`): + The timestep for which to find the index. + schedule_timesteps (`torch.Tensor`, *optional*): + The timestep schedule to search in. If `None`, uses `self.timesteps`. + + Returns: + `int`: + The index of the timestep in the schedule. + """ + if schedule_timesteps is None: + schedule_timesteps = self.timesteps + + index_candidates = (schedule_timesteps == timestep).nonzero() + + if len(index_candidates) == 0: + step_index = len(self.timesteps) - 1 + # The sigma index that is taken for the **very** first `step` + # is always the second index (or the last index if there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + return step_index + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._init_step_index + def _init_step_index(self, timestep: int | torch.Tensor) -> None: + """ + Initialize the step_index counter for the scheduler. + + Args: + timestep (`int` or `torch.Tensor`): + The current timestep for which to initialize the step index. + """ + + if self.begin_index is None: + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + self._step_index = self.index_for_timestep(timestep) + else: + self._step_index = self._begin_index + + def step( + self, + model_output: torch.Tensor, + timestep: int | torch.Tensor, + sample: torch.Tensor, + return_dict: bool = True, + ) -> SchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep UniPC. + + Args: + model_output (`torch.Tensor`): + The direct output from learned diffusion model. + timestep (`int` or `torch.Tensor`): + The current discrete timestep in the diffusion chain. + sample (`torch.Tensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. + + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector and self.last_sample is not None + ) + + model_output_convert = self.convert_model_output(model_output, sample=sample) + if use_corrector: + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + if self.config.lower_order_final: + this_order = min(self.config.solver_order, len(self.timesteps) - self.step_index) + else: + this_order = self.config.solver_order + + self.this_order = min(this_order, self.lower_order_nums + 1) # warmup for multistep + assert self.this_order > 0 + + self.last_sample = sample + prev_sample = self.multistep_uni_p_bh_update( + model_output=model_output, # pass the original non-converted model output, in case solver-p is used + sample=sample, + order=self.this_order, + ) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.Tensor`): + The input sample. + + Returns: + `torch.Tensor`: + A scaled input sample. + """ + return sample + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.add_noise + def add_noise( + self, + original_samples: torch.Tensor, + noise: torch.Tensor, + timesteps: torch.IntTensor, + ) -> torch.Tensor: + """ + Add noise to the original samples according to the noise schedule at the specified timesteps. + + Args: + original_samples (`torch.Tensor`): + The original samples without noise. + noise (`torch.Tensor`): + The noise to add to the samples. + timesteps (`torch.IntTensor`): + The timesteps at which to add noise to the samples. + + Returns: + `torch.Tensor`: + The noisy samples. + """ + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + # begin_index is None when the scheduler is used for training or pipeline does not implement set_begin_index + if self.begin_index is None: + step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps] + elif self.step_index is not None: + # add_noise is called after first denoising step (for inpainting) + step_indices = [self.step_index] * timesteps.shape[0] + else: + # add noise is called before first denoising step to create initial latent(img2img) + step_indices = [self.begin_index] * timesteps.shape[0] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self) -> int: + return self.config.num_train_timesteps diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0d48d791ac42a86a18537f226f7325db9d602c05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_utils.py @@ -0,0 +1,189 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import os +from dataclasses import dataclass +from enum import Enum + +import torch +from huggingface_hub.utils import validate_hf_hub_args +from typing_extensions import Self + +from ..utils import BaseOutput, PushToHubMixin + + +SCHEDULER_CONFIG_NAME = "scheduler_config.json" + + +# NOTE: We make this type an enum because it simplifies usage in docs and prevents +# circular imports when used for `_compatibles` within the schedulers module. +# When it's used as a type in pipelines, it really is a Union because the actual +# scheduler instance is passed in. +class KarrasDiffusionSchedulers(Enum): + DDIMScheduler = 1 + DDPMScheduler = 2 + PNDMScheduler = 3 + LMSDiscreteScheduler = 4 + EulerDiscreteScheduler = 5 + HeunDiscreteScheduler = 6 + EulerAncestralDiscreteScheduler = 7 + DPMSolverMultistepScheduler = 8 + DPMSolverSinglestepScheduler = 9 + KDPM2DiscreteScheduler = 10 + KDPM2AncestralDiscreteScheduler = 11 + DEISMultistepScheduler = 12 + UniPCMultistepScheduler = 13 + DPMSolverSDEScheduler = 14 + EDMEulerScheduler = 15 + + +AysSchedules = { + "StableDiffusionTimesteps": [999, 850, 736, 645, 545, 455, 343, 233, 124, 24], + "StableDiffusionSigmas": [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.0], + "StableDiffusionXLTimesteps": [999, 845, 730, 587, 443, 310, 193, 116, 53, 13], + "StableDiffusionXLSigmas": [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.0], + "StableDiffusionVideoSigmas": [700.00, 54.5, 15.886, 7.977, 4.248, 1.789, 0.981, 0.403, 0.173, 0.034, 0.0], +} + + +@dataclass +class SchedulerOutput(BaseOutput): + """ + Base class for the output of a scheduler's `step` function. + + Args: + prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.Tensor + + +class SchedulerMixin(PushToHubMixin): + """ + Base class for all schedulers. + + [`SchedulerMixin`] contains common functions shared by all schedulers such as general loading and saving + functionalities. + + [`ConfigMixin`] takes care of storing the configuration attributes (like `num_train_timesteps`) that are passed to + the scheduler's `__init__` function, and the attributes can be accessed by `scheduler.config.num_train_timesteps`. + + Class attributes: + - **_compatibles** (`list[str]`) -- A list of scheduler classes that are compatible with the parent scheduler + class. Use [`~ConfigMixin.from_config`] to load a different compatible scheduler class (should be overridden + by parent class). + """ + + config_name = SCHEDULER_CONFIG_NAME + _compatibles = [] + has_compatibles = True + + @classmethod + @validate_hf_hub_args + def from_pretrained( + cls, + pretrained_model_name_or_path: str | os.PathLike | None = None, + subfolder: str | None = None, + return_unused_kwargs=False, + **kwargs, + ) -> Self: + r""" + Instantiate a scheduler from a pre-defined JSON configuration file in a local directory or Hub repository. + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on + the Hub. + - A path to a *directory* (for example `./my_model_directory`) containing the scheduler + configuration saved with [`~SchedulerMixin.save_pretrained`]. + subfolder (`str`, *optional*): + The subfolder location of a model file within a larger model repository on the Hub or locally. + return_unused_kwargs (`bool`, *optional*, defaults to `False`): + Whether kwargs that are not consumed by the Python class should be returned or not. + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory where a downloaded pretrained model configuration is cached if the standard cache + is not used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only load local model weights and configuration files or not. If set to `True`, the model + won't be downloaded from the Hub. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from + `diffusers-cli login` (stored in `~/.huggingface`) is used. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier + allowed by Git. + + > [!TIP] > To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in + with `hf > auth login`. You can also activate the special > + ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use this method in a > + firewalled environment. + + """ + config, kwargs, commit_hash = cls.load_config( + pretrained_model_name_or_path=pretrained_model_name_or_path, + subfolder=subfolder, + return_unused_kwargs=True, + return_commit_hash=True, + **kwargs, + ) + return cls.from_config(config, return_unused_kwargs=return_unused_kwargs, **kwargs) + + def save_pretrained(self, save_directory: str | os.PathLike, push_to_hub: bool = False, **kwargs): + """ + Save a scheduler configuration object to a directory so that it can be reloaded using the + [`~SchedulerMixin.from_pretrained`] class method. + + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs) + + @property + def compatibles(self): + """ + Returns all schedulers that are compatible with this scheduler + + Returns: + `list[SchedulerMixin]`: list of compatible schedulers + """ + return self._get_compatibles() + + @classmethod + def _get_compatibles(cls): + compatible_classes_str = list(set([cls.__name__] + cls._compatibles)) + diffusers_library = importlib.import_module(__name__.split(".")[0]) + compatible_classes = [ + getattr(diffusers_library, c) for c in compatible_classes_str if hasattr(diffusers_library, c) + ] + return compatible_classes diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_utils_flax.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_utils_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..44de56a3980daa3f5348eac2ee947c7bce6bfead --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_utils_flax.py @@ -0,0 +1,289 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import math +import os +from dataclasses import dataclass +from enum import Enum + +import flax +import jax.numpy as jnp +from huggingface_hub.utils import validate_hf_hub_args + +from ..utils import BaseOutput, PushToHubMixin, logging + + +logger = logging.get_logger(__name__) + +SCHEDULER_CONFIG_NAME = "scheduler_config.json" + + +# NOTE: We make this type an enum because it simplifies usage in docs and prevents +# circular imports when used for `_compatibles` within the schedulers module. +# When it's used as a type in pipelines, it really is a Union because the actual +# scheduler instance is passed in. +class FlaxKarrasDiffusionSchedulers(Enum): + FlaxDDIMScheduler = 1 + FlaxDDPMScheduler = 2 + FlaxPNDMScheduler = 3 + FlaxLMSDiscreteScheduler = 4 + FlaxDPMSolverMultistepScheduler = 5 + FlaxEulerDiscreteScheduler = 6 + + +@dataclass +class FlaxSchedulerOutput(BaseOutput): + """ + Base class for the scheduler's step function output. + + Args: + prev_sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: jnp.ndarray + + +class FlaxSchedulerMixin(PushToHubMixin): + """ + Mixin containing common functions for the schedulers. + + Class attributes: + - **_compatibles** (`list[str]`) -- A list of classes that are compatible with the parent class, so that + `from_config` can be used from a class different than the one used to save the config (should be overridden + by parent class). + """ + + config_name = SCHEDULER_CONFIG_NAME + ignore_for_config = ["dtype"] + _compatibles = [] + has_compatibles = True + + @classmethod + @validate_hf_hub_args + def from_pretrained( + cls, + pretrained_model_name_or_path: str | os.PathLike | None = None, + subfolder: str | None = None, + return_unused_kwargs=False, + **kwargs, + ): + r""" + Instantiate a Scheduler class from a pre-defined JSON-file. + + Parameters: + pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*): + Can be either: + + - A string, the *model id* of a model repo on huggingface.co. Valid model ids should have an + organization name, like `google/ddpm-celebahq-256`. + - A path to a *directory* containing model weights saved using [`~SchedulerMixin.save_pretrained`], + e.g., `./my_model_directory/`. + subfolder (`str`, *optional*): + In case the relevant files are located inside a subfolder of the model repo (either remote in + huggingface.co or downloaded locally), you can specify the folder name here. + return_unused_kwargs (`bool`, *optional*, defaults to `False`): + Whether kwargs that are not consumed by the Python class should be returned or not. + + cache_dir (`str | os.PathLike`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the + standard cache should not be used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force the (re-)download of the model weights and configuration files, overriding the + cached versions if they exist. + + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + output_loading_info(`bool`, *optional*, defaults to `False`): + Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether or not to only look at local files (i.e., do not try to download the model). + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `transformers-cli login` (stored in `~/.huggingface`). + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + + > [!TIP] > It is required to be logged in (`hf auth login`) when you want to use private or [gated > + models](https://huggingface.co/docs/hub/models-gated#gated-models). + + > [!TIP] > Activate the special + ["offline-mode"](https://huggingface.co/transformers/installation.html#offline-mode) to > use this method in a + firewalled environment. + + """ + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." + ) + config, kwargs = cls.load_config( + pretrained_model_name_or_path=pretrained_model_name_or_path, + subfolder=subfolder, + return_unused_kwargs=True, + **kwargs, + ) + scheduler, unused_kwargs = cls.from_config(config, return_unused_kwargs=True, **kwargs) + + if hasattr(scheduler, "create_state") and getattr(scheduler, "has_state", False): + state = scheduler.create_state() + + if return_unused_kwargs: + return scheduler, state, unused_kwargs + + return scheduler, state + + def save_pretrained(self, save_directory: str | os.PathLike, push_to_hub: bool = False, **kwargs): + """ + Save a scheduler configuration object to the directory `save_directory`, so that it can be re-loaded using the + [`~FlaxSchedulerMixin.from_pretrained`] class method. + + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + push_to_hub (`bool`, *optional*, defaults to `False`): + Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the + repository you want to push to with `repo_id` (will default to the name of `save_directory` in your + namespace). + kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs) + + @property + def compatibles(self): + """ + Returns all schedulers that are compatible with this scheduler + + Returns: + `list[SchedulerMixin]`: list of compatible schedulers + """ + return self._get_compatibles() + + @classmethod + def _get_compatibles(cls): + compatible_classes_str = list(set([cls.__name__] + cls._compatibles)) + diffusers_library = importlib.import_module(__name__.split(".")[0]) + compatible_classes = [ + getattr(diffusers_library, c) for c in compatible_classes_str if hasattr(diffusers_library, c) + ] + return compatible_classes + + +def broadcast_to_shape_from_left(x: jnp.ndarray, shape: tuple[int]) -> jnp.ndarray: + assert len(shape) >= x.ndim + return jnp.broadcast_to(x.reshape(x.shape + (1,) * (len(shape) - x.ndim)), shape) + + +def betas_for_alpha_bar(num_diffusion_timesteps: int, max_beta=0.999, dtype=jnp.float32) -> jnp.ndarray: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + + Returns: + betas (`jnp.ndarray`): the betas used by the scheduler to step the model outputs + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return jnp.array(betas, dtype=dtype) + + +@flax.struct.dataclass +class CommonSchedulerState: + alphas: jnp.ndarray + betas: jnp.ndarray + alphas_cumprod: jnp.ndarray + + @classmethod + def create(cls, scheduler): + config = scheduler.config + + if config.trained_betas is not None: + betas = jnp.asarray(config.trained_betas, dtype=scheduler.dtype) + elif config.beta_schedule == "linear": + betas = jnp.linspace(config.beta_start, config.beta_end, config.num_train_timesteps, dtype=scheduler.dtype) + elif config.beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + betas = ( + jnp.linspace( + config.beta_start**0.5, config.beta_end**0.5, config.num_train_timesteps, dtype=scheduler.dtype + ) + ** 2 + ) + elif config.beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + betas = betas_for_alpha_bar(config.num_train_timesteps, dtype=scheduler.dtype) + else: + raise NotImplementedError( + f"beta_schedule {config.beta_schedule} is not implemented for scheduler {scheduler.__class__.__name__}" + ) + + alphas = 1.0 - betas + + alphas_cumprod = jnp.cumprod(alphas, axis=0) + + return cls( + alphas=alphas, + betas=betas, + alphas_cumprod=alphas_cumprod, + ) + + +def get_sqrt_alpha_prod( + state: CommonSchedulerState, original_samples: jnp.ndarray, noise: jnp.ndarray, timesteps: jnp.ndarray +): + alphas_cumprod = state.alphas_cumprod + + sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + sqrt_alpha_prod = broadcast_to_shape_from_left(sqrt_alpha_prod, original_samples.shape) + + sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + sqrt_one_minus_alpha_prod = broadcast_to_shape_from_left(sqrt_one_minus_alpha_prod, original_samples.shape) + + return sqrt_alpha_prod, sqrt_one_minus_alpha_prod + + +def add_noise_common( + state: CommonSchedulerState, original_samples: jnp.ndarray, noise: jnp.ndarray, timesteps: jnp.ndarray +): + sqrt_alpha_prod, sqrt_one_minus_alpha_prod = get_sqrt_alpha_prod(state, original_samples, noise, timesteps) + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + +def get_velocity_common(state: CommonSchedulerState, sample: jnp.ndarray, noise: jnp.ndarray, timesteps: jnp.ndarray): + sqrt_alpha_prod, sqrt_one_minus_alpha_prod = get_sqrt_alpha_prod(state, sample, noise, timesteps) + velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample + return velocity diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_vq_diffusion.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_vq_diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..7ba3a8e6f5c4e3f503de3928dfc4d12d2b866b1b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/schedulers/scheduling_vq_diffusion.py @@ -0,0 +1,466 @@ +# Copyright 2025 Microsoft and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import numpy as np +import torch +import torch.nn.functional as F + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from .scheduling_utils import SchedulerMixin + + +@dataclass +class VQDiffusionSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.LongTensor + + +def index_to_log_onehot(x: torch.LongTensor, num_classes: int) -> torch.Tensor: + """ + Convert batch of vector of class indices into batch of log onehot vectors + + Args: + x (`torch.LongTensor` of shape `(batch size, vector length)`): + Batch of class indices + + num_classes (`int`): + number of classes to be used for the onehot vectors + + Returns: + `torch.Tensor` of shape `(batch size, num classes, vector length)`: + Log onehot vectors + """ + x_onehot = F.one_hot(x, num_classes) + x_onehot = x_onehot.permute(0, 2, 1) + log_x = torch.log(x_onehot.float().clamp(min=1e-30)) + return log_x + + +def gumbel_noised(logits: torch.Tensor, generator: torch.Generator | None) -> torch.Tensor: + """ + Apply gumbel noise to `logits` + """ + uniform = torch.rand(logits.shape, device=logits.device, generator=generator) + gumbel_noise = -torch.log(-torch.log(uniform + 1e-30) + 1e-30) + noised = gumbel_noise + logits + return noised + + +def alpha_schedules(num_diffusion_timesteps: int, alpha_cum_start=0.99999, alpha_cum_end=0.000009): + """ + Cumulative and non-cumulative alpha schedules. + + See section 4.1. + """ + att = ( + np.arange(0, num_diffusion_timesteps) / (num_diffusion_timesteps - 1) * (alpha_cum_end - alpha_cum_start) + + alpha_cum_start + ) + att = np.concatenate(([1], att)) + at = att[1:] / att[:-1] + att = np.concatenate((att[1:], [1])) + return at, att + + +def gamma_schedules(num_diffusion_timesteps: int, gamma_cum_start=0.000009, gamma_cum_end=0.99999): + """ + Cumulative and non-cumulative gamma schedules. + + See section 4.1. + """ + ctt = ( + np.arange(0, num_diffusion_timesteps) / (num_diffusion_timesteps - 1) * (gamma_cum_end - gamma_cum_start) + + gamma_cum_start + ) + ctt = np.concatenate(([0], ctt)) + one_minus_ctt = 1 - ctt + one_minus_ct = one_minus_ctt[1:] / one_minus_ctt[:-1] + ct = 1 - one_minus_ct + ctt = np.concatenate((ctt[1:], [0])) + return ct, ctt + + +class VQDiffusionScheduler(SchedulerMixin, ConfigMixin): + """ + A scheduler for vector quantized diffusion. + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_vec_classes (`int`): + The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked + latent pixel. + num_train_timesteps (`int`, defaults to 100): + The number of diffusion steps to train the model. + alpha_cum_start (`float`, defaults to 0.99999): + The starting cumulative alpha value. + alpha_cum_end (`float`, defaults to 0.00009): + The ending cumulative alpha value. + gamma_cum_start (`float`, defaults to 0.00009): + The starting cumulative gamma value. + gamma_cum_end (`float`, defaults to 0.99999): + The ending cumulative gamma value. + """ + + order = 1 + + @register_to_config + def __init__( + self, + num_vec_classes: int, + num_train_timesteps: int = 100, + alpha_cum_start: float = 0.99999, + alpha_cum_end: float = 0.000009, + gamma_cum_start: float = 0.000009, + gamma_cum_end: float = 0.99999, + ): + self.num_embed = num_vec_classes + + # By convention, the index for the mask class is the last class index + self.mask_class = self.num_embed - 1 + + at, att = alpha_schedules(num_train_timesteps, alpha_cum_start=alpha_cum_start, alpha_cum_end=alpha_cum_end) + ct, ctt = gamma_schedules(num_train_timesteps, gamma_cum_start=gamma_cum_start, gamma_cum_end=gamma_cum_end) + + num_non_mask_classes = self.num_embed - 1 + bt = (1 - at - ct) / num_non_mask_classes + btt = (1 - att - ctt) / num_non_mask_classes + + at = torch.tensor(at.astype("float64")) + bt = torch.tensor(bt.astype("float64")) + ct = torch.tensor(ct.astype("float64")) + log_at = torch.log(at) + log_bt = torch.log(bt) + log_ct = torch.log(ct) + + att = torch.tensor(att.astype("float64")) + btt = torch.tensor(btt.astype("float64")) + ctt = torch.tensor(ctt.astype("float64")) + log_cumprod_at = torch.log(att) + log_cumprod_bt = torch.log(btt) + log_cumprod_ct = torch.log(ctt) + + self.log_at = log_at.float() + self.log_bt = log_bt.float() + self.log_ct = log_ct.float() + self.log_cumprod_at = log_cumprod_at.float() + self.log_cumprod_bt = log_cumprod_bt.float() + self.log_cumprod_ct = log_cumprod_ct.float() + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + def set_timesteps(self, num_inference_steps: int, device: str | torch.device = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should be moved + to. + """ + self.num_inference_steps = num_inference_steps + timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps).to(device) + + self.log_at = self.log_at.to(device) + self.log_bt = self.log_bt.to(device) + self.log_ct = self.log_ct.to(device) + self.log_cumprod_at = self.log_cumprod_at.to(device) + self.log_cumprod_bt = self.log_cumprod_bt.to(device) + self.log_cumprod_ct = self.log_cumprod_ct.to(device) + + def step( + self, + model_output: torch.Tensor, + timestep: torch.long, + sample: torch.LongTensor, + generator: torch.Generator | None = None, + return_dict: bool = True, + ) -> VQDiffusionSchedulerOutput | tuple: + """ + Predict the sample from the previous timestep by the reverse transition distribution. See + [`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computer. + + Args: + log_p_x_0: (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`): + The log probabilities for the predicted classes of the initial latent pixels. Does not include a + prediction for the masked class as the initial unnoised image cannot be masked. + t (`torch.long`): + The timestep that determines which transition matrices are used. + x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + The classes of each latent pixel at time `t`. + generator (`torch.Generator`, or `None`): + A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or + `tuple`. + + Returns: + [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is + returned, otherwise a tuple is returned where the first element is the sample tensor. + """ + if timestep == 0: + log_p_x_t_min_1 = model_output + else: + log_p_x_t_min_1 = self.q_posterior(model_output, sample, timestep) + + log_p_x_t_min_1 = gumbel_noised(log_p_x_t_min_1, generator) + + x_t_min_1 = log_p_x_t_min_1.argmax(dim=1) + + if not return_dict: + return (x_t_min_1,) + + return VQDiffusionSchedulerOutput(prev_sample=x_t_min_1) + + def q_posterior(self, log_p_x_0, x_t, t): + """ + Calculates the log probabilities for the predicted classes of the image at timestep `t-1`: + + ``` + p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) ) + ``` + + Args: + log_p_x_0 (`torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`): + The log probabilities for the predicted classes of the initial latent pixels. Does not include a + prediction for the masked class as the initial unnoised image cannot be masked. + x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + The classes of each latent pixel at time `t`. + t (`torch.Long`): + The timestep that determines which transition matrix is used. + + Returns: + `torch.Tensor` of shape `(batch size, num classes, num latent pixels)`: + The log probabilities for the predicted classes of the image at timestep `t-1`. + """ + log_onehot_x_t = index_to_log_onehot(x_t, self.num_embed) + + log_q_x_t_given_x_0 = self.log_Q_t_transitioning_to_known_class( + t=t, x_t=x_t, log_onehot_x_t=log_onehot_x_t, cumulative=True + ) + + log_q_t_given_x_t_min_1 = self.log_Q_t_transitioning_to_known_class( + t=t, x_t=x_t, log_onehot_x_t=log_onehot_x_t, cumulative=False + ) + + # p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) ... p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) + # . . . + # . . . + # . . . + # p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) ... p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) + q = log_p_x_0 - log_q_x_t_given_x_0 + + # sum_0 = p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) + ... + p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}), ... , + # sum_n = p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) + ... + p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) + q_log_sum_exp = torch.logsumexp(q, dim=1, keepdim=True) + + # p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_0 ... p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_n + # . . . + # . . . + # . . . + # p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_0 ... p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_n + q = q - q_log_sum_exp + + # (p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1} ... (p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1} + # . . . + # . . . + # . . . + # (p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1} ... (p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1} + # c_cumulative_{t-1} ... c_cumulative_{t-1} + q = self.apply_cumulative_transitions(q, t - 1) + + # ((p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_0) * sum_0 ... ((p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_0) * sum_n + # . . . + # . . . + # . . . + # ((p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_{k-1}) * sum_0 ... ((p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_{k-1}) * sum_n + # c_cumulative_{t-1} * q(x_t | x_{t-1}=C_k) * sum_0 ... c_cumulative_{t-1} * q(x_t | x_{t-1}=C_k) * sum_0 + log_p_x_t_min_1 = q + log_q_t_given_x_t_min_1 + q_log_sum_exp + + # For each column, there are two possible cases. + # + # Where: + # - sum(p_n(x_0))) is summing over all classes for x_0 + # - C_i is the class transitioning from (not to be confused with c_t and c_cumulative_t being used for gamma's) + # - C_j is the class transitioning to + # + # 1. x_t is masked i.e. x_t = c_k + # + # Simplifying the expression, the column vector is: + # . + # . + # . + # (c_t / c_cumulative_t) * (a_cumulative_{t-1} * p_n(x_0 = C_i | x_t) + b_cumulative_{t-1} * sum(p_n(x_0))) + # . + # . + # . + # (c_cumulative_{t-1} / c_cumulative_t) * sum(p_n(x_0)) + # + # From equation (11) stated in terms of forward probabilities, the last row is trivially verified. + # + # For the other rows, we can state the equation as ... + # + # (c_t / c_cumulative_t) * [b_cumulative_{t-1} * p(x_0=c_0) + ... + (a_cumulative_{t-1} + b_cumulative_{t-1}) * p(x_0=C_i) + ... + b_cumulative_{k-1} * p(x_0=c_{k-1})] + # + # This verifies the other rows. + # + # 2. x_t is not masked + # + # Simplifying the expression, there are two cases for the rows of the column vector, where C_j = C_i and where C_j != C_i: + # . + # . + # . + # C_j != C_i: b_t * ((b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_0) + ... + ((a_cumulative_{t-1} + b_cumulative_{t-1}) / b_cumulative_t) * p_n(x_0 = C_i) + ... + (b_cumulative_{t-1} / (a_cumulative_t + b_cumulative_t)) * p_n(c_0=C_j) + ... + (b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_{k-1})) + # . + # . + # . + # C_j = C_i: (a_t + b_t) * ((b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_0) + ... + ((a_cumulative_{t-1} + b_cumulative_{t-1}) / (a_cumulative_t + b_cumulative_t)) * p_n(x_0 = C_i = C_j) + ... + (b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_{k-1})) + # . + # . + # . + # 0 + # + # The last row is trivially verified. The other rows can be verified by directly expanding equation (11) stated in terms of forward probabilities. + return log_p_x_t_min_1 + + def log_Q_t_transitioning_to_known_class( + self, *, t: torch.int, x_t: torch.LongTensor, log_onehot_x_t: torch.Tensor, cumulative: bool + ): + """ + Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each + latent pixel in `x_t`. + + Args: + t (`torch.Long`): + The timestep that determines which transition matrix is used. + x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + The classes of each latent pixel at time `t`. + log_onehot_x_t (`torch.Tensor` of shape `(batch size, num classes, num latent pixels)`): + The log one-hot vectors of `x_t`. + cumulative (`bool`): + If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is + `True`, the cumulative transition matrix `0`->`t` is used. + + Returns: + `torch.Tensor` of shape `(batch size, num classes - 1, num latent pixels)`: + Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability + transition matrix. + + When non cumulative, returns `self.num_classes - 1` rows because the initial latent pixel cannot be + masked. + + Where: + - `q_n` is the probability distribution for the forward process of the `n`th latent pixel. + - C_0 is a class of a latent pixel embedding + - C_k is the class of the masked latent pixel + + non-cumulative result (omitting logarithms): + ``` + q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0) + . . . + . . . + . . . + q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k) + ``` + + cumulative result (omitting logarithms): + ``` + q_0_cumulative(x_t | x_0 = C_0) ... q_n_cumulative(x_t | x_0 = C_0) + . . . + . . . + . . . + q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1}) + ``` + """ + if cumulative: + a = self.log_cumprod_at[t] + b = self.log_cumprod_bt[t] + c = self.log_cumprod_ct[t] + else: + a = self.log_at[t] + b = self.log_bt[t] + c = self.log_ct[t] + + if not cumulative: + # The values in the onehot vector can also be used as the logprobs for transitioning + # from masked latent pixels. If we are not calculating the cumulative transitions, + # we need to save these vectors to be re-appended to the final matrix so the values + # aren't overwritten. + # + # `P(x_t!=mask|x_{t-1=mask}) = 0` and 0 will be the value of the last row of the onehot vector + # if x_t is not masked + # + # `P(x_t=mask|x_{t-1=mask}) = 1` and 1 will be the value of the last row of the onehot vector + # if x_t is masked + log_onehot_x_t_transitioning_from_masked = log_onehot_x_t[:, -1, :].unsqueeze(1) + + # `index_to_log_onehot` will add onehot vectors for masked pixels, + # so the default one hot matrix has one too many rows. See the doc string + # for an explanation of the dimensionality of the returned matrix. + log_onehot_x_t = log_onehot_x_t[:, :-1, :] + + # this is a cheeky trick to produce the transition probabilities using log one-hot vectors. + # + # Don't worry about what values this sets in the columns that mark transitions + # to masked latent pixels. They are overwrote later with the `mask_class_mask`. + # + # Looking at the below logspace formula in non-logspace, each value will evaluate to either + # `1 * a + b = a + b` where `log_Q_t` has the one hot value in the column + # or + # `0 * a + b = b` where `log_Q_t` has the 0 values in the column. + # + # See equation 7 for more details. + log_Q_t = (log_onehot_x_t + a).logaddexp(b) + + # The whole column of each masked pixel is `c` + mask_class_mask = x_t == self.mask_class + mask_class_mask = mask_class_mask.unsqueeze(1).expand(-1, self.num_embed - 1, -1) + log_Q_t[mask_class_mask] = c + + if not cumulative: + log_Q_t = torch.cat((log_Q_t, log_onehot_x_t_transitioning_from_masked), dim=1) + + return log_Q_t + + def apply_cumulative_transitions(self, q, t): + bsz = q.shape[0] + a = self.log_cumprod_at[t] + b = self.log_cumprod_bt[t] + c = self.log_cumprod_ct[t] + + num_latent_pixels = q.shape[2] + c = c.expand(bsz, 1, num_latent_pixels) + + q = (q + a).logaddexp(b) + q = torch.cat((q, c), dim=1) + + return q diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..23d7ac7c6c2d1e3f5fc57b8fe35ec06f54a906a0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__init__.py @@ -0,0 +1,171 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os + +from packaging import version + +from .. import __version__ +from .constants import ( + CONFIG_NAME, + DEFAULT_HF_PARALLEL_LOADING_WORKERS, + DEPRECATED_REVISION_ARGS, + DIFFUSERS_DYNAMIC_MODULE_NAME, + DIFFUSERS_LOAD_ID_FIELDS, + FLAX_WEIGHTS_NAME, + GGUF_FILE_EXTENSION, + HF_ENABLE_PARALLEL_LOADING, + HF_MODULES_CACHE, + HUGGINGFACE_CO_RESOLVE_ENDPOINT, + MIN_PEFT_VERSION, + ONNX_EXTERNAL_WEIGHTS_NAME, + ONNX_WEIGHTS_NAME, + SAFE_WEIGHTS_INDEX_NAME, + SAFETENSORS_FILE_EXTENSION, + SAFETENSORS_WEIGHTS_NAME, + USE_PEFT_BACKEND, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, +) +from .deprecation_utils import _maybe_remap_transformers_class, deprecate +from .doc_utils import replace_example_docstring +from .dynamic_modules_utils import get_class_from_dynamic_module +from .export_utils import export_to_gif, export_to_obj, export_to_ply, export_to_video +from .hub_utils import ( + PushToHubMixin, + _add_variant, + _get_checkpoint_shard_files, + _get_model_file, + extract_commit_hash, + http_user_agent, +) +from .import_utils import ( + BACKENDS_MAPPING, + DIFFUSERS_SLOW_IMPORT, + ENV_VARS_TRUE_AND_AUTO_VALUES, + ENV_VARS_TRUE_VALUES, + USE_JAX, + USE_TF, + USE_TORCH, + DummyObject, + OptionalDependencyNotAvailable, + _LazyModule, + get_objects_from_module, + is_accelerate_available, + is_accelerate_version, + is_aiter_available, + is_aiter_version, + is_av_available, + is_better_profanity_available, + is_bitsandbytes_available, + is_bitsandbytes_version, + is_bs4_available, + is_cosmos_guardrail_available, + is_flash_attn_3_available, + is_flash_attn_available, + is_flash_attn_version, + is_flax_available, + is_ftfy_available, + is_gguf_available, + is_gguf_version, + is_google_colab, + is_hf_hub_version, + is_hpu_available, + is_inflect_available, + is_invisible_watermark_available, + is_kernels_available, + is_kernels_version, + is_kornia_available, + is_librosa_available, + is_matplotlib_available, + is_nltk_available, + is_note_seq_available, + is_nvidia_modelopt_available, + is_nvidia_modelopt_version, + is_onnx_available, + is_opencv_available, + is_optimum_quanto_available, + is_optimum_quanto_version, + is_peft_available, + is_peft_version, + is_pytorch_retinaface_available, + is_safetensors_available, + is_sageattention_available, + is_sageattention_version, + is_scipy_available, + is_sentencepiece_available, + is_tensorboard_available, + is_timm_available, + is_torch_available, + is_torch_mlu_available, + is_torch_npu_available, + is_torch_version, + is_torch_xla_available, + is_torch_xla_version, + is_torchao_available, + is_torchao_version, + is_torchsde_available, + is_torchvision_available, + is_transformers_available, + is_transformers_version, + is_unidecode_available, + is_wandb_available, + is_xformers_available, + is_xformers_version, + requires_backends, +) +from .loading_utils import get_module_from_name, get_submodule_by_name, load_image, load_video +from .logging import get_logger +from .outputs import BaseOutput +from .peft_utils import ( + apply_lora_scale, + check_peft_version, + delete_adapter_layers, + get_adapter_name, + get_peft_kwargs, + recurse_remove_peft_layers, + scale_lora_layers, + set_adapter_layers, + set_weights_and_activate_adapters, + unscale_lora_layers, +) +from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil +from .remote_utils import remote_decode +from .state_dict_utils import ( + convert_all_state_dict_to_peft, + convert_sai_sd_control_lora_state_dict_to_peft, + convert_state_dict_to_diffusers, + convert_state_dict_to_kohya, + convert_state_dict_to_peft, + convert_unet_state_dict_to_peft, + state_dict_all_zero, +) +from .typing_utils import _get_detailed_type, _is_valid_type + + +logger = get_logger(__name__) + + +def check_min_version(min_version): + if version.parse(__version__) < version.parse(min_version): + if "dev" in min_version: + error_message = ( + "This example requires a source install from HuggingFace diffusers (see " + "`https://huggingface.co/docs/diffusers/installation#install-from-source`)," + ) + else: + error_message = f"This example requires a minimum version of {min_version}," + error_message += f" but the version found is {__version__}.\n" + raise ImportError(error_message) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5af4b139eb70d13b44c7a9df3c8c4e4d0bab31cb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/accelerate_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/accelerate_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58c55eef246e5bed8e7583823fd2ffd79d6614dd Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/accelerate_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/constants.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/constants.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..280707b2a0d7461d5480f58a9f033dcee666753c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/constants.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/deprecation_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/deprecation_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ce91c4272915f8c442540a48d019ba5eafc18e0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/deprecation_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/distributed_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/distributed_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afc418350a1907f1629fa6b9d8e5b0ad19375c2c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/distributed_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/doc_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/doc_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fac29f6da40e22b22b271adfdbe88b517c15769d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/doc_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_bitsandbytes_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_bitsandbytes_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3972ac9b3f3518569b42f1b39a753c3585cdf5f0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_bitsandbytes_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_flax_and_transformers_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_flax_and_transformers_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d88cc4839cced25c6310a7b4c9891b40e8c90851 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_flax_and_transformers_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_flax_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_flax_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1358378221f3553e7ffbfb5d51c1bed5f3cdd8d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_flax_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_gguf_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_gguf_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b0002cd8af7f5625e239ce5f5fdc9952a93f83c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_gguf_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_note_seq_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_note_seq_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2d011017cbeda2b1392bf223e40bf90495d52ed Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_note_seq_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_nvidia_modelopt_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_nvidia_modelopt_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..285067f0b8bc74354d116bf263a237f9755d7d09 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_nvidia_modelopt_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_onnx_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_onnx_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc0a269b41d799d1e7efffa85cb24ac28ca65bbd Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_onnx_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_optimum_quanto_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_optimum_quanto_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..194b85d2d456a3333fd854793df0ef24bd630d1a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_optimum_quanto_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_librosa_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_librosa_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49c0aec225a54a50cae78dc97838817a58a5db78 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_librosa_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_scipy_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_scipy_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10d4ad8d764c24fa73a1dd5e8a88452e7ecc1318 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_scipy_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_torchsde_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_torchsde_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff2628a722fa6e65a3035780bd779a0f41cefbf9 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_torchsde_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_onnx_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_onnx_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58b23a87d0da540a6d1d4e7ae270b1c9164e231e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_onnx_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_opencv_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_opencv_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc39c9a18e4fa25b52c2222112b11c730c764b5c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_opencv_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_sentencepiece_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_sentencepiece_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe93c4b3170491dcb8df8dbddda822a74c85d49b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torch_and_transformers_and_sentencepiece_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torchao_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torchao_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6109aeef43a7608de0a1f63c299e18f4c393b30b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_torchao_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_transformers_and_torch_and_note_seq_objects.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_transformers_and_torch_and_note_seq_objects.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..555dcb1a416951f4ca97a5e0e4a746cd609db84c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dummy_transformers_and_torch_and_note_seq_objects.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dynamic_modules_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dynamic_modules_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09a626a8346b2456dc78130aed721d6899a35dd1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/dynamic_modules_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/export_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/export_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d5a6090e24ac8e47917ac45131c6e726135f297 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/export_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/hub_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/hub_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1698a3893026c82585394ec8d4a401f9ac09e81 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/hub_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/import_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/import_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf49d0de84e98c73b086b62cdafb4a55dc35ffda Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/import_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/loading_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/loading_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95a36c1683633d6b267b543f56f9407a46e18a50 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/loading_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/logging.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/logging.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fcf35279a559880ed1cd957d4068eb14c68a4990 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/logging.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/outputs.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/outputs.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb68a0792ec9ac1895e0022ac9894290d231caaa Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/outputs.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/peft_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/peft_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64d2cc70c8632f3a110cc5213ce3f0da0e0b998f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/peft_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/pil_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/pil_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ac6568ceea16cf6347d24fc2c8aa793d88706aa Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/pil_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/remote_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/remote_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ade0e2eca54ddfe4c4cfc4921f39b8af16f19878 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/remote_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/source_code_parsing_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/source_code_parsing_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71b7b6cd04757d35e4d647592a16437b2f463d82 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/source_code_parsing_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/state_dict_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/state_dict_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca12335d91b4680c98c726432ffe881d0710fbf5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/state_dict_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/testing_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/testing_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..31ee5e5483870b96c229e08370b659afe832a5f5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/testing_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/torch_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/torch_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..696399ef9a092ab2493366a1af01837958f3ab1f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/torch_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/typing_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/typing_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..258168f288dfbf5214c5b7e2b2e44164a281a138 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/typing_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/versions.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/versions.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1535918e3712744b45e93194f63dd095840ee75 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/__pycache__/versions.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/accelerate_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/accelerate_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..af3b712b5a9ea71ace657c5189fb6ad9f10d8148 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/accelerate_utils.py @@ -0,0 +1,48 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Accelerate utilities: Utilities related to accelerate +""" + +from packaging import version + +from .import_utils import is_accelerate_available + + +if is_accelerate_available(): + import accelerate + + +def apply_forward_hook(method): + """ + Decorator that applies a registered CpuOffload hook to an arbitrary function rather than `forward`. This is useful + for cases where a PyTorch module provides functions other than `forward` that should trigger a move to the + appropriate acceleration device. This is the case for `encode` and `decode` in [`AutoencoderKL`]. + + This decorator looks inside the internal `_hf_hook` property to find a registered offload hook. + + :param method: The method to decorate. This method should be a method of a PyTorch module. + """ + if not is_accelerate_available(): + return method + accelerate_version = version.parse(accelerate.__version__).base_version + if version.parse(accelerate_version) < version.parse("0.17.0"): + return method + + def wrapper(self, *args, **kwargs): + if hasattr(self, "_hf_hook") and hasattr(self._hf_hook, "pre_forward"): + self._hf_hook.pre_forward(self) + return method(self, *args, **kwargs) + + return wrapper diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/constants.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..4f94df656a65357fb998395f5a87bbd5b166bea3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/constants.py @@ -0,0 +1,83 @@ +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import importlib +import os + +from huggingface_hub.constants import HF_HOME +from packaging import version + +from ..dependency_versions_check import dep_version_check +from .import_utils import ENV_VARS_TRUE_VALUES, is_peft_available, is_transformers_available + + +MIN_PEFT_VERSION = "0.6.0" +MIN_TRANSFORMERS_VERSION = "4.34.0" +_CHECK_PEFT = os.environ.get("_CHECK_PEFT", "1") in ENV_VARS_TRUE_VALUES + + +CONFIG_NAME = "config.json" +WEIGHTS_NAME = "diffusion_pytorch_model.bin" +WEIGHTS_INDEX_NAME = "diffusion_pytorch_model.bin.index.json" +FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" +ONNX_WEIGHTS_NAME = "model.onnx" +SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors" +SAFE_WEIGHTS_INDEX_NAME = "diffusion_pytorch_model.safetensors.index.json" +SAFETENSORS_FILE_EXTENSION = "safetensors" +GGUF_FILE_EXTENSION = "gguf" +ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb" +HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co") +DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" +HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(HF_HOME, "modules")) +DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"] +DIFFUSERS_REQUEST_TIMEOUT = 60 +DIFFUSERS_ATTN_BACKEND = os.getenv("DIFFUSERS_ATTN_BACKEND", "native") +DIFFUSERS_ATTN_CHECKS = os.getenv("DIFFUSERS_ATTN_CHECKS", "0").upper() in ENV_VARS_TRUE_VALUES +DEFAULT_HF_PARALLEL_LOADING_WORKERS = 8 +HF_ENABLE_PARALLEL_LOADING = os.environ.get("HF_ENABLE_PARALLEL_LOADING", "").upper() in ENV_VARS_TRUE_VALUES +DIFFUSERS_DISABLE_REMOTE_CODE = os.getenv("DIFFUSERS_DISABLE_REMOTE_CODE", "false").upper() in ENV_VARS_TRUE_VALUES + +# Below should be `True` if the current version of `peft` and `transformers` are compatible with +# PEFT backend. Will automatically fall back to PEFT backend if the correct versions of the libraries are +# available. +# For PEFT it is has to be greater than or equal to 0.6.0 and for transformers it has to be greater than or equal to 4.34.0. +_required_peft_version = is_peft_available() and version.parse( + version.parse(importlib.metadata.version("peft")).base_version +) >= version.parse(MIN_PEFT_VERSION) +_required_transformers_version = is_transformers_available() and version.parse( + version.parse(importlib.metadata.version("transformers")).base_version +) >= version.parse(MIN_TRANSFORMERS_VERSION) + +USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version + +if USE_PEFT_BACKEND and _CHECK_PEFT: + dep_version_check("peft") + + +DECODE_ENDPOINT_SD_V1 = "https://q1bj3bpq6kzilnsu.us-east-1.aws.endpoints.huggingface.cloud/" +DECODE_ENDPOINT_SD_XL = "https://x2dmsqunjd6k9prw.us-east-1.aws.endpoints.huggingface.cloud/" +DECODE_ENDPOINT_FLUX = "https://whhx50ex1aryqvw6.us-east-1.aws.endpoints.huggingface.cloud/" +DECODE_ENDPOINT_HUNYUAN_VIDEO = "https://o7ywnmrahorts457.us-east-1.aws.endpoints.huggingface.cloud/" + + +ENCODE_ENDPOINT_SD_V1 = "https://qc6479g0aac6qwy9.us-east-1.aws.endpoints.huggingface.cloud/" +ENCODE_ENDPOINT_SD_XL = "https://xjqqhmyn62rog84g.us-east-1.aws.endpoints.huggingface.cloud/" +ENCODE_ENDPOINT_FLUX = "https://ptccx55jz97f9zgo.us-east-1.aws.endpoints.huggingface.cloud/" + + +DIFFUSERS_LOAD_ID_FIELDS = [ + "pretrained_model_name_or_path", + "subfolder", + "variant", + "revision", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/deprecation_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/deprecation_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7ec612499e9a204606a70ddc515b7a2116d923bd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/deprecation_utils.py @@ -0,0 +1,97 @@ +import inspect +import warnings +from typing import Any + +from packaging import version + +from ..utils import logging + + +logger = logging.get_logger(__name__) + +# Mapping for deprecated Transformers classes to their replacements +# This is used to handle models that reference deprecated class names in their configs +# Reference: https://github.com/huggingface/transformers/issues/40822 +# Format: { +# "DeprecatedClassName": { +# "new_class": "NewClassName", +# "transformers_version": (">=", "5.0.0"), # (operation, version) tuple +# } +# } +_TRANSFORMERS_CLASS_REMAPPING = { + "CLIPFeatureExtractor": { + "new_class": "CLIPImageProcessor", + "transformers_version": (">", "4.57.0"), + }, +} + + +def _maybe_remap_transformers_class(class_name: str) -> str | None: + """ + Check if a Transformers class should be remapped to a newer version. + + Args: + class_name: The name of the class to check + + Returns: + The new class name if remapping should occur, None otherwise + """ + if class_name not in _TRANSFORMERS_CLASS_REMAPPING: + return None + + from .import_utils import is_transformers_version + + mapping = _TRANSFORMERS_CLASS_REMAPPING[class_name] + operation, required_version = mapping["transformers_version"] + + # Only remap if the transformers version meets the requirement + if is_transformers_version(operation, required_version): + new_class = mapping["new_class"] + logger.warning(f"{class_name} appears to have been deprecated in transformers. Using {new_class} instead.") + return mapping["new_class"] + + return None + + +def deprecate(*args, take_from: dict | Any | None = None, standard_warn=True, stacklevel=2): + from .. import __version__ + + deprecated_kwargs = take_from + values = () + if not isinstance(args[0], tuple): + args = (args,) + + for attribute, version_name, message in args: + if version.parse(version.parse(__version__).base_version) >= version.parse(version_name): + raise ValueError( + f"The deprecation tuple {(attribute, version_name, message)} should be removed since diffusers'" + f" version {__version__} is >= {version_name}" + ) + + warning = None + if isinstance(deprecated_kwargs, dict) and attribute in deprecated_kwargs: + values += (deprecated_kwargs.pop(attribute),) + warning = f"The `{attribute}` argument is deprecated and will be removed in version {version_name}." + elif hasattr(deprecated_kwargs, attribute): + values += (getattr(deprecated_kwargs, attribute),) + warning = f"The `{attribute}` attribute is deprecated and will be removed in version {version_name}." + elif deprecated_kwargs is None: + warning = f"`{attribute}` is deprecated and will be removed in version {version_name}." + + if warning is not None: + warning = warning + " " if standard_warn else "" + warnings.warn(warning + message, FutureWarning, stacklevel=stacklevel) + + if isinstance(deprecated_kwargs, dict) and len(deprecated_kwargs) > 0: + call_frame = inspect.getouterframes(inspect.currentframe())[1] + filename = call_frame.filename + line_number = call_frame.lineno + function = call_frame.function + key, value = next(iter(deprecated_kwargs.items())) + raise TypeError(f"{function} in {filename} line {line_number - 1} got an unexpected keyword argument `{key}`") + + if len(values) == 0: + return + elif len(values) == 1: + return values[0] + return values diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/distributed_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/distributed_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..239b7b26200d6b10a371eeb6e67e11f1e9f70882 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/distributed_utils.py @@ -0,0 +1,36 @@ +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +try: + import torch +except ImportError: + torch = None + + +def is_torch_dist_rank_zero() -> bool: + if torch is None: + return True + + dist_module = getattr(torch, "distributed", None) + if dist_module is None or not dist_module.is_available(): + return True + + if not dist_module.is_initialized(): + return True + + try: + return dist_module.get_rank() == 0 + except (RuntimeError, ValueError): + return True diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/doc_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/doc_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..815083e1425801a5715da8ecd99af017427ff740 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/doc_utils.py @@ -0,0 +1,39 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Doc utilities: Utilities related to documentation +""" + +import re + + +def replace_example_docstring(example_docstring): + def docstring_decorator(fn): + func_doc = fn.__doc__ + lines = func_doc.split("\n") + i = 0 + while i < len(lines) and re.search(r"^\s*Examples?:\s*$", lines[i]) is None: + i += 1 + if i < len(lines): + lines[i] = example_docstring + func_doc = "\n".join(lines) + else: + raise ValueError( + f"The function {fn} should have an empty 'Examples:' in its docstring as placeholder, " + f"current docstring is:\n{func_doc}" + ) + fn.__doc__ = func_doc + return fn + + return docstring_decorator diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_bitsandbytes_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_bitsandbytes_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..2dc589428de960f9c46f5a22874e2ef6475c418c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_bitsandbytes_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class BitsAndBytesConfig(metaclass=DummyObject): + _backends = ["bitsandbytes"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["bitsandbytes"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["bitsandbytes"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["bitsandbytes"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_flax_and_transformers_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_flax_and_transformers_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..5e65e5349bb0a6a0bac62cddf0ce0fad64237c68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_flax_and_transformers_objects.py @@ -0,0 +1,77 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class FlaxStableDiffusionControlNetPipeline(metaclass=DummyObject): + _backends = ["flax", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + +class FlaxStableDiffusionImg2ImgPipeline(metaclass=DummyObject): + _backends = ["flax", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + +class FlaxStableDiffusionInpaintPipeline(metaclass=DummyObject): + _backends = ["flax", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + +class FlaxStableDiffusionPipeline(metaclass=DummyObject): + _backends = ["flax", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + +class FlaxStableDiffusionXLPipeline(metaclass=DummyObject): + _backends = ["flax", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax", "transformers"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_flax_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_flax_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa8dbc819316e96f7483addba43f90b9d8f397b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_flax_objects.py @@ -0,0 +1,212 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class FlaxControlNetModel(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxModelMixin(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxUNet2DConditionModel(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxAutoencoderKL(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxDiffusionPipeline(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxDDIMScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxDDPMScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxDPMSolverMultistepScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxEulerDiscreteScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxKarrasVeScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxLMSDiscreteScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxPNDMScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxSchedulerMixin(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + +class FlaxScoreSdeVeScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_gguf_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_gguf_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..4a6d9a060a1397c5ef0e402e67170960bb3de516 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_gguf_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class GGUFQuantizationConfig(metaclass=DummyObject): + _backends = ["gguf"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["gguf"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["gguf"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["gguf"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_note_seq_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_note_seq_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..c02d0b015aedc37c01fb3b843bc79547aae5da68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_note_seq_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class MidiProcessor(metaclass=DummyObject): + _backends = ["note_seq"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["note_seq"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["note_seq"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["note_seq"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_nvidia_modelopt_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_nvidia_modelopt_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..046b28223b3d4efd541cbe35d6596b746aecca5f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_nvidia_modelopt_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class NVIDIAModelOptConfig(metaclass=DummyObject): + _backends = ["nvidia_modelopt"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["nvidia_modelopt"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["nvidia_modelopt"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["nvidia_modelopt"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_onnx_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_onnx_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..bde5f6ad0793e2d81bc638600b46ff81748d09ee --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_onnx_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class OnnxRuntimeModel(metaclass=DummyObject): + _backends = ["onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["onnx"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_optimum_quanto_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_optimum_quanto_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..44f8eaffc2460360d17720578b8224496f91cf94 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_optimum_quanto_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class QuantoConfig(metaclass=DummyObject): + _backends = ["optimum_quanto"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["optimum_quanto"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["optimum_quanto"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["optimum_quanto"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_pt_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_pt_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..5244273cb596ae17534d8f9de21a5e8fe74309b0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_pt_objects.py @@ -0,0 +1,3073 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class AdaptiveProjectedGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AdaptiveProjectedMixGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class BaseGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ClassifierFreeGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ClassifierFreeZeroStarGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FrequencyDecoupledGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PerturbedAttentionGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SkipLayerGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SmoothedEnergyGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class TangentialClassifierFreeGuidance(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FasterCacheConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FirstBlockCacheConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HookRegistry(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LayerSkipConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class MagCacheConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PyramidAttentionBroadcastConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SmoothedEnergyGuidanceConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class TaylorSeerCacheConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +def apply_faster_cache(*args, **kwargs): + requires_backends(apply_faster_cache, ["torch"]) + + +def apply_first_block_cache(*args, **kwargs): + requires_backends(apply_first_block_cache, ["torch"]) + + +def apply_layer_skip(*args, **kwargs): + requires_backends(apply_layer_skip, ["torch"]) + + +def apply_mag_cache(*args, **kwargs): + requires_backends(apply_mag_cache, ["torch"]) + + +def apply_pyramid_attention_broadcast(*args, **kwargs): + requires_backends(apply_pyramid_attention_broadcast, ["torch"]) + + +def apply_taylorseer_cache(*args, **kwargs): + requires_backends(apply_taylorseer_cache, ["torch"]) + + +class AllegroTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AsymmetricAutoencoderKL(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AttentionBackendName(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AuraFlowTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderDC(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKL(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLAllegro(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLCogVideoX(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLCosmos(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLFlux2(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLHunyuanImage(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLHunyuanImageRefiner(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLHunyuanVideo(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLHunyuanVideo15(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLLTX2Audio(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLLTX2Video(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLLTXVideo(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLMagvit(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLMochi(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLQwenImage(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLTemporalDecoder(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderKLWan(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderOobleck(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderRAE(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoencoderTiny(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class BriaFiboTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class BriaTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CacheMixin(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ChromaTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ChronoEditTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CogVideoXTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CogView3PlusTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CogView4Transformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ConsisIDTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ConsistencyDecoderVAE(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ContextParallelConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ControlNetUnionModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ControlNetXSAdapter(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CosmosControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CosmosTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DiTTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class EasyAnimateTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class Flux2Transformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FluxControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FluxMultiControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FluxTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class GlmImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HeliosTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HiDreamImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanDiT2DControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanDiT2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanDiT2DMultiControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanVideo15Transformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanVideoFramepackTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HunyuanVideoTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class I2VGenXLUNet(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class Kandinsky3UNet(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class Kandinsky5Transformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LatteTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LongCatImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LTX2VideoTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LTXVideoTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class Lumina2Transformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LuminaNextDiT2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class MochiTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ModelMixin(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class MotionAdapter(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class MultiAdapter(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class MultiControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class OmniGenTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class OvisImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ParallelConfig(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PixArtTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PriorTransformer(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PRXTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class QwenImageControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class QwenImageMultiControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class QwenImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SanaControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SanaTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SanaVideoTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SD3ControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SD3MultiControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SD3Transformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SkyReelsV2Transformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SparseControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class StableAudioDiTModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class T2IAdapter(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class T5FilmDecoder(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class Transformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class TransformerTemporalModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNet1DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNet2DConditionModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNet2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNet3DConditionModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNetControlNetXSModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNetMotionModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UNetSpatioTemporalConditionModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UVit2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class VQModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class WanAnimateTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class WanTransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class WanVACETransformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ZImageControlNetModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ZImageTransformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +def attention_backend(*args, **kwargs): + requires_backends(attention_backend, ["torch"]) + + +class AutoPipelineBlocks(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ComponentsManager(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ComponentSpec(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ConditionalPipelineBlocks(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ConfigSpec(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class InputParam(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LoopSequentialPipelineBlocks(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ModularPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ModularPipelineBlocks(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class OutputParam(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SequentialPipelineBlocks(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +def get_constant_schedule(*args, **kwargs): + requires_backends(get_constant_schedule, ["torch"]) + + +def get_constant_schedule_with_warmup(*args, **kwargs): + requires_backends(get_constant_schedule_with_warmup, ["torch"]) + + +def get_cosine_schedule_with_warmup(*args, **kwargs): + requires_backends(get_cosine_schedule_with_warmup, ["torch"]) + + +def get_cosine_with_hard_restarts_schedule_with_warmup(*args, **kwargs): + requires_backends(get_cosine_with_hard_restarts_schedule_with_warmup, ["torch"]) + + +def get_linear_schedule_with_warmup(*args, **kwargs): + requires_backends(get_linear_schedule_with_warmup, ["torch"]) + + +def get_polynomial_decay_schedule_with_warmup(*args, **kwargs): + requires_backends(get_polynomial_decay_schedule_with_warmup, ["torch"]) + + +def get_scheduler(*args, **kwargs): + requires_backends(get_scheduler, ["torch"]) + + +class AudioPipelineOutput(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoPipelineForImage2Image(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoPipelineForInpainting(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AutoPipelineForText2Image(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class BlipDiffusionControlNetPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class BlipDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CLIPImageProjection(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ConsistencyModelPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DanceDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDIMPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDPMPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DiffusionPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DiTPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ImagePipelineOutput(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class KarrasVePipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LDMPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LDMSuperResolutionPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PNDMPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class RePaintPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ScoreSdeVePipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class StableDiffusionMixin(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DiffusersQuantizer(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class AmusedScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CMStochasticIterativeScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CogVideoXDDIMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class CogVideoXDPMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDIMInverseScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDIMParallelScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDIMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDPMParallelScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDPMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DDPMWuerstchenScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DEISMultistepScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DPMSolverMultistepInverseScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DPMSolverMultistepScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class DPMSolverSinglestepScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class EDMDPMSolverMultistepScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class EDMEulerScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class EulerAncestralDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class EulerDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FlowMatchEulerDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FlowMatchHeunDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class FlowMatchLCMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HeliosDMDScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HeliosScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class HeunDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class IPNDMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class KarrasVeScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class KDPM2AncestralDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class KDPM2DiscreteScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LCMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class LTXEulerAncestralRFScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class PNDMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class RePaintScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SASolverScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SchedulerMixin(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class SCMScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class ScoreSdeVeScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class TCDScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UnCLIPScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class UniPCMultistepScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class VQDiffusionScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + +class EMAModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_librosa_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_librosa_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..2088bc4a744198284f22fe54e6f1055cf3568566 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_librosa_objects.py @@ -0,0 +1,32 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class AudioDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "librosa"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "librosa"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "librosa"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "librosa"]) + + +class Mel(metaclass=DummyObject): + _backends = ["torch", "librosa"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "librosa"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "librosa"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "librosa"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_scipy_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_scipy_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..a1ff25863822b04971d2c6dfdc17f5b28774cf05 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_scipy_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class LMSDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch", "scipy"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "scipy"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "scipy"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "scipy"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_torchsde_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_torchsde_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..6ff14231b9cc8c88effc4d11f7b439fa16bb7f20 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_torchsde_objects.py @@ -0,0 +1,32 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class CosineDPMSolverMultistepScheduler(metaclass=DummyObject): + _backends = ["torch", "torchsde"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "torchsde"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "torchsde"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "torchsde"]) + + +class DPMSolverSDEScheduler(metaclass=DummyObject): + _backends = ["torch", "torchsde"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "torchsde"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "torchsde"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "torchsde"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..b7afad8226b87292100270e3e7daad6885be0e7f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py @@ -0,0 +1,92 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class OnnxStableDiffusionImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + +class OnnxStableDiffusionInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + +class OnnxStableDiffusionInpaintPipelineLegacy(metaclass=DummyObject): + _backends = ["torch", "transformers", "onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + +class OnnxStableDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + +class OnnxStableDiffusionUpscalePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + +class StableDiffusionOnnxPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "onnx"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "onnx"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "onnx"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..c0c4d9df5eeeca4cca0a5147058eba70f89bcbc2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class ConsisIDPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "opencv"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "opencv"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "opencv"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "opencv"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..349db10d1fa5b8505173e1578d3c83ea17e95fbc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py @@ -0,0 +1,47 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class KolorsImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "sentencepiece"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "sentencepiece"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "sentencepiece"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "sentencepiece"]) + + +class KolorsPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "sentencepiece"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "sentencepiece"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "sentencepiece"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "sentencepiece"]) + + +class KolorsPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers", "sentencepiece"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers", "sentencepiece"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "sentencepiece"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers", "sentencepiece"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..157b04ef266abe48af58ea7f68d068563cc9869c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -0,0 +1,4307 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class Flux2AutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2KleinAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2KleinBaseAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2KleinBaseModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2KleinModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2ModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxKontextAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxKontextModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditPlusAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditPlusModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageLayeredAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageLayeredModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Wan22Blocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Wan22Image2VideoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Wan22Image2VideoModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Wan22ModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanImage2VideoAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanImage2VideoModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageAutoBlocks(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageModularPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AllegroPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AltDiffusionImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AltDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AmusedImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AmusedInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AmusedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffSDXLPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffSparseControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffVideoToVideoControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AnimateDiffVideoToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AudioLDM2Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AudioLDM2ProjectionModel(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AudioLDM2UNet2DConditionModel(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AudioLDMPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class AuraFlowPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class BriaFiboEditPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class BriaFiboPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class BriaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ChromaImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ChromaInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ChromaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ChronoEditPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CLIPImageProjection(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogVideoXFunControlPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogVideoXImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogVideoXPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogVideoXVideoToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogView3PlusPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogView4ControlPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CogView4Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ConsisIDPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Cosmos2_5_PredictBasePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Cosmos2_5_TransferPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Cosmos2TextToImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Cosmos2VideoToWorldPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CosmosTextToWorldPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CosmosVideoToWorldPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class CycleDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class EasyAnimateControlPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class EasyAnimateInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class EasyAnimatePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2KleinPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Flux2Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxControlImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxControlInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxControlNetImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxControlNetInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxControlPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxFillPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxKontextInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxKontextPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class FluxPriorReduxPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class GlmImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HeliosPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HeliosPyramidPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HiDreamImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanDiTControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanDiTPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanDiTPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanImageRefinerPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanSkyreelsImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanVideo15ImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanVideo15Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanVideoFramepackPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanVideoImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class I2VGenXLPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class IFImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class IFImg2ImgSuperResolutionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class IFInpaintingPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class IFInpaintingSuperResolutionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class IFPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class IFSuperResolutionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ImageTextPipelineOutput(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Kandinsky3Img2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Kandinsky3Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Kandinsky5I2IPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Kandinsky5I2VPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Kandinsky5T2IPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Kandinsky5T2VPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyImg2ImgCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyInpaintCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyPriorPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22CombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22ControlnetImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22ControlnetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22Img2ImgCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22Img2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22InpaintCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22InpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22PriorEmb2EmbPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class KandinskyV22PriorPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LatentConsistencyModelImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LatentConsistencyModelPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LattePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LDMTextToImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LEditsPPPipelineStableDiffusion(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LEditsPPPipelineStableDiffusionXL(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LongCatImageEditPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LongCatImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTX2ConditionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTX2ImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTX2LatentUpsamplePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTX2Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTXConditionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTXI2VLongMultiPromptPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTXImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTXLatentUpsamplePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LTXPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LucyEditPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Lumina2Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class Lumina2Text2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LuminaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class LuminaText2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class MarigoldDepthPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class MarigoldIntrinsicsPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class MarigoldNormalsPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class MochiPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class MusicLDMPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class OmniGenPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class OvisImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class PaintByExamplePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class PIAPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class PixArtAlphaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class PixArtSigmaPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class PixArtSigmaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class PRXPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageControlNetInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageEditPlusPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImageLayeredPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class QwenImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ReduxImageEncoder(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaSprintImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaSprintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SanaVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SemanticStableDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ShapEImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ShapEPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SkyReelsV2DiffusionForcingImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SkyReelsV2DiffusionForcingPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SkyReelsV2DiffusionForcingVideoToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SkyReelsV2ImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class SkyReelsV2Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableAudioPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableAudioProjectionModel(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableCascadeCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableCascadeDecoderPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableCascadePriorPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3ControlNetInpaintingPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3ControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3Img2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3InpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3PAGImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3PAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusion3Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionAdapterPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionAttendAndExcitePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionControlNetImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionControlNetInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionControlNetPAGInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionControlNetPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionControlNetXSPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionDepth2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionDiffEditPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionGLIGENPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionGLIGENTextImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionImageVariationPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionInpaintPipelineLegacy(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionInstructPix2PixPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionLatentUpscalePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionLDM3DPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionModelEditingPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPAGImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPAGInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPanoramaPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionParadigmsPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPipelineSafe(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionPix2PixZeroPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionSAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionUpscalePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLAdapterPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetPAGImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetUnionImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetUnionInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetUnionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLControlNetXSPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLInstructPix2PixPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLPAGImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLPAGInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLPAGPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableDiffusionXLPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableUnCLIPImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableUnCLIPPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class StableVideoDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class TextToVideoSDPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class TextToVideoZeroPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class TextToVideoZeroSDXLPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class UnCLIPImageVariationPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class UnCLIPPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class UniDiffuserModel(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class UniDiffuserPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class UniDiffuserTextDecoder(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VersatileDiffusionDualGuidedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VersatileDiffusionImageVariationPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VersatileDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VersatileDiffusionTextToImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VideoToVideoSDPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VisualClozeGenerationPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VisualClozePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class VQDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanAnimatePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanVACEPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WanVideoToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WuerstchenCombinedPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WuerstchenDecoderPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class WuerstchenPriorPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageControlNetInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageControlNetPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageImg2ImgPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageInpaintPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImageOmniPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class ZImagePipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torchao_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torchao_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..16f0f6a55f64099c73ce4d81af8c87053b833e44 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_torchao_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class TorchAoConfig(metaclass=DummyObject): + _backends = ["torchao"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torchao"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torchao"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torchao"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py new file mode 100644 index 0000000000000000000000000000000000000000..fbde04e33f0abd86d12f3dee048a4f0585c9f19d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py @@ -0,0 +1,17 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +from ..utils import DummyObject, requires_backends + + +class SpectrogramDiffusionPipeline(metaclass=DummyObject): + _backends = ["transformers", "torch", "note_seq"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["transformers", "torch", "note_seq"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["transformers", "torch", "note_seq"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["transformers", "torch", "note_seq"]) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dynamic_modules_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dynamic_modules_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..856966dd29b580ee037f8f61c015f1ea6d997980 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/dynamic_modules_utils.py @@ -0,0 +1,512 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities to dynamically load objects from the Hub.""" + +import importlib +import inspect +import json +import os +import re +import shutil +import sys +import threading +from pathlib import Path +from types import ModuleType +from urllib import request + +from huggingface_hub import hf_hub_download, model_info +from huggingface_hub.utils import RevisionNotFoundError, validate_hf_hub_args +from packaging import version + +from .. import __version__ +from . import DIFFUSERS_DYNAMIC_MODULE_NAME, HF_MODULES_CACHE, logging +from .constants import DIFFUSERS_DISABLE_REMOTE_CODE + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +# See https://huggingface.co/datasets/diffusers/community-pipelines-mirror +COMMUNITY_PIPELINES_MIRROR_ID = "diffusers/community-pipelines-mirror" +TIME_OUT_REMOTE_CODE = int(os.getenv("DIFFUSERS_TIMEOUT_REMOTE_CODE", 15)) +_HF_REMOTE_CODE_LOCK = threading.Lock() + + +def get_diffusers_versions(): + url = "https://pypi.org/pypi/diffusers/json" + releases = json.loads(request.urlopen(url).read())["releases"].keys() + return sorted(releases, key=lambda x: version.Version(x)) + + +def init_hf_modules(): + """ + Creates the cache directory for modules with an init, and adds it to the Python path. + """ + # This function has already been executed if HF_MODULES_CACHE already is in the Python path. + if HF_MODULES_CACHE in sys.path: + return + + sys.path.append(HF_MODULES_CACHE) + os.makedirs(HF_MODULES_CACHE, exist_ok=True) + init_path = Path(HF_MODULES_CACHE) / "__init__.py" + if not init_path.exists(): + init_path.touch() + + +def create_dynamic_module(name: str | os.PathLike): + """ + Creates a dynamic module in the cache directory for modules. + """ + init_hf_modules() + dynamic_module_path = Path(HF_MODULES_CACHE) / name + # If the parent module does not exist yet, recursively create it. + if not dynamic_module_path.parent.exists(): + create_dynamic_module(dynamic_module_path.parent) + os.makedirs(dynamic_module_path, exist_ok=True) + init_path = dynamic_module_path / "__init__.py" + if not init_path.exists(): + init_path.touch() + + +def get_relative_imports(module_file): + """ + Get the list of modules that are relatively imported in a module file. + + Args: + module_file (`str` or `os.PathLike`): The module file to inspect. + """ + with open(module_file, "r", encoding="utf-8") as f: + content = f.read() + + # Imports of the form `import .xxx` + relative_imports = re.findall(r"^\s*import\s+\.(\S+)\s*$", content, flags=re.MULTILINE) + # Imports of the form `from .xxx import yyy` + relative_imports += re.findall(r"^\s*from\s+\.(\S+)\s+import", content, flags=re.MULTILINE) + # Unique-ify + return list(set(relative_imports)) + + +def get_relative_import_files(module_file): + """ + Get the list of all files that are needed for a given module. Note that this function recurses through the relative + imports (if a imports b and b imports c, it will return module files for b and c). + + Args: + module_file (`str` or `os.PathLike`): The module file to inspect. + """ + no_change = False + files_to_check = [module_file] + all_relative_imports = [] + + # Let's recurse through all relative imports + while not no_change: + new_imports = [] + for f in files_to_check: + new_imports.extend(get_relative_imports(f)) + + module_path = Path(module_file).parent + new_import_files = [str(module_path / m) for m in new_imports] + new_import_files = [f for f in new_import_files if f not in all_relative_imports] + files_to_check = [f"{f}.py" for f in new_import_files] + + no_change = len(new_import_files) == 0 + all_relative_imports.extend(files_to_check) + + return all_relative_imports + + +def check_imports(filename): + """ + Check if the current Python environment contains all the libraries that are imported in a file. + """ + with open(filename, "r", encoding="utf-8") as f: + content = f.read() + + # Imports of the form `import xxx` + imports = re.findall(r"^\s*import\s+(\S+)\s*$", content, flags=re.MULTILINE) + # Imports of the form `from xxx import yyy` + imports += re.findall(r"^\s*from\s+(\S+)\s+import", content, flags=re.MULTILINE) + # Only keep the top-level module + imports = [imp.split(".")[0] for imp in imports if not imp.startswith(".")] + + # Unique-ify and test we got them all + imports = list(set(imports)) + missing_packages = [] + for imp in imports: + try: + importlib.import_module(imp) + except ImportError: + missing_packages.append(imp) + + if len(missing_packages) > 0: + logger.warning( + "This modeling file might require the following packages that were not found in your environment: " + f"{', '.join(missing_packages)}. Run `pip install {' '.join(missing_packages)}`" + ) + + return get_relative_imports(filename) + + +def resolve_trust_remote_code(trust_remote_code, model_name, has_remote_code): + trust_remote_code = trust_remote_code and not DIFFUSERS_DISABLE_REMOTE_CODE + if DIFFUSERS_DISABLE_REMOTE_CODE: + logger.warning( + "Downloading remote code is disabled globally via the DIFFUSERS_DISABLE_REMOTE_CODE environment variable. Ignoring `trust_remote_code`." + ) + + if has_remote_code and not trust_remote_code: + error_msg = f"The repository for {model_name} contains custom code. " + error_msg += ( + "Downloading remote code is disabled globally via the DIFFUSERS_DISABLE_REMOTE_CODE environment variable." + if DIFFUSERS_DISABLE_REMOTE_CODE + else "Pass `trust_remote_code=True` to allow loading remote code modules." + ) + raise ValueError(error_msg) + + elif has_remote_code and trust_remote_code: + logger.warning( + f"`trust_remote_code` is enabled. Downloading code from {model_name}. Please ensure you trust the contents of this repository" + ) + + return trust_remote_code + + +def get_class_in_module(class_name, module_path, force_reload=False): + """ + Import a module on the cache directory for modules and extract a class from it. + """ + name = os.path.normpath(module_path) + if name.endswith(".py"): + name = name[:-3] + name = name.replace(os.path.sep, ".") + module_file: Path = Path(HF_MODULES_CACHE) / module_path + + with _HF_REMOTE_CODE_LOCK: + if force_reload: + sys.modules.pop(name, None) + importlib.invalidate_caches() + cached_module: ModuleType | None = sys.modules.get(name) + module_spec = importlib.util.spec_from_file_location(name, location=module_file) + + module: ModuleType + if cached_module is None: + module = importlib.util.module_from_spec(module_spec) + # insert it into sys.modules before any loading begins + sys.modules[name] = module + else: + module = cached_module + + module_spec.loader.exec_module(module) + + if class_name is None: + return find_pipeline_class(module) + + return getattr(module, class_name) + + +def find_pipeline_class(loaded_module): + """ + Retrieve pipeline class that inherits from `DiffusionPipeline`. Note that there has to be exactly one class + inheriting from `DiffusionPipeline`. + """ + from ..pipelines import DiffusionPipeline + + cls_members = dict(inspect.getmembers(loaded_module, inspect.isclass)) + + pipeline_class = None + for cls_name, cls in cls_members.items(): + if ( + cls_name != DiffusionPipeline.__name__ + and issubclass(cls, DiffusionPipeline) + and cls.__module__.split(".")[0] != "diffusers" + ): + if pipeline_class is not None: + raise ValueError( + f"Multiple classes that inherit from {DiffusionPipeline.__name__} have been found:" + f" {pipeline_class.__name__}, and {cls_name}. Please make sure to define only one in" + f" {loaded_module}." + ) + pipeline_class = cls + + return pipeline_class + + +@validate_hf_hub_args +def get_cached_module_file( + pretrained_model_name_or_path: str | os.PathLike, + module_file: str, + subfolder: str | None = None, + cache_dir: str | os.PathLike | None = None, + force_download: bool = False, + proxies: dict[str, str] | None = None, + token: bool | str | None = None, + revision: str | None = None, + local_files_only: bool = False, + local_dir: str | None = None, +): + """ + Prepares Downloads a module from a local folder or a distant repo and returns its path inside the cached + Transformers module. + + Args: + pretrained_model_name_or_path (`str` or `os.PathLike`): + This can be either: + + - a string, the *model id* of a pretrained model configuration hosted inside a model repo on + huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced + under a user or organization name, like `dbmdz/bert-base-german-cased`. + - a path to a *directory* containing a configuration file saved using the + [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`. + + module_file (`str`): + The name of the module file containing the class to look for. + cache_dir (`str` or `os.PathLike`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the standard + cache should not be used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force to (re-)download the configuration files and override the cached versions if they + exist. + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request. + token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `transformers-cli login` (stored in `~/.huggingface`). + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, will only try to load the tokenizer configuration from local files. + + > [!TIP] > You may pass a token in `token` if you are not logged in (`hf auth login`) and want to use private or + [gated > models](https://huggingface.co/docs/hub/models-gated#gated-models). + + Returns: + `str`: The path to the module inside the cache. + """ + # Download and cache module_file from the repo `pretrained_model_name_or_path` of grab it if it's a local file. + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if subfolder is not None: + module_file_or_url = os.path.join(pretrained_model_name_or_path, subfolder, module_file) + else: + module_file_or_url = os.path.join(pretrained_model_name_or_path, module_file) + + if os.path.isfile(module_file_or_url): + resolved_module_file = module_file_or_url + submodule = "local" + elif pretrained_model_name_or_path.count("/") == 0: + available_versions = get_diffusers_versions() + # cut ".dev0" + latest_version = "v" + ".".join(__version__.split(".")[:3]) + + # retrieve github version that matches + if revision is None: + revision = latest_version if latest_version[1:] in available_versions else "main" + logger.info(f"Defaulting to latest_version: {revision}.") + elif revision in available_versions: + revision = f"v{revision}" + elif revision == "main": + revision = revision + else: + raise ValueError( + f"`custom_revision`: {revision} does not exist. Please make sure to choose one of" + f" {', '.join(available_versions + ['main'])}." + ) + + try: + resolved_module_file = hf_hub_download( + repo_id=COMMUNITY_PIPELINES_MIRROR_ID, + repo_type="dataset", + filename=f"{revision}/{pretrained_model_name_or_path}.py", + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + local_dir=local_dir, + ) + submodule = "git" + module_file = pretrained_model_name_or_path + ".py" + except RevisionNotFoundError as e: + raise EnvironmentError( + f"Revision '{revision}' not found in the community pipelines mirror. Check available revisions on" + " https://huggingface.co/datasets/diffusers/community-pipelines-mirror/tree/main." + " If you don't find the revision you are looking for, please open an issue on https://github.com/huggingface/diffusers/issues." + ) from e + except EnvironmentError: + logger.error(f"Could not locate the {module_file} inside {pretrained_model_name_or_path}.") + raise + else: + try: + # Load from URL or cache if already cached + resolved_module_file = hf_hub_download( + pretrained_model_name_or_path, + module_file, + subfolder=subfolder, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + local_dir=local_dir, + revision=revision, + token=token, + ) + submodule = os.path.join("local", "--".join(pretrained_model_name_or_path.split("/"))) + except EnvironmentError: + logger.error(f"Could not locate the {module_file} inside {pretrained_model_name_or_path}.") + raise + + # Check we have all the requirements in our environment + modules_needed = check_imports(resolved_module_file) + + # Now we move the module inside our cached dynamic modules. + full_submodule = DIFFUSERS_DYNAMIC_MODULE_NAME + os.path.sep + submodule + create_dynamic_module(full_submodule) + submodule_path = Path(HF_MODULES_CACHE) / full_submodule + if submodule == "local" or submodule == "git": + # We always copy local files (we could hash the file to see if there was a change, and give them the name of + # that hash, to only copy when there is a modification but it seems overkill for now). + # The only reason we do the copy is to avoid putting too many folders in sys.path. + shutil.copyfile(resolved_module_file, submodule_path / module_file) + for module_needed in modules_needed: + if len(module_needed.split(".")) == 2: + module_needed = "/".join(module_needed.split(".")) + module_folder = module_needed.split("/")[0] + if not os.path.exists(submodule_path / module_folder): + os.makedirs(submodule_path / module_folder) + module_needed = f"{module_needed}.py" + if subfolder is not None: + source_path = os.path.join(pretrained_model_name_or_path, subfolder, module_needed) + else: + source_path = os.path.join(pretrained_model_name_or_path, module_needed) + shutil.copyfile(source_path, submodule_path / module_needed) + else: + # Get the commit hash + # TODO: we will get this info in the etag soon, so retrieve it from there and not here. + commit_hash = model_info(pretrained_model_name_or_path, revision=revision, token=token).sha + + # The module file will end up being placed in a subfolder with the git hash of the repo. This way we get the + # benefit of versioning. + submodule_path = submodule_path / commit_hash + full_submodule = full_submodule + os.path.sep + commit_hash + create_dynamic_module(full_submodule) + + if not (submodule_path / module_file).exists(): + if len(module_file.split("/")) == 2: + module_folder = module_file.split("/")[0] + if not os.path.exists(submodule_path / module_folder): + os.makedirs(submodule_path / module_folder) + shutil.copyfile(resolved_module_file, submodule_path / module_file) + + # Make sure we also have every file with relative + for module_needed in modules_needed: + if len(module_needed.split(".")) == 2: + module_needed = "/".join(module_needed.split(".")) + if not (submodule_path / module_needed).exists(): + get_cached_module_file( + pretrained_model_name_or_path, + f"{module_needed}.py", + subfolder=subfolder, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + local_dir=local_dir, + ) + return os.path.join(full_submodule, module_file) + + +@validate_hf_hub_args +def get_class_from_dynamic_module( + pretrained_model_name_or_path: str | os.PathLike, + module_file: str, + subfolder: str | None = None, + class_name: str | None = None, + cache_dir: str | os.PathLike | None = None, + force_download: bool = False, + proxies: dict[str, str] | None = None, + token: bool | str | None = None, + revision: str | None = None, + local_files_only: bool = False, + local_dir: str | None = None, +): + """ + Extracts a class from a module file, present in the local folder or repository of a model. + + > [!WARNING] > Calling this function will execute the code in the module file found locally or downloaded from the + Hub. It should > therefore only be called on trusted repos. + + Args: + pretrained_model_name_or_path (`str` or `os.PathLike`): + This can be either: + + - a string, the *model id* of a pretrained model configuration hosted inside a model repo on + huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced + under a user or organization name, like `dbmdz/bert-base-german-cased`. + - a path to a *directory* containing a configuration file saved using the + [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`. + + module_file (`str`): + The name of the module file containing the class to look for. + class_name (`str`): + The name of the class to import in the module. + cache_dir (`str` or `os.PathLike`, *optional*): + Path to a directory in which a downloaded pretrained model configuration should be cached if the standard + cache should not be used. + force_download (`bool`, *optional*, defaults to `False`): + Whether or not to force to (re-)download the configuration files and override the cached versions if they + exist. + proxies (`dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request. + token (`str` or `bool`, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `transformers-cli login` (stored in `~/.huggingface`). + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, will only try to load the tokenizer configuration from local files. + + > [!TIP] > You may pass a token in `token` if you are not logged in (`hf auth login`) and want to use private or + [gated > models](https://huggingface.co/docs/hub/models-gated#gated-models). + + Returns: + `type`: The class, dynamically imported from the module. + + Examples: + + ```python + # Download module `modeling.py` from huggingface.co and cache then extract the class `MyBertModel` from this + # module. + cls = get_class_from_dynamic_module("sgugger/my-bert-model", "modeling.py", "MyBertModel") + ```""" + # And lastly we get the class inside our newly created module + final_module = get_cached_module_file( + pretrained_model_name_or_path, + module_file, + subfolder=subfolder, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + local_dir=local_dir, + ) + return get_class_in_module(class_name, final_module) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/export_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/export_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d9b4e173a862dc5d0d6e0e9f7706350e113abff1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/export_utils.py @@ -0,0 +1,208 @@ +import io +import random +import struct +import tempfile +from contextlib import contextmanager + +import numpy as np +import PIL.Image +import PIL.ImageOps + +from .import_utils import BACKENDS_MAPPING, is_imageio_available, is_opencv_available +from .logging import get_logger + + +global_rng = random.Random() + +logger = get_logger(__name__) + + +@contextmanager +def buffered_writer(raw_f): + f = io.BufferedWriter(raw_f) + yield f + f.flush() + + +def export_to_gif(image: list[PIL.Image.Image], output_gif_path: str = None, fps: int = 10) -> str: + if output_gif_path is None: + output_gif_path = tempfile.NamedTemporaryFile(suffix=".gif").name + + image[0].save( + output_gif_path, + save_all=True, + append_images=image[1:], + optimize=False, + duration=1000 // fps, + loop=0, + ) + return output_gif_path + + +def export_to_ply(mesh, output_ply_path: str = None): + """ + Write a PLY file for a mesh. + """ + if output_ply_path is None: + output_ply_path = tempfile.NamedTemporaryFile(suffix=".ply").name + + coords = mesh.verts.detach().cpu().numpy() + faces = mesh.faces.cpu().numpy() + rgb = np.stack([mesh.vertex_channels[x].detach().cpu().numpy() for x in "RGB"], axis=1) + + with buffered_writer(open(output_ply_path, "wb")) as f: + f.write(b"ply\n") + f.write(b"format binary_little_endian 1.0\n") + f.write(bytes(f"element vertex {len(coords)}\n", "ascii")) + f.write(b"property float x\n") + f.write(b"property float y\n") + f.write(b"property float z\n") + if rgb is not None: + f.write(b"property uchar red\n") + f.write(b"property uchar green\n") + f.write(b"property uchar blue\n") + if faces is not None: + f.write(bytes(f"element face {len(faces)}\n", "ascii")) + f.write(b"property list uchar int vertex_index\n") + f.write(b"end_header\n") + + if rgb is not None: + rgb = (rgb * 255.499).round().astype(int) + vertices = [ + (*coord, *rgb) + for coord, rgb in zip( + coords.tolist(), + rgb.tolist(), + ) + ] + format = struct.Struct("<3f3B") + for item in vertices: + f.write(format.pack(*item)) + else: + format = struct.Struct("<3f") + for vertex in coords.tolist(): + f.write(format.pack(*vertex)) + + if faces is not None: + format = struct.Struct(" str: + """ + quality: + Video output quality. Default is 5. Uses variable bit rate. Highest quality is 10, lowest is 0. Set to None to + prevent variable bitrate flags to FFMPEG so you can manually specify them using output_params instead. + Specifying a fixed bitrate using `bitrate` disables this parameter. + + bitrate: + Set a constant bitrate for the video encoding. Default is None causing `quality` parameter to be used instead. + Better quality videos with smaller file sizes will result from using the `quality` variable bitrate parameter + rather than specifying a fixed bitrate with this parameter. + + macro_block_size: + Size constraint for video. Width and height, must be divisible by this number. If not divisible by this number + imageio will tell ffmpeg to scale the image up to the next closest size divisible by this number. Most codecs + are compatible with a macroblock size of 16 (default), some can go smaller (4, 8). To disable this automatic + feature set it to None or 1, however be warned many players can't decode videos that are odd in size and some + codecs will produce poor results or fail. See https://en.wikipedia.org/wiki/Macroblock. + """ + # TODO: Dhruv. Remove by Diffusers release 0.33.0 + # Added to prevent breaking existing code + if not is_imageio_available(): + logger.warning( + ( + "It is recommended to use `export_to_video` with `imageio` and `imageio-ffmpeg` as a backend. \n" + "These libraries are not present in your environment. Attempting to use legacy OpenCV backend to export video. \n" + "Support for the OpenCV backend will be deprecated in a future Diffusers version" + ) + ) + return _legacy_export_to_video(video_frames, output_video_path, fps) + + if is_imageio_available(): + import imageio + else: + raise ImportError(BACKENDS_MAPPING["imageio"][1].format("export_to_video")) + + try: + imageio.plugins.ffmpeg.get_exe() + except AttributeError: + raise AttributeError( + ( + "Found an existing imageio backend in your environment. Attempting to export video with imageio. \n" + "Unable to find a compatible ffmpeg installation in your environment to use with imageio. Please install via `pip install imageio-ffmpeg" + ) + ) + + if output_video_path is None: + output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4").name + + if isinstance(video_frames[0], np.ndarray): + video_frames = [(frame * 255).astype(np.uint8) for frame in video_frames] + + elif isinstance(video_frames[0], PIL.Image.Image): + video_frames = [np.array(frame) for frame in video_frames] + + with imageio.get_writer( + output_video_path, fps=fps, quality=quality, bitrate=bitrate, macro_block_size=macro_block_size + ) as writer: + for frame in video_frames: + writer.append_data(frame) + + return output_video_path diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/hub_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/hub_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b5eb9ab2e17f5db8fbbf24a1c7683e852d33aa89 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/hub_utils.py @@ -0,0 +1,592 @@ +# coding=utf-8 +# Copyright 2025 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import json +import os +import re +import sys +import tempfile +import warnings +from pathlib import Path +from uuid import uuid4 + +from huggingface_hub import ( + DDUFEntry, + ModelCard, + ModelCardData, + create_repo, + hf_hub_download, + model_info, + snapshot_download, + upload_folder, +) +from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY, HF_HUB_OFFLINE +from huggingface_hub.file_download import REGEX_COMMIT_HASH +from huggingface_hub.utils import ( + EntryNotFoundError, + HfHubHTTPError, + RepositoryNotFoundError, + RevisionNotFoundError, + is_jinja_available, + validate_hf_hub_args, +) +from packaging import version + +from .. import __version__ +from .constants import ( + DEPRECATED_REVISION_ARGS, + HUGGINGFACE_CO_RESOLVE_ENDPOINT, + SAFETENSORS_WEIGHTS_NAME, + WEIGHTS_NAME, +) +from .import_utils import ( + ENV_VARS_TRUE_VALUES, + _flax_version, + _jax_version, + _onnxruntime_version, + _torch_version, + is_flax_available, + is_onnx_available, + is_torch_available, +) +from .logging import get_logger + + +logger = get_logger(__name__) + +MODEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "model_card_template.md" +SESSION_ID = uuid4().hex + + +def http_user_agent(user_agent: dict | str | None = None) -> str: + """ + Formats a user-agent string with basic info about a request. + """ + ua = f"diffusers/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}" + if HF_HUB_DISABLE_TELEMETRY or HF_HUB_OFFLINE: + return ua + "; telemetry/off" + if is_torch_available(): + ua += f"; torch/{_torch_version}" + if is_flax_available(): + ua += f"; jax/{_jax_version}" + ua += f"; flax/{_flax_version}" + if is_onnx_available(): + ua += f"; onnxruntime/{_onnxruntime_version}" + # CI will set this value to True + if os.environ.get("DIFFUSERS_IS_CI", "").upper() in ENV_VARS_TRUE_VALUES: + ua += "; is_ci/true" + if isinstance(user_agent, dict): + ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items()) + elif isinstance(user_agent, str): + ua += "; " + user_agent + return ua + + +def load_or_create_model_card( + repo_id_or_path: str = None, + token: str | None = None, + is_pipeline: bool = False, + from_training: bool = False, + model_description: str | None = None, + base_model: str = None, + prompt: str | None = None, + license: str | None = None, + widget: list[dict] | None = None, + inference: bool | None = None, + is_modular: bool = False, + update_model_card: bool = False, +) -> ModelCard: + """ + Loads or creates a model card. + + Args: + repo_id_or_path (`str`): + The repo id (e.g., "stable-diffusion-v1-5/stable-diffusion-v1-5") or local path where to look for the model + card. + token (`str`, *optional*): + Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more + details. + is_pipeline (`bool`): + Boolean to indicate if we're adding tag to a [`DiffusionPipeline`]. + from_training: (`bool`): Boolean flag to denote if the model card is being created from a training script. + model_description (`str`, *optional*): Model description to add to the model card. Helpful when using + `load_or_create_model_card` from a training script. + base_model (`str`): Base model identifier (e.g., "stabilityai/stable-diffusion-xl-base-1.0"). Useful + for DreamBooth-like training. + prompt (`str`, *optional*): Prompt used for training. Useful for DreamBooth-like training. + license: (`str`, *optional*): License of the output artifact. Helpful when using + `load_or_create_model_card` from a training script. + widget (`list[dict]`, *optional*): Widget to accompany a gallery template. + inference: (`bool`, optional): Whether to turn on inference widget. Helpful when using + `load_or_create_model_card` from a training script. + is_modular: (`bool`, optional): Boolean flag to denote if the model card is for a modular pipeline. + When True, uses model_description as-is without additional template formatting. + update_model_card: (`bool`, optional): When True, regenerates the model card content even if one + already exists on the remote repo. Existing card metadata (tags, license, etc.) is preserved. Only + supported for modular pipelines (i.e., `is_modular=True`). + """ + if not is_jinja_available(): + raise ValueError( + "Modelcard rendering is based on Jinja templates." + " Please make sure to have `jinja` installed before using `load_or_create_model_card`." + " To install it, please run `pip install Jinja2`." + ) + + if update_model_card and not is_modular: + raise ValueError("`update_model_card=True` is only supported for modular pipelines (`is_modular=True`).") + + try: + # Check if the model card is present on the remote repo + model_card = ModelCard.load(repo_id_or_path, token=token) + # For modular pipelines, regenerate card content when requested (preserve existing metadata) + if update_model_card and is_modular and model_description is not None: + existing_data = model_card.data + model_card = ModelCard(model_description) + model_card.data = existing_data + except (EntryNotFoundError, RepositoryNotFoundError): + # Otherwise create a model card from template + if from_training: + model_card = ModelCard.from_template( + card_data=ModelCardData( # Card metadata object that will be converted to YAML block + license=license, + library_name="diffusers", + inference=inference, + base_model=base_model, + instance_prompt=prompt, + widget=widget, + ), + template_path=MODEL_CARD_TEMPLATE_PATH, + model_description=model_description, + ) + else: + card_data = ModelCardData() + if is_modular and model_description is not None: + model_card = ModelCard(model_description) + model_card.data = card_data + else: + component = "pipeline" if is_pipeline else "model" + if model_description is None: + model_description = f"This is the model card of a 🧨 diffusers {component} that has been pushed on the Hub. This model card has been automatically generated." + model_card = ModelCard.from_template(card_data, model_description=model_description) + + return model_card + + +def populate_model_card(model_card: ModelCard, tags: str | list[str] | None = None) -> ModelCard: + """Populates the `model_card` with library name and optional tags.""" + if model_card.data.library_name is None: + model_card.data.library_name = "diffusers" + + if tags is not None: + if isinstance(tags, str): + tags = [tags] + if model_card.data.tags is None: + model_card.data.tags = [] + for tag in tags: + model_card.data.tags.append(tag) + + return model_card + + +def extract_commit_hash(resolved_file: str | None, commit_hash: str | None = None): + """ + Extracts the commit hash from a resolved filename toward a cache file. + """ + if resolved_file is None or commit_hash is not None: + return commit_hash + resolved_file = str(Path(resolved_file).as_posix()) + search = re.search(r"snapshots/([^/]+)/", resolved_file) + if search is None: + return None + commit_hash = search.groups()[0] + return commit_hash if REGEX_COMMIT_HASH.match(commit_hash) else None + + +def _add_variant(weights_name: str, variant: str | None = None) -> str: + if variant is not None: + splits = weights_name.split(".") + splits = splits[:-1] + [variant] + splits[-1:] + weights_name = ".".join(splits) + + return weights_name + + +@validate_hf_hub_args +def _get_model_file( + pretrained_model_name_or_path: str | Path, + *, + weights_name: str, + subfolder: str | None = None, + cache_dir: str | None = None, + force_download: bool = False, + proxies: dict | None = None, + local_files_only: bool = False, + token: str | None = None, + user_agent: dict | str | None = None, + revision: str | None = None, + commit_hash: str | None = None, + dduf_entries: dict[str, DDUFEntry] | None = None, +): + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if dduf_entries: + if subfolder is not None: + raise ValueError( + "DDUF file only allow for 1 level of directory (e.g transformer/model1/model.safetentors is not allowed). " + "Please check the DDUF structure" + ) + model_file = ( + weights_name + if pretrained_model_name_or_path == "" + else "/".join([pretrained_model_name_or_path, weights_name]) + ) + if model_file in dduf_entries: + return model_file + else: + raise EnvironmentError(f"Error no file named {weights_name} found in archive {dduf_entries.keys()}.") + elif os.path.isfile(pretrained_model_name_or_path): + return pretrained_model_name_or_path + elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isfile(os.path.join(pretrained_model_name_or_path, weights_name)): + # Load from a PyTorch checkpoint + model_file = os.path.join(pretrained_model_name_or_path, weights_name) + return model_file + elif subfolder is not None and os.path.isfile( + os.path.join(pretrained_model_name_or_path, subfolder, weights_name) + ): + model_file = os.path.join(pretrained_model_name_or_path, subfolder, weights_name) + return model_file + else: + raise EnvironmentError( + f"Error no file named {weights_name} found in directory {pretrained_model_name_or_path}." + ) + else: + # 1. First check if deprecated way of loading from branches is used + if ( + revision in DEPRECATED_REVISION_ARGS + and (weights_name == WEIGHTS_NAME or weights_name == SAFETENSORS_WEIGHTS_NAME) + and version.parse(version.parse(__version__).base_version) >= version.parse("0.22.0") + ): + try: + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=_add_variant(weights_name, revision), + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision or commit_hash, + ) + warnings.warn( + f"Loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'` is deprecated. Loading instead from `revision='main'` with `variant={revision}`. Loading model variants via `revision='{revision}'` will be removed in diffusers v1. Please use `variant='{revision}'` instead.", + FutureWarning, + ) + return model_file + except: # noqa: E722 + warnings.warn( + f"You are loading the variant {revision} from {pretrained_model_name_or_path} via `revision='{revision}'`. This behavior is deprecated and will be removed in diffusers v1. One should use `variant='{revision}'` instead. However, it appears that {pretrained_model_name_or_path} currently does not have a {_add_variant(weights_name, revision)} file in the 'main' branch of {pretrained_model_name_or_path}. \n The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title '{pretrained_model_name_or_path} is missing {_add_variant(weights_name, revision)}' so that the correct variant file can be added.", + FutureWarning, + ) + try: + # 2. Load model file as usual + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=weights_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + local_files_only=local_files_only, + token=token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision or commit_hash, + ) + return model_file + + except RepositoryNotFoundError as e: + raise EnvironmentError( + f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier " + "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a " + "token having permission to this repo with `token` or log in with `hf auth login`." + ) from e + except RevisionNotFoundError as e: + raise EnvironmentError( + f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for " + "this model name. Check the model page at " + f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions." + ) from e + except EntryNotFoundError as e: + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named {weights_name}." + ) from e + except HfHubHTTPError as e: + raise EnvironmentError( + f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n{e}" + ) from e + except ValueError as e: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a file named {weights_name} or" + " \nCheckout your internet connection or see how to run the library in" + " offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'." + ) from e + except EnvironmentError as e: + raise EnvironmentError( + f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a file named {weights_name}" + ) from e + + +def _get_checkpoint_shard_files( + pretrained_model_name_or_path, + index_filename, + cache_dir=None, + proxies=None, + local_files_only=False, + token=None, + user_agent=None, + revision=None, + subfolder="", + dduf_entries: dict[str, DDUFEntry] | None = None, +): + """ + For a given model: + + - download and cache all the shards of a sharded checkpoint if `pretrained_model_name_or_path` is a model ID on the + Hub + - returns the list of paths to all the shards, as well as some metadata. + + For the description of each arg, see [`PreTrainedModel.from_pretrained`]. `index_filename` is the full path to the + index (downloaded and cached if `pretrained_model_name_or_path` is a model ID on the Hub). + """ + if dduf_entries: + if index_filename not in dduf_entries: + raise ValueError(f"Can't find a checkpoint index ({index_filename}) in {pretrained_model_name_or_path}.") + else: + if not os.path.isfile(index_filename): + raise ValueError(f"Can't find a checkpoint index ({index_filename}) in {pretrained_model_name_or_path}.") + + if dduf_entries: + index = json.loads(dduf_entries[index_filename].read_text()) + else: + with open(index_filename, "r") as f: + index = json.loads(f.read()) + + original_shard_filenames = sorted(set(index["weight_map"].values())) + sharded_metadata = index["metadata"] + sharded_metadata["all_checkpoint_keys"] = list(index["weight_map"].keys()) + sharded_metadata["weight_map"] = index["weight_map"].copy() + shards_path = os.path.join(pretrained_model_name_or_path, subfolder) + + # First, let's deal with local folder. + if os.path.isdir(pretrained_model_name_or_path) or dduf_entries: + shard_filenames = [os.path.join(shards_path, f) for f in original_shard_filenames] + for shard_file in shard_filenames: + if dduf_entries: + if shard_file not in dduf_entries: + raise FileNotFoundError( + f"{shards_path} does not appear to have a file named {shard_file} which is " + "required according to the checkpoint index." + ) + else: + if not os.path.exists(shard_file): + raise FileNotFoundError( + f"{shards_path} does not appear to have a file named {shard_file} which is " + "required according to the checkpoint index." + ) + return shard_filenames, sharded_metadata + + # At this stage pretrained_model_name_or_path is a model identifier on the Hub + allow_patterns = original_shard_filenames + if subfolder is not None: + allow_patterns = [os.path.join(subfolder, p) for p in allow_patterns] + + ignore_patterns = ["*.json", "*.md"] + + # If the repo doesn't have the required shards, error out early even before downloading anything. + if not local_files_only: + model_files_info = model_info(pretrained_model_name_or_path, revision=revision, token=token) + for shard_file in original_shard_filenames: + shard_file_present = any(shard_file in k.rfilename for k in model_files_info.siblings) + if not shard_file_present: + raise EnvironmentError( + f"{shards_path} does not appear to have a file named {shard_file} which is " + "required according to the checkpoint index." + ) + + try: + # Load from URL + cached_folder = snapshot_download( + pretrained_model_name_or_path, + cache_dir=cache_dir, + proxies=proxies, + local_files_only=local_files_only, + token=token, + revision=revision, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + user_agent=user_agent, + ) + if subfolder is not None: + cached_folder = os.path.join(cached_folder, subfolder) + + # We have already dealt with RepositoryNotFoundError and RevisionNotFoundError when getting the index, so + # we don't have to catch them here. We have also dealt with EntryNotFoundError. + except HfHubHTTPError as e: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load {pretrained_model_name_or_path}. You should try" + " again after checking your internet connection." + ) from e + + cached_filenames = [os.path.join(cached_folder, f) for f in original_shard_filenames] + for cached_file in cached_filenames: + if not os.path.isfile(cached_file): + raise EnvironmentError( + f"{cached_folder} does not have a file named {cached_file} which is required according to the checkpoint index." + ) + + return cached_filenames, sharded_metadata + + +def _check_legacy_sharding_variant_format(folder: str = None, filenames: list[str] = None, variant: str = None): + if filenames and folder: + raise ValueError("Both `filenames` and `folder` cannot be provided.") + if not filenames: + filenames = [] + for _, _, files in os.walk(folder): + for file in files: + filenames.append(os.path.basename(file)) + transformers_index_format = r"\d{5}-of-\d{5}" + variant_file_re = re.compile(rf".*-{transformers_index_format}\.{variant}\.[a-z]+$") + return any(variant_file_re.match(f) is not None for f in filenames) + + +class PushToHubMixin: + """ + A Mixin to push a model, scheduler, or pipeline to the Hugging Face Hub. + """ + + def _upload_folder( + self, + working_dir: str | os.PathLike, + repo_id: str, + token: str | None = None, + commit_message: str | None = None, + create_pr: bool = False, + subfolder: str | None = None, + ): + """ + Uploads all files in `working_dir` to `repo_id`. + """ + if commit_message is None: + if "Model" in self.__class__.__name__: + commit_message = "Upload model" + elif "Scheduler" in self.__class__.__name__: + commit_message = "Upload scheduler" + else: + commit_message = f"Upload {self.__class__.__name__}" + + logger.info(f"Uploading the files of {working_dir} to {repo_id}.") + return upload_folder( + repo_id=repo_id, + folder_path=working_dir, + token=token, + commit_message=commit_message, + create_pr=create_pr, + path_in_repo=subfolder, + ) + + def push_to_hub( + self, + repo_id: str, + commit_message: str | None = None, + private: bool | None = None, + token: str | None = None, + create_pr: bool = False, + safe_serialization: bool = True, + variant: str | None = None, + subfolder: str | None = None, + ) -> str: + """ + Upload model, scheduler, or pipeline files to the 🤗 Hugging Face Hub. + + Parameters: + repo_id (`str`): + The name of the repository you want to push your model, scheduler, or pipeline files to. It should + contain your organization name when pushing to an organization. `repo_id` can also be a path to a local + directory. + commit_message (`str`, *optional*): + Message to commit while pushing. Default to `"Upload {object}"`. + private (`bool`, *optional*): + Whether to make the repo private. If `None` (default), the repo will be public unless the + organization's default is private. This value is ignored if the repo already exists. + token (`str`, *optional*): + The token to use as HTTP bearer authorization for remote files. The token generated when running `hf + auth login` (stored in `~/.huggingface`). + create_pr (`bool`, *optional*, defaults to `False`): + Whether or not to create a PR with the uploaded files or directly commit. + safe_serialization (`bool`, *optional*, defaults to `True`): + Whether or not to convert the model weights to the `safetensors` format. + variant (`str`, *optional*): + If specified, weights are saved in the format `pytorch_model..bin`. + + Examples: + + ```python + from diffusers import UNet2DConditionModel + + unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="unet") + + # Push the `unet` to your namespace with the name "my-finetuned-unet". + unet.push_to_hub("my-finetuned-unet") + + # Push the `unet` to an organization with the name "my-finetuned-unet". + unet.push_to_hub("your-org/my-finetuned-unet") + ``` + """ + repo_id = create_repo(repo_id, private=private, token=token, exist_ok=True).repo_id + + # Create a new empty model card and eventually tag it + if not subfolder: + model_card = load_or_create_model_card(repo_id, token=token) + model_card = populate_model_card(model_card) + + # Save all files. + save_kwargs = {"safe_serialization": safe_serialization} + if "Scheduler" not in self.__class__.__name__: + save_kwargs.update({"variant": variant}) + + with tempfile.TemporaryDirectory() as tmpdir: + self.save_pretrained(tmpdir, **save_kwargs) + + # Update model card if needed: + if not subfolder: + model_card.save(os.path.join(tmpdir, "README.md")) + + return self._upload_folder( + tmpdir, + repo_id, + token=token, + commit_message=commit_message, + create_pr=create_pr, + subfolder=subfolder, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/import_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/import_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..551fa358a28d3c6ca66813a45cc0a04106a9ce9a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/import_utils.py @@ -0,0 +1,1018 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Import utilities: Utilities related to imports and our lazy inits. +""" + +import importlib.util +import inspect +import operator as op +import os +import sys +from collections import OrderedDict, defaultdict +from functools import lru_cache as cache +from itertools import chain +from types import ModuleType +from typing import Any + +from huggingface_hub.utils import is_jinja_available # noqa: F401 +from packaging.version import Version, parse + +from . import logging + + +# The package importlib_metadata is in a different place, depending on the python version. +if sys.version_info < (3, 8): + import importlib_metadata +else: + import importlib.metadata as importlib_metadata +try: + _package_map = importlib_metadata.packages_distributions() # load-once to avoid expensive calls +except Exception: + _package_map = None + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} +ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"}) + +USE_TF = os.environ.get("USE_TF", "AUTO").upper() +USE_TORCH = os.environ.get("USE_TORCH", "AUTO").upper() +USE_JAX = os.environ.get("USE_FLAX", "AUTO").upper() +USE_SAFETENSORS = os.environ.get("USE_SAFETENSORS", "AUTO").upper() +DIFFUSERS_SLOW_IMPORT = os.environ.get("DIFFUSERS_SLOW_IMPORT", "FALSE").upper() +DIFFUSERS_SLOW_IMPORT = DIFFUSERS_SLOW_IMPORT in ENV_VARS_TRUE_VALUES + +STR_OPERATION_TO_FUNC = {">": op.gt, ">=": op.ge, "==": op.eq, "!=": op.ne, "<=": op.le, "<": op.lt} + +_is_google_colab = "google.colab" in sys.modules or any(k.startswith("COLAB_") for k in os.environ) + + +def _is_package_available(pkg_name: str, get_dist_name: bool = False) -> tuple[bool, str]: + global _package_map + pkg_exists = importlib.util.find_spec(pkg_name) is not None + pkg_version = "N/A" + + if pkg_exists: + if _package_map is None: + _package_map = defaultdict(list) + try: + # Fallback for Python < 3.10 + for dist in importlib_metadata.distributions(): + _top_level_declared = (dist.read_text("top_level.txt") or "").split() + # Infer top-level package names from file structure + _inferred_opt_names = { + f.parts[0] if len(f.parts) > 1 else inspect.getmodulename(f) for f in (dist.files or []) + } - {None} + _top_level_inferred = filter(lambda name: "." not in name, _inferred_opt_names) + for pkg in _top_level_declared or _top_level_inferred: + _package_map[pkg].append(dist.metadata["Name"]) + except Exception as _: + pass + try: + if get_dist_name and pkg_name in _package_map and _package_map[pkg_name]: + if len(_package_map[pkg_name]) > 1: + logger.warning( + f"Multiple distributions found for package {pkg_name}. Picked distribution: {_package_map[pkg_name][0]}" + ) + pkg_name = _package_map[pkg_name][0] + pkg_version = importlib_metadata.version(pkg_name) + logger.debug(f"Successfully imported {pkg_name} version {pkg_version}") + except (ImportError, importlib_metadata.PackageNotFoundError): + pkg_exists = False + + return pkg_exists, pkg_version + + +if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES: + _torch_available, _torch_version = _is_package_available("torch") + +else: + logger.info("Disabling PyTorch because USE_TORCH is set") + _torch_available = False + _torch_version = "N/A" + +_jax_version = "N/A" +_flax_version = "N/A" +if USE_JAX in ENV_VARS_TRUE_AND_AUTO_VALUES: + _flax_available = importlib.util.find_spec("jax") is not None and importlib.util.find_spec("flax") is not None + if _flax_available: + try: + _jax_version = importlib_metadata.version("jax") + _flax_version = importlib_metadata.version("flax") + logger.info(f"JAX version {_jax_version}, Flax version {_flax_version} available.") + except importlib_metadata.PackageNotFoundError: + _flax_available = False +else: + _flax_available = False + +if USE_SAFETENSORS in ENV_VARS_TRUE_AND_AUTO_VALUES: + _safetensors_available, _safetensors_version = _is_package_available("safetensors") + +else: + logger.info("Disabling Safetensors because USE_SAFETENSORS is set") + _safetensors_available = False + +_onnxruntime_version = "N/A" +_onnx_available = importlib.util.find_spec("onnxruntime") is not None +if _onnx_available: + candidates = ( + "onnxruntime", + "onnxruntime-cann", + "onnxruntime-directml", + "ort_nightly_directml", + "onnxruntime-gpu", + "ort_nightly_gpu", + "onnxruntime-migraphx", + "onnxruntime-openvino", + "onnxruntime-qnn", + "onnxruntime-rocm", + "onnxruntime-training", + "onnxruntime-vitisai", + ) + _onnxruntime_version = None + # For the metadata, we have to look for both onnxruntime and onnxruntime-x + for pkg in candidates: + try: + _onnxruntime_version = importlib_metadata.version(pkg) + break + except importlib_metadata.PackageNotFoundError: + pass + _onnx_available = _onnxruntime_version is not None + if _onnx_available: + logger.debug(f"Successfully imported onnxruntime version {_onnxruntime_version}") + +# (sayakpaul): importlib.util.find_spec("opencv-python") returns None even when it's installed. +# _opencv_available = importlib.util.find_spec("opencv-python") is not None +try: + candidates = ( + "opencv-python", + "opencv-contrib-python", + "opencv-python-headless", + "opencv-contrib-python-headless", + ) + _opencv_version = None + for pkg in candidates: + try: + _opencv_version = importlib_metadata.version(pkg) + break + except importlib_metadata.PackageNotFoundError: + pass + _opencv_available = _opencv_version is not None + if _opencv_available: + logger.debug(f"Successfully imported cv2 version {_opencv_version}") +except importlib_metadata.PackageNotFoundError: + _opencv_available = False + +_bs4_available = importlib.util.find_spec("bs4") is not None +try: + # importlib metadata under different name + _bs4_version = importlib_metadata.version("beautifulsoup4") + logger.debug(f"Successfully imported ftfy version {_bs4_version}") +except importlib_metadata.PackageNotFoundError: + _bs4_available = False + +_invisible_watermark_available = importlib.util.find_spec("imwatermark") is not None +try: + _invisible_watermark_version = importlib_metadata.version("invisible-watermark") + logger.debug(f"Successfully imported invisible-watermark version {_invisible_watermark_version}") +except importlib_metadata.PackageNotFoundError: + _invisible_watermark_available = False + +_torch_xla_available, _torch_xla_version = _is_package_available("torch_xla") +_torch_npu_available, _torch_npu_version = _is_package_available("torch_npu") +_torch_mlu_available, _torch_mlu_version = _is_package_available("torch_mlu") +_transformers_available, _transformers_version = _is_package_available("transformers") +_hf_hub_available, _hf_hub_version = _is_package_available("huggingface_hub") +_kernels_available, _kernels_version = _is_package_available("kernels") +_inflect_available, _inflect_version = _is_package_available("inflect") +_unidecode_available, _unidecode_version = _is_package_available("unidecode") + +_note_seq_available, _note_seq_version = _is_package_available("note_seq") +_wandb_available, _wandb_version = _is_package_available("wandb") +_tensorboard_available, _tensorboard_version = _is_package_available("tensorboard") +_compel_available, _compel_version = _is_package_available("compel") +_sentencepiece_available, _sentencepiece_version = _is_package_available("sentencepiece") +_torchsde_available, _torchsde_version = _is_package_available("torchsde") +_peft_available, _peft_version = _is_package_available("peft") +_torchvision_available, _torchvision_version = _is_package_available("torchvision") +_matplotlib_available, _matplotlib_version = _is_package_available("matplotlib") +_timm_available, _timm_version = _is_package_available("timm") +_bitsandbytes_available, _bitsandbytes_version = _is_package_available("bitsandbytes") +_imageio_available, _imageio_version = _is_package_available("imageio") +_ftfy_available, _ftfy_version = _is_package_available("ftfy") +_scipy_available, _scipy_version = _is_package_available("scipy") +_librosa_available, _librosa_version = _is_package_available("librosa") +_accelerate_available, _accelerate_version = _is_package_available("accelerate") +_xformers_available, _xformers_version = _is_package_available("xformers") +_gguf_available, _gguf_version = _is_package_available("gguf") +_torchao_available, _torchao_version = _is_package_available("torchao") +_bitsandbytes_available, _bitsandbytes_version = _is_package_available("bitsandbytes") +_optimum_quanto_available, _optimum_quanto_version = _is_package_available("optimum", get_dist_name=True) +_pytorch_retinaface_available, _pytorch_retinaface_version = _is_package_available("pytorch_retinaface") +_better_profanity_available, _better_profanity_version = _is_package_available("better_profanity") +_nltk_available, _nltk_version = _is_package_available("nltk") +_cosmos_guardrail_available, _cosmos_guardrail_version = _is_package_available("cosmos_guardrail") +_sageattention_available, _sageattention_version = _is_package_available("sageattention") +_flash_attn_available, _flash_attn_version = _is_package_available("flash_attn") +_flash_attn_3_available, _flash_attn_3_version = _is_package_available("flash_attn_3") +_aiter_available, _aiter_version = _is_package_available("aiter", get_dist_name=True) +_kornia_available, _kornia_version = _is_package_available("kornia") +_nvidia_modelopt_available, _nvidia_modelopt_version = _is_package_available("modelopt", get_dist_name=True) +_av_available, _av_version = _is_package_available("av") + + +def is_torch_available(): + return _torch_available + + +def is_torch_xla_available(): + return _torch_xla_available + + +def is_torch_npu_available(): + return _torch_npu_available + + +def is_torch_mlu_available(): + return _torch_mlu_available + + +def is_flax_available(): + return _flax_available + + +def is_transformers_available(): + return _transformers_available + + +def is_inflect_available(): + return _inflect_available + + +def is_unidecode_available(): + return _unidecode_available + + +def is_onnx_available(): + return _onnx_available + + +def is_opencv_available(): + return _opencv_available + + +def is_scipy_available(): + return _scipy_available + + +def is_librosa_available(): + return _librosa_available + + +def is_xformers_available(): + return _xformers_available + + +def is_accelerate_available(): + return _accelerate_available + + +def is_kernels_available(): + return _kernels_available + + +def is_note_seq_available(): + return _note_seq_available + + +def is_wandb_available(): + return _wandb_available + + +def is_tensorboard_available(): + return _tensorboard_available + + +def is_compel_available(): + return _compel_available + + +def is_ftfy_available(): + return _ftfy_available + + +def is_bs4_available(): + return _bs4_available + + +def is_torchsde_available(): + return _torchsde_available + + +def is_invisible_watermark_available(): + return _invisible_watermark_available + + +def is_peft_available(): + return _peft_available + + +def is_torchvision_available(): + return _torchvision_available + + +def is_matplotlib_available(): + return _matplotlib_available + + +def is_safetensors_available(): + return _safetensors_available + + +def is_bitsandbytes_available(): + return _bitsandbytes_available + + +def is_google_colab(): + return _is_google_colab + + +def is_sentencepiece_available(): + return _sentencepiece_available + + +def is_imageio_available(): + return _imageio_available + + +def is_gguf_available(): + return _gguf_available + + +def is_torchao_available(): + return _torchao_available + + +def is_optimum_quanto_available(): + return _optimum_quanto_available + + +def is_nvidia_modelopt_available(): + return _nvidia_modelopt_available + + +def is_timm_available(): + return _timm_available + + +def is_pytorch_retinaface_available(): + return _pytorch_retinaface_available + + +def is_better_profanity_available(): + return _better_profanity_available + + +def is_nltk_available(): + return _nltk_available + + +def is_cosmos_guardrail_available(): + return _cosmos_guardrail_available + + +def is_hpu_available(): + return all(importlib.util.find_spec(lib) for lib in ("habana_frameworks", "habana_frameworks.torch")) + + +def is_sageattention_available(): + return _sageattention_available + + +def is_flash_attn_available(): + return _flash_attn_available + + +def is_flash_attn_3_available(): + return _flash_attn_3_available + + +def is_aiter_available(): + return _aiter_available + + +def is_kornia_available(): + return _kornia_available + + +def is_av_available(): + return _av_available + + +# docstyle-ignore +FLAX_IMPORT_ERROR = """ +{0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the +installation page: https://github.com/google/flax and follow the ones that match your environment. +""" + +# docstyle-ignore +INFLECT_IMPORT_ERROR = """ +{0} requires the inflect library but it was not found in your environment. You can install it with pip: `pip install +inflect` +""" + +# docstyle-ignore +PYTORCH_IMPORT_ERROR = """ +{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the +installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment. +""" + +# docstyle-ignore +ONNX_IMPORT_ERROR = """ +{0} requires the onnxruntime library but it was not found in your environment. You can install it with pip: `pip +install onnxruntime` +""" + +# docstyle-ignore +OPENCV_IMPORT_ERROR = """ +{0} requires the OpenCV library but it was not found in your environment. You can install it with pip: `pip +install opencv-python` +""" + +# docstyle-ignore +SCIPY_IMPORT_ERROR = """ +{0} requires the scipy library but it was not found in your environment. You can install it with pip: `pip install +scipy` +""" + +# docstyle-ignore +LIBROSA_IMPORT_ERROR = """ +{0} requires the librosa library but it was not found in your environment. Checkout the instructions on the +installation page: https://librosa.org/doc/latest/install.html and follow the ones that match your environment. +""" + +# docstyle-ignore +TRANSFORMERS_IMPORT_ERROR = """ +{0} requires the transformers library but it was not found in your environment. You can install it with pip: `pip +install transformers` +""" + +# docstyle-ignore +UNIDECODE_IMPORT_ERROR = """ +{0} requires the unidecode library but it was not found in your environment. You can install it with pip: `pip install +Unidecode` +""" + +# docstyle-ignore +NOTE_SEQ_IMPORT_ERROR = """ +{0} requires the note-seq library but it was not found in your environment. You can install it with pip: `pip +install note-seq` +""" + +# docstyle-ignore +WANDB_IMPORT_ERROR = """ +{0} requires the wandb library but it was not found in your environment. You can install it with pip: `pip +install wandb` +""" + +# docstyle-ignore +TENSORBOARD_IMPORT_ERROR = """ +{0} requires the tensorboard library but it was not found in your environment. You can install it with pip: `pip +install tensorboard` +""" + + +# docstyle-ignore +COMPEL_IMPORT_ERROR = """ +{0} requires the compel library but it was not found in your environment. You can install it with pip: `pip install compel` +""" + +# docstyle-ignore +BS4_IMPORT_ERROR = """ +{0} requires the Beautiful Soup library but it was not found in your environment. You can install it with pip: +`pip install beautifulsoup4`. Please note that you may need to restart your runtime after installation. +""" + +# docstyle-ignore +FTFY_IMPORT_ERROR = """ +{0} requires the ftfy library but it was not found in your environment. Checkout the instructions on the +installation section: https://github.com/rspeer/python-ftfy/tree/master#installing and follow the ones +that match your environment. Please note that you may need to restart your runtime after installation. +""" + +# docstyle-ignore +TORCHSDE_IMPORT_ERROR = """ +{0} requires the torchsde library but it was not found in your environment. You can install it with pip: `pip install torchsde` +""" + +# docstyle-ignore +INVISIBLE_WATERMARK_IMPORT_ERROR = """ +{0} requires the invisible-watermark library but it was not found in your environment. You can install it with pip: `pip install invisible-watermark>=0.2.0` +""" + +# docstyle-ignore +PEFT_IMPORT_ERROR = """ +{0} requires the peft library but it was not found in your environment. You can install it with pip: `pip install peft` +""" + +# docstyle-ignore +SAFETENSORS_IMPORT_ERROR = """ +{0} requires the safetensors library but it was not found in your environment. You can install it with pip: `pip install safetensors` +""" + +# docstyle-ignore +SENTENCEPIECE_IMPORT_ERROR = """ +{0} requires the sentencepiece library but it was not found in your environment. You can install it with pip: `pip install sentencepiece` +""" + + +# docstyle-ignore +BITSANDBYTES_IMPORT_ERROR = """ +{0} requires the bitsandbytes library but it was not found in your environment. You can install it with pip: `pip install bitsandbytes` +""" + +# docstyle-ignore +IMAGEIO_IMPORT_ERROR = """ +{0} requires the imageio library and ffmpeg but it was not found in your environment. You can install it with pip: `pip install imageio imageio-ffmpeg` +""" + +# docstyle-ignore +GGUF_IMPORT_ERROR = """ +{0} requires the gguf library but it was not found in your environment. You can install it with pip: `pip install gguf` +""" + +TORCHAO_IMPORT_ERROR = """ +{0} requires the torchao library but it was not found in your environment. You can install it with pip: `pip install +torchao` +""" + +QUANTO_IMPORT_ERROR = """ +{0} requires the optimum-quanto library but it was not found in your environment. You can install it with pip: `pip +install optimum-quanto` +""" + +# docstyle-ignore +PYTORCH_RETINAFACE_IMPORT_ERROR = """ +{0} requires the pytorch_retinaface library but it was not found in your environment. You can install it with pip: `pip install pytorch_retinaface` +""" + +# docstyle-ignore +BETTER_PROFANITY_IMPORT_ERROR = """ +{0} requires the better_profanity library but it was not found in your environment. You can install it with pip: `pip install better_profanity` +""" + +# docstyle-ignore +NLTK_IMPORT_ERROR = """ +{0} requires the nltk library but it was not found in your environment. You can install it with pip: `pip install nltk` +""" + + +BACKENDS_MAPPING = OrderedDict( + [ + ("bs4", (is_bs4_available, BS4_IMPORT_ERROR)), + ("flax", (is_flax_available, FLAX_IMPORT_ERROR)), + ("inflect", (is_inflect_available, INFLECT_IMPORT_ERROR)), + ("onnx", (is_onnx_available, ONNX_IMPORT_ERROR)), + ("opencv", (is_opencv_available, OPENCV_IMPORT_ERROR)), + ("scipy", (is_scipy_available, SCIPY_IMPORT_ERROR)), + ("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)), + ("transformers", (is_transformers_available, TRANSFORMERS_IMPORT_ERROR)), + ("unidecode", (is_unidecode_available, UNIDECODE_IMPORT_ERROR)), + ("librosa", (is_librosa_available, LIBROSA_IMPORT_ERROR)), + ("note_seq", (is_note_seq_available, NOTE_SEQ_IMPORT_ERROR)), + ("wandb", (is_wandb_available, WANDB_IMPORT_ERROR)), + ("tensorboard", (is_tensorboard_available, TENSORBOARD_IMPORT_ERROR)), + ("compel", (is_compel_available, COMPEL_IMPORT_ERROR)), + ("ftfy", (is_ftfy_available, FTFY_IMPORT_ERROR)), + ("torchsde", (is_torchsde_available, TORCHSDE_IMPORT_ERROR)), + ("invisible_watermark", (is_invisible_watermark_available, INVISIBLE_WATERMARK_IMPORT_ERROR)), + ("peft", (is_peft_available, PEFT_IMPORT_ERROR)), + ("safetensors", (is_safetensors_available, SAFETENSORS_IMPORT_ERROR)), + ("bitsandbytes", (is_bitsandbytes_available, BITSANDBYTES_IMPORT_ERROR)), + ("sentencepiece", (is_sentencepiece_available, SENTENCEPIECE_IMPORT_ERROR)), + ("imageio", (is_imageio_available, IMAGEIO_IMPORT_ERROR)), + ("gguf", (is_gguf_available, GGUF_IMPORT_ERROR)), + ("torchao", (is_torchao_available, TORCHAO_IMPORT_ERROR)), + ("quanto", (is_optimum_quanto_available, QUANTO_IMPORT_ERROR)), + ("pytorch_retinaface", (is_pytorch_retinaface_available, PYTORCH_RETINAFACE_IMPORT_ERROR)), + ("better_profanity", (is_better_profanity_available, BETTER_PROFANITY_IMPORT_ERROR)), + ("nltk", (is_nltk_available, NLTK_IMPORT_ERROR)), + ] +) + + +def requires_backends(obj, backends): + if not isinstance(backends, (list, tuple)): + backends = [backends] + + name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__ + checks = (BACKENDS_MAPPING[backend] for backend in backends) + failed = [msg.format(name) for available, msg in checks if not available()] + if failed: + raise ImportError("".join(failed)) + + if name in [ + "VersatileDiffusionTextToImagePipeline", + "VersatileDiffusionPipeline", + "VersatileDiffusionDualGuidedPipeline", + "StableDiffusionImageVariationPipeline", + "UnCLIPPipeline", + ] and is_transformers_version("<", "4.25.0"): + raise ImportError( + f"You need to install `transformers>=4.25` in order to use {name}: \n```\n pip install" + " --upgrade transformers \n```" + ) + + if name in ["StableDiffusionDepth2ImgPipeline", "StableDiffusionPix2PixZeroPipeline"] and is_transformers_version( + "<", "4.26.0" + ): + raise ImportError( + f"You need to install `transformers>=4.26` in order to use {name}: \n```\n pip install" + " --upgrade transformers \n```" + ) + + +class DummyObject(type): + """ + Metaclass for the dummy objects. Any class inheriting from it will return the ImportError generated by + `requires_backend` each time a user tries to access any method of that class. + """ + + def __getattr__(cls, key): + if key.startswith("_") and key not in ["_load_connected_pipes", "_is_onnx"]: + return super().__getattr__(cls, key) + requires_backends(cls, cls._backends) + + +# This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L319 +def compare_versions(library_or_version: str | Version, operation: str, requirement_version: str): + """ + Compares a library version to some requirement using a given operation. + + Args: + library_or_version (`str` or `packaging.version.Version`): + A library name or a version to check. + operation (`str`): + A string representation of an operator, such as `">"` or `"<="`. + requirement_version (`str`): + The version to compare the library version against + """ + if operation not in STR_OPERATION_TO_FUNC.keys(): + raise ValueError(f"`operation` must be one of {list(STR_OPERATION_TO_FUNC.keys())}, received {operation}") + operation = STR_OPERATION_TO_FUNC[operation] + if isinstance(library_or_version, str): + library_or_version = parse(importlib_metadata.version(library_or_version)) + return operation(library_or_version, parse(requirement_version)) + + +# This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L338 +@cache +def is_torch_version(operation: str, version: str): + """ + Compares the current PyTorch version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A string version of PyTorch + """ + return compare_versions(parse(_torch_version), operation, version) + + +@cache +def is_torch_xla_version(operation: str, version: str): + """ + Compares the current torch_xla version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A string version of torch_xla + """ + if not is_torch_xla_available: + return False + return compare_versions(parse(_torch_xla_version), operation, version) + + +@cache +def is_transformers_version(operation: str, version: str): + """ + Compares the current Transformers version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _transformers_available: + return False + return compare_versions(parse(_transformers_version), operation, version) + + +@cache +def is_kernels_version(operation: str, version: str): + """ + Compares the current Kernels version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _kernels_available: + return False + return compare_versions(parse(_kernels_version), operation, version) + + +@cache +def is_hf_hub_version(operation: str, version: str): + """ + Compares the current Hugging Face Hub version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _hf_hub_available: + return False + return compare_versions(parse(_hf_hub_version), operation, version) + + +@cache +def is_accelerate_version(operation: str, version: str): + """ + Compares the current Accelerate version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _accelerate_available: + return False + return compare_versions(parse(_accelerate_version), operation, version) + + +@cache +def is_peft_version(operation: str, version: str): + """ + Compares the current PEFT version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _peft_available: + return False + return compare_versions(parse(_peft_version), operation, version) + + +@cache +def is_bitsandbytes_version(operation: str, version: str): + """ + Args: + Compares the current bitsandbytes version to a given reference with an operation. + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _bitsandbytes_available: + return False + return compare_versions(parse(_bitsandbytes_version), operation, version) + + +@cache +def is_gguf_version(operation: str, version: str): + """ + Compares the current Accelerate version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _gguf_available: + return False + return compare_versions(parse(_gguf_version), operation, version) + + +@cache +def is_torchao_version(operation: str, version: str): + """ + Compares the current torchao version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _torchao_available: + return False + return compare_versions(parse(_torchao_version), operation, version) + + +@cache +def is_optimum_quanto_version(operation: str, version: str): + """ + Compares the current Accelerate version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _optimum_quanto_available: + return False + return compare_versions(parse(_optimum_quanto_version), operation, version) + + +@cache +def is_nvidia_modelopt_version(operation: str, version: str): + """ + Compares the current Nvidia ModelOpt version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _nvidia_modelopt_available: + return False + return compare_versions(parse(_nvidia_modelopt_version), operation, version) + + +@cache +def is_xformers_version(operation: str, version: str): + """ + Compares the current xformers version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _xformers_available: + return False + return compare_versions(parse(_xformers_version), operation, version) + + +@cache +def is_sageattention_version(operation: str, version: str): + """ + Compares the current sageattention version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _sageattention_available: + return False + return compare_versions(parse(_sageattention_version), operation, version) + + +@cache +def is_flash_attn_version(operation: str, version: str): + """ + Compares the current flash-attention version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _flash_attn_available: + return False + return compare_versions(parse(_flash_attn_version), operation, version) + + +@cache +def is_aiter_version(operation: str, version: str): + """ + Compares the current aiter version to a given reference with an operation. + + Args: + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A version string + """ + if not _aiter_available: + return False + return compare_versions(parse(_aiter_version), operation, version) + + +def get_objects_from_module(module): + """ + Returns a dict of object names and values in a module, while skipping private/internal objects + + Args: + module (ModuleType): + Module to extract the objects from. + + Returns: + dict: Dictionary of object names and corresponding values + """ + + objects = {} + for name in dir(module): + if name.startswith("_"): + continue + objects[name] = getattr(module, name) + + return objects + + +class OptionalDependencyNotAvailable(BaseException): + """ + An error indicating that an optional dependency of Diffusers was not found in the environment. + """ + + +class _LazyModule(ModuleType): + """ + Module class that surfaces all objects but only performs associated imports when the objects are requested. + """ + + # Very heavily inspired by optuna.integration._IntegrationModule + # https://github.com/optuna/optuna/blob/master/optuna/integration/__init__.py + def __init__(self, name, module_file, import_structure, module_spec=None, extra_objects=None): + super().__init__(name) + self._modules = set(import_structure.keys()) + self._class_to_module = {} + for key, values in import_structure.items(): + for value in values: + self._class_to_module[value] = key + # Needed for autocompletion in an IDE + self.__all__ = list(import_structure.keys()) + list(chain(*import_structure.values())) + self.__file__ = module_file + self.__spec__ = module_spec + self.__path__ = [os.path.dirname(module_file)] + self._objects = {} if extra_objects is None else extra_objects + self._name = name + self._import_structure = import_structure + + # Needed for autocompletion in an IDE + def __dir__(self): + result = super().__dir__() + # The elements of self.__all__ that are submodules may or may not be in the dir already, depending on whether + # they have been accessed or not. So we only add the elements of self.__all__ that are not already in the dir. + for attr in self.__all__: + if attr not in result: + result.append(attr) + return result + + def __getattr__(self, name: str) -> Any: + if name in self._objects: + return self._objects[name] + if name in self._modules: + value = self._get_module(name) + elif name in self._class_to_module.keys(): + module = self._get_module(self._class_to_module[name]) + value = getattr(module, name) + else: + raise AttributeError(f"module {self.__name__} has no attribute {name}") + + setattr(self, name, value) + return value + + def _get_module(self, module_name: str): + try: + return importlib.import_module("." + module_name, self.__name__) + except Exception as e: + raise RuntimeError( + f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its" + f" traceback):\n{e}" + ) from e + + def __reduce__(self): + return (self.__class__, (self._name, self.__file__, self._import_structure)) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/loading_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/loading_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c4fee0cfdd833a2bd013e982f8039ca54490b1ad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/loading_utils.py @@ -0,0 +1,163 @@ +import os +import tempfile +from typing import Any, Callable +from urllib.parse import unquote, urlparse + +import PIL.Image +import PIL.ImageOps +import requests + +from .constants import DIFFUSERS_REQUEST_TIMEOUT +from .import_utils import BACKENDS_MAPPING, is_imageio_available + + +def load_image( + image: str | PIL.Image.Image, convert_method: Callable[[PIL.Image.Image], PIL.Image.Image] | None = None +) -> PIL.Image.Image: + """ + Loads `image` to a PIL Image. + + Args: + image (`str` or `PIL.Image.Image`): + The image to convert to the PIL Image format. + convert_method (Callable[[PIL.Image.Image], PIL.Image.Image], *optional*): + A conversion method to apply to the image after loading it. When set to `None` the image will be converted + "RGB". + + Returns: + `PIL.Image.Image`: + A PIL Image. + """ + if isinstance(image, str): + if image.startswith("http://") or image.startswith("https://"): + image = PIL.Image.open(requests.get(image, stream=True, timeout=DIFFUSERS_REQUEST_TIMEOUT).raw) + elif os.path.isfile(image): + image = PIL.Image.open(image) + else: + raise ValueError( + f"Incorrect path or URL. URLs must start with `http://` or `https://`, and {image} is not a valid path." + ) + elif isinstance(image, PIL.Image.Image): + image = image + else: + raise ValueError( + "Incorrect format used for the image. Should be a URL linking to an image, a local path, or a PIL image." + ) + + image = PIL.ImageOps.exif_transpose(image) + + if convert_method is not None: + image = convert_method(image) + else: + image = image.convert("RGB") + + return image + + +def load_video( + video: str, + convert_method: Callable[[list[PIL.Image.Image]], list[PIL.Image.Image]] | None = None, +) -> list[PIL.Image.Image]: + """ + Loads `video` to a list of PIL Image. + + Args: + video (`str`): + A URL or Path to a video to convert to a list of PIL Image format. + convert_method (Callable[[list[PIL.Image.Image]], list[PIL.Image.Image]], *optional*): + A conversion method to apply to the video after loading it. When set to `None` the images will be converted + to "RGB". + + Returns: + `list[PIL.Image.Image]`: + The video as a list of PIL images. + """ + is_url = video.startswith("http://") or video.startswith("https://") + is_file = os.path.isfile(video) + was_tempfile_created = False + + if not (is_url or is_file): + raise ValueError( + f"Incorrect path or URL. URLs must start with `http://` or `https://`, and {video} is not a valid path." + ) + + if is_url: + response = requests.get(video, stream=True) + if response.status_code != 200: + raise ValueError(f"Failed to download video. Status code: {response.status_code}") + + parsed_url = urlparse(video) + file_name = os.path.basename(unquote(parsed_url.path)) + + suffix = os.path.splitext(file_name)[1] or ".mp4" + video_path = tempfile.NamedTemporaryFile(suffix=suffix, delete=False).name + + was_tempfile_created = True + + video_data = response.iter_content(chunk_size=8192) + with open(video_path, "wb") as f: + for chunk in video_data: + f.write(chunk) + + video = video_path + + pil_images = [] + if video.endswith(".gif"): + gif = PIL.Image.open(video) + try: + while True: + pil_images.append(gif.copy()) + gif.seek(gif.tell() + 1) + except EOFError: + pass + + else: + if is_imageio_available(): + import imageio + else: + raise ImportError(BACKENDS_MAPPING["imageio"][1].format("load_video")) + + try: + imageio.plugins.ffmpeg.get_exe() + except AttributeError: + raise AttributeError( + "`Unable to find an ffmpeg installation on your machine. Please install via `pip install imageio-ffmpeg" + ) + + with imageio.get_reader(video) as reader: + # Read all frames + for frame in reader: + pil_images.append(PIL.Image.fromarray(frame)) + + if was_tempfile_created: + os.remove(video_path) + + if convert_method is not None: + pil_images = convert_method(pil_images) + + return pil_images + + +# Taken from `transformers`. +def get_module_from_name(module, tensor_name: str) -> tuple[Any, str]: + if "." in tensor_name: + splits = tensor_name.split(".") + for split in splits[:-1]: + new_module = getattr(module, split) + if new_module is None: + raise ValueError(f"{module} has no attribute {split}.") + module = new_module + tensor_name = splits[-1] + return module, tensor_name + + +def get_submodule_by_name(root_module, module_path: str): + current = root_module + parts = module_path.split(".") + for part in parts: + if part.isdigit(): + idx = int(part) + current = current[idx] # e.g., for nn.ModuleList or nn.Sequential + else: + current = getattr(current, part) + return current diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/logging.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..21c4bba8a830e6f687dd35bdaf61743c151dacff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/logging.py @@ -0,0 +1,361 @@ +# coding=utf-8 +# Copyright 2025 Optuna, Hugging Face +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Logging utilities.""" + +import logging +import os +import sys +import threading +from logging import ( + CRITICAL, # NOQA + DEBUG, # NOQA + ERROR, # NOQA + FATAL, # NOQA + INFO, # NOQA + NOTSET, # NOQA + WARN, # NOQA + WARNING, # NOQA +) + +from tqdm import auto as tqdm_lib + +from .distributed_utils import is_torch_dist_rank_zero + + +_lock = threading.Lock() +_default_handler: logging.Handler | None = None + +log_levels = { + "debug": logging.DEBUG, + "info": logging.INFO, + "warning": logging.WARNING, + "error": logging.ERROR, + "critical": logging.CRITICAL, +} + +_default_log_level = logging.WARNING + +_tqdm_active = True +_rank_zero_filter = None + + +class _RankZeroFilter(logging.Filter): + def filter(self, record): + # Always allow rank-zero logs, but keep debug-level messages from all ranks for troubleshooting. + return is_torch_dist_rank_zero() or record.levelno <= logging.DEBUG + + +def _ensure_rank_zero_filter(logger: logging.Logger) -> None: + global _rank_zero_filter + + if _rank_zero_filter is None: + _rank_zero_filter = _RankZeroFilter() + + if not any(isinstance(f, _RankZeroFilter) for f in logger.filters): + logger.addFilter(_rank_zero_filter) + + +def _get_default_logging_level() -> int: + """ + If DIFFUSERS_VERBOSITY env var is set to one of the valid choices return that as the new default level. If it is + not - fall back to `_default_log_level` + """ + env_level_str = os.getenv("DIFFUSERS_VERBOSITY", None) + if env_level_str: + if env_level_str in log_levels: + return log_levels[env_level_str] + else: + logging.getLogger().warning( + f"Unknown option DIFFUSERS_VERBOSITY={env_level_str}, has to be one of: {', '.join(log_levels.keys())}" + ) + return _default_log_level + + +def _get_library_name() -> str: + return __name__.split(".")[0] + + +def _get_library_root_logger() -> logging.Logger: + return logging.getLogger(_get_library_name()) + + +def _configure_library_root_logger() -> None: + global _default_handler + + with _lock: + if _default_handler: + # This library has already configured the library root logger. + return + _default_handler = logging.StreamHandler() # Set sys.stderr as stream. + + if sys.stderr: # only if sys.stderr exists, e.g. when not using pythonw in windows + _default_handler.flush = sys.stderr.flush + + # Apply our default configuration to the library root logger. + library_root_logger = _get_library_root_logger() + library_root_logger.addHandler(_default_handler) + library_root_logger.setLevel(_get_default_logging_level()) + library_root_logger.propagate = False + _ensure_rank_zero_filter(library_root_logger) + + +def _reset_library_root_logger() -> None: + global _default_handler + + with _lock: + if not _default_handler: + return + + library_root_logger = _get_library_root_logger() + library_root_logger.removeHandler(_default_handler) + library_root_logger.setLevel(logging.NOTSET) + _default_handler = None + + +def get_log_levels_dict() -> dict[str, int]: + return log_levels + + +def get_logger(name: str | None = None) -> logging.Logger: + """ + Return a logger with the specified name. + + This function is not supposed to be directly accessed unless you are writing a custom diffusers module. + """ + + if name is None: + name = _get_library_name() + + _configure_library_root_logger() + logger = logging.getLogger(name) + _ensure_rank_zero_filter(logger) + return logger + + +def get_verbosity() -> int: + """ + Return the current level for the 🤗 Diffusers' root logger as an `int`. + + Returns: + `int`: + Logging level integers which can be one of: + + - `50`: `diffusers.logging.CRITICAL` or `diffusers.logging.FATAL` + - `40`: `diffusers.logging.ERROR` + - `30`: `diffusers.logging.WARNING` or `diffusers.logging.WARN` + - `20`: `diffusers.logging.INFO` + - `10`: `diffusers.logging.DEBUG` + + """ + + _configure_library_root_logger() + return _get_library_root_logger().getEffectiveLevel() + + +def set_verbosity(verbosity: int) -> None: + """ + Set the verbosity level for the 🤗 Diffusers' root logger. + + Args: + verbosity (`int`): + Logging level which can be one of: + + - `diffusers.logging.CRITICAL` or `diffusers.logging.FATAL` + - `diffusers.logging.ERROR` + - `diffusers.logging.WARNING` or `diffusers.logging.WARN` + - `diffusers.logging.INFO` + - `diffusers.logging.DEBUG` + """ + + _configure_library_root_logger() + _get_library_root_logger().setLevel(verbosity) + + +def set_verbosity_info() -> None: + """Set the verbosity to the `INFO` level.""" + return set_verbosity(INFO) + + +def set_verbosity_warning() -> None: + """Set the verbosity to the `WARNING` level.""" + return set_verbosity(WARNING) + + +def set_verbosity_debug() -> None: + """Set the verbosity to the `DEBUG` level.""" + return set_verbosity(DEBUG) + + +def set_verbosity_error() -> None: + """Set the verbosity to the `ERROR` level.""" + return set_verbosity(ERROR) + + +def disable_default_handler() -> None: + """Disable the default handler of the 🤗 Diffusers' root logger.""" + + _configure_library_root_logger() + + assert _default_handler is not None + _get_library_root_logger().removeHandler(_default_handler) + + +def enable_default_handler() -> None: + """Enable the default handler of the 🤗 Diffusers' root logger.""" + + _configure_library_root_logger() + + assert _default_handler is not None + _get_library_root_logger().addHandler(_default_handler) + + +def add_handler(handler: logging.Handler) -> None: + """adds a handler to the HuggingFace Diffusers' root logger.""" + + _configure_library_root_logger() + + assert handler is not None + _get_library_root_logger().addHandler(handler) + + +def remove_handler(handler: logging.Handler) -> None: + """removes given handler from the HuggingFace Diffusers' root logger.""" + + _configure_library_root_logger() + + assert handler is not None and handler in _get_library_root_logger().handlers + _get_library_root_logger().removeHandler(handler) + + +def disable_propagation() -> None: + """ + Disable propagation of the library log outputs. Note that log propagation is disabled by default. + """ + + _configure_library_root_logger() + _get_library_root_logger().propagate = False + + +def enable_propagation() -> None: + """ + Enable propagation of the library log outputs. Please disable the HuggingFace Diffusers' default handler to prevent + double logging if the root logger has been configured. + """ + + _configure_library_root_logger() + _get_library_root_logger().propagate = True + + +def enable_explicit_format() -> None: + """ + Enable explicit formatting for every 🤗 Diffusers' logger. The explicit formatter is as follows: + ``` + [LEVELNAME|FILENAME|LINE NUMBER] TIME >> MESSAGE + ``` + All handlers currently bound to the root logger are affected by this method. + """ + handlers = _get_library_root_logger().handlers + + for handler in handlers: + formatter = logging.Formatter("[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s >> %(message)s") + handler.setFormatter(formatter) + + +def reset_format() -> None: + """ + Resets the formatting for 🤗 Diffusers' loggers. + + All handlers currently bound to the root logger are affected by this method. + """ + handlers = _get_library_root_logger().handlers + + for handler in handlers: + handler.setFormatter(None) + + +def warning_advice(self, *args, **kwargs) -> None: + """ + This method is identical to `logger.warning()`, but if env var DIFFUSERS_NO_ADVISORY_WARNINGS=1 is set, this + warning will not be printed + """ + no_advisory_warnings = os.getenv("DIFFUSERS_NO_ADVISORY_WARNINGS", False) + if no_advisory_warnings: + return + self.warning(*args, **kwargs) + + +logging.Logger.warning_advice = warning_advice + + +class EmptyTqdm: + """Dummy tqdm which doesn't do anything.""" + + def __init__(self, *args, **kwargs): # pylint: disable=unused-argument + self._iterator = args[0] if args else None + + def __iter__(self): + return iter(self._iterator) + + def __getattr__(self, _): + """Return empty function.""" + + def empty_fn(*args, **kwargs): # pylint: disable=unused-argument + return + + return empty_fn + + def __enter__(self): + return self + + def __exit__(self, type_, value, traceback): + return + + +class _tqdm_cls: + def __call__(self, *args, **kwargs): + if _tqdm_active: + return tqdm_lib.tqdm(*args, **kwargs) + else: + return EmptyTqdm(*args, **kwargs) + + def set_lock(self, *args, **kwargs): + self._lock = None + if _tqdm_active: + return tqdm_lib.tqdm.set_lock(*args, **kwargs) + + def get_lock(self): + if _tqdm_active: + return tqdm_lib.tqdm.get_lock() + + +tqdm = _tqdm_cls() + + +def is_progress_bar_enabled() -> bool: + """Return a boolean indicating whether tqdm progress bars are enabled.""" + global _tqdm_active + return bool(_tqdm_active) + + +def enable_progress_bar() -> None: + """Enable tqdm progress bar.""" + global _tqdm_active + _tqdm_active = True + + +def disable_progress_bar() -> None: + """Disable tqdm progress bar.""" + global _tqdm_active + _tqdm_active = False diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/model_card_template.md b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/model_card_template.md new file mode 100644 index 0000000000000000000000000000000000000000..f41b71e24e2081425d3049ae51b50036d5d28b6a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/model_card_template.md @@ -0,0 +1,24 @@ +--- +{{ card_data }} +--- + + + +{{ model_description }} + +## Intended uses & limitations + +#### How to use + +```python +# TODO: add an example code snippet for running this diffusion pipeline +``` + +#### Limitations and bias + +[TODO: provide examples of latent issues and potential remediations] + +## Training details + +[TODO: describe the data used to train the model] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/outputs.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/outputs.py new file mode 100644 index 0000000000000000000000000000000000000000..b7d9a29ccfce16113989ac09566831192e797b49 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/outputs.py @@ -0,0 +1,134 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Generic utilities +""" + +from collections import OrderedDict +from dataclasses import fields, is_dataclass +from typing import Any + +import numpy as np + +from .import_utils import is_torch_available, is_torch_version + + +def is_tensor(x) -> bool: + """ + Tests if `x` is a `torch.Tensor` or `np.ndarray`. + """ + if is_torch_available(): + import torch + + if isinstance(x, torch.Tensor): + return True + + return isinstance(x, np.ndarray) + + +class BaseOutput(OrderedDict): + """ + Base class for all model outputs as dataclass. Has a `__getitem__` that allows indexing by integer or slice (like a + tuple) or strings (like a dictionary) that will ignore the `None` attributes. Otherwise behaves like a regular + Python dictionary. + + > [!WARNING] > You can't unpack a [`BaseOutput`] directly. Use the [`~utils.BaseOutput.to_tuple`] method to convert + it to a tuple > first. + """ + + def __init_subclass__(cls) -> None: + """Register subclasses as pytree nodes. + + This is necessary to synchronize gradients when using `torch.nn.parallel.DistributedDataParallel` with + `static_graph=True` with modules that output `ModelOutput` subclasses. + """ + if is_torch_available(): + import torch.utils._pytree + + if is_torch_version("<", "2.2"): + torch.utils._pytree._register_pytree_node( + cls, + torch.utils._pytree._dict_flatten, + lambda values, context: cls(**torch.utils._pytree._dict_unflatten(values, context)), + ) + else: + torch.utils._pytree.register_pytree_node( + cls, + torch.utils._pytree._dict_flatten, + lambda values, context: cls(**torch.utils._pytree._dict_unflatten(values, context)), + serialized_type_name=f"{cls.__module__}.{cls.__name__}", + ) + + def __post_init__(self) -> None: + class_fields = fields(self) + + # Safety and consistency checks + if not len(class_fields): + raise ValueError(f"{self.__class__.__name__} has no fields.") + + first_field = getattr(self, class_fields[0].name) + other_fields_are_none = all(getattr(self, field.name) is None for field in class_fields[1:]) + + if other_fields_are_none and isinstance(first_field, dict): + for key, value in first_field.items(): + self[key] = value + else: + for field in class_fields: + v = getattr(self, field.name) + if v is not None: + self[field.name] = v + + def __delitem__(self, *args, **kwargs): + raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") + + def setdefault(self, *args, **kwargs): + raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") + + def pop(self, *args, **kwargs): + raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") + + def update(self, *args, **kwargs): + raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") + + def __getitem__(self, k: Any) -> Any: + if isinstance(k, str): + inner_dict = dict(self.items()) + return inner_dict[k] + else: + return self.to_tuple()[k] + + def __setattr__(self, name: Any, value: Any) -> None: + if name in self.keys() and value is not None: + # Don't call self.__setitem__ to avoid recursion errors + super().__setitem__(name, value) + super().__setattr__(name, value) + + def __setitem__(self, key, value): + # Will raise a KeyException if needed + super().__setitem__(key, value) + # Don't call self.__setattr__ to avoid recursion errors + super().__setattr__(key, value) + + def __reduce__(self): + if not is_dataclass(self): + return super().__reduce__() + callable, _args, *remaining = super().__reduce__() + args = tuple(getattr(self, field.name) for field in fields(self)) + return callable, args, *remaining + + def to_tuple(self) -> tuple[Any, ...]: + """ + Convert self to a tuple containing all the attributes/keys that are not `None`. + """ + return tuple(self[k] for k in self.keys()) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/peft_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/peft_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..65bcfe631e979a93ad5c5de45fdbef1ec88f0a40 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/peft_utils.py @@ -0,0 +1,425 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +PEFT utilities: Utilities related to peft library +""" + +import collections +import functools +import importlib + +from packaging import version + +from . import logging +from .import_utils import is_peft_available, is_peft_version, is_torch_available +from .torch_utils import empty_device_cache + + +logger = logging.get_logger(__name__) + +if is_torch_available(): + import torch + + +def recurse_remove_peft_layers(model): + r""" + Recursively replace all instances of `LoraLayer` with corresponding new layers in `model`. + """ + from peft.tuners.tuners_utils import BaseTunerLayer + + has_base_layer_pattern = False + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + has_base_layer_pattern = hasattr(module, "base_layer") + break + + if has_base_layer_pattern: + from peft.utils import _get_submodules + + key_list = [key for key, _ in model.named_modules() if "lora" not in key] + for key in key_list: + try: + parent, target, target_name = _get_submodules(model, key) + except AttributeError: + continue + if hasattr(target, "base_layer"): + setattr(parent, target_name, target.get_base_layer()) + else: + # This is for backwards compatibility with PEFT <= 0.6.2. + # TODO can be removed once that PEFT version is no longer supported. + from peft.tuners.lora import LoraLayer + + for name, module in model.named_children(): + if len(list(module.children())) > 0: + ## compound module, go inside it + recurse_remove_peft_layers(module) + + module_replaced = False + + if isinstance(module, LoraLayer) and isinstance(module, torch.nn.Linear): + new_module = torch.nn.Linear( + module.in_features, + module.out_features, + bias=module.bias is not None, + ).to(module.weight.device) + new_module.weight = module.weight + if module.bias is not None: + new_module.bias = module.bias + + module_replaced = True + elif isinstance(module, LoraLayer) and isinstance(module, torch.nn.Conv2d): + new_module = torch.nn.Conv2d( + module.in_channels, + module.out_channels, + module.kernel_size, + module.stride, + module.padding, + module.dilation, + module.groups, + ).to(module.weight.device) + + new_module.weight = module.weight + if module.bias is not None: + new_module.bias = module.bias + + module_replaced = True + + if module_replaced: + setattr(model, name, new_module) + del module + + empty_device_cache() + return model + + +def scale_lora_layers(model, weight): + """ + Adjust the weightage given to the LoRA layers of the model. + + Args: + model (`torch.nn.Module`): + The model to scale. + weight (`float`): + The weight to be given to the LoRA layers. + """ + from peft.tuners.tuners_utils import BaseTunerLayer + + if weight == 1.0: + return + + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + module.scale_layer(weight) + + +def unscale_lora_layers(model, weight: float | None = None): + """ + Removes the previously passed weight given to the LoRA layers of the model. + + Args: + model (`torch.nn.Module`): + The model to scale. + weight (`float`, *optional*): + The weight to be given to the LoRA layers. If no scale is passed the scale of the lora layer will be + re-initialized to the correct value. If 0.0 is passed, we will re-initialize the scale with the correct + value. + """ + from peft.tuners.tuners_utils import BaseTunerLayer + + if weight is None or weight == 1.0: + return + + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + if weight != 0: + module.unscale_layer(weight) + else: + for adapter_name in module.active_adapters: + # if weight == 0 unscale should re-set the scale to the original value. + module.set_scale(adapter_name, 1.0) + + +def get_peft_kwargs( + rank_dict, network_alpha_dict, peft_state_dict, is_unet=True, model_state_dict=None, adapter_name=None +): + rank_pattern = {} + alpha_pattern = {} + r = lora_alpha = list(rank_dict.values())[0] + + if len(set(rank_dict.values())) > 1: + # get the rank occurring the most number of times + r = collections.Counter(rank_dict.values()).most_common()[0][0] + + # for modules with rank different from the most occurring rank, add it to the `rank_pattern` + rank_pattern = dict(filter(lambda x: x[1] != r, rank_dict.items())) + rank_pattern = {k.split(".lora_B.")[0]: v for k, v in rank_pattern.items()} + + if network_alpha_dict is not None and len(network_alpha_dict) > 0: + if len(set(network_alpha_dict.values())) > 1: + # get the alpha occurring the most number of times + lora_alpha = collections.Counter(network_alpha_dict.values()).most_common()[0][0] + + # for modules with alpha different from the most occurring alpha, add it to the `alpha_pattern` + alpha_pattern = dict(filter(lambda x: x[1] != lora_alpha, network_alpha_dict.items())) + if is_unet: + alpha_pattern = { + ".".join(k.split(".lora_A.")[0].split(".")).replace(".alpha", ""): v + for k, v in alpha_pattern.items() + } + else: + alpha_pattern = {".".join(k.split(".down.")[0].split(".")[:-1]): v for k, v in alpha_pattern.items()} + else: + lora_alpha = set(network_alpha_dict.values()).pop() + + target_modules = list({name.split(".lora")[0] for name in peft_state_dict.keys()}) + use_dora = any("lora_magnitude_vector" in k for k in peft_state_dict) + # for now we know that the "bias" keys are only associated with `lora_B`. + lora_bias = any("lora_B" in k and k.endswith(".bias") for k in peft_state_dict) + + lora_config_kwargs = { + "r": r, + "lora_alpha": lora_alpha, + "rank_pattern": rank_pattern, + "alpha_pattern": alpha_pattern, + "target_modules": target_modules, + "use_dora": use_dora, + "lora_bias": lora_bias, + } + + return lora_config_kwargs + + +def get_adapter_name(model): + from peft.tuners.tuners_utils import BaseTunerLayer + + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + return f"default_{len(module.r)}" + return "default_0" + + +def set_adapter_layers(model, enabled=True): + from peft.tuners.tuners_utils import BaseTunerLayer + + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + # The recent version of PEFT needs to call `enable_adapters` instead + if hasattr(module, "enable_adapters"): + module.enable_adapters(enabled=enabled) + else: + module.disable_adapters = not enabled + + +def delete_adapter_layers(model, adapter_name): + from peft.tuners.tuners_utils import BaseTunerLayer + + for module in model.modules(): + if isinstance(module, BaseTunerLayer): + if hasattr(module, "delete_adapter"): + module.delete_adapter(adapter_name) + else: + raise ValueError( + "The version of PEFT you are using is not compatible, please use a version that is greater than 0.6.1" + ) + + # For transformers integration - we need to pop the adapter from the config + if getattr(model, "_hf_peft_config_loaded", False) and hasattr(model, "peft_config"): + model.peft_config.pop(adapter_name, None) + # In case all adapters are deleted, we need to delete the config + # and make sure to set the flag to False + if len(model.peft_config) == 0: + del model.peft_config + model._hf_peft_config_loaded = None + + +def set_weights_and_activate_adapters(model, adapter_names, weights): + from peft.tuners.tuners_utils import BaseTunerLayer + + def get_module_weight(weight_for_adapter, module_name): + if not isinstance(weight_for_adapter, dict): + # If weight_for_adapter is a single number, always return it. + return weight_for_adapter + + for layer_name, weight_ in weight_for_adapter.items(): + if layer_name in module_name: + return weight_ + + parts = module_name.split(".") + # e.g. key = "down_blocks.1.attentions.0" + key = f"{parts[0]}.{parts[1]}.attentions.{parts[3]}" + block_weight = weight_for_adapter.get(key, 1.0) + + return block_weight + + for module_name, module in model.named_modules(): + if isinstance(module, BaseTunerLayer): + # For backward compatibility with previous PEFT versions, set multiple active adapters + if hasattr(module, "set_adapter"): + module.set_adapter(adapter_names) + else: + module.active_adapter = adapter_names + + # Set the scaling weight for each adapter for this module + for adapter_name, weight in zip(adapter_names, weights): + module.set_scale(adapter_name, get_module_weight(weight, module_name)) + + +def apply_lora_scale(kwargs_name: str = "joint_attention_kwargs"): + """ + Decorator to automatically handle LoRA layer scaling/unscaling in forward methods. + + This decorator extracts the `lora_scale` from the specified kwargs parameter, applies scaling before the forward + pass, and ensures unscaling happens after, even if an exception occurs. + + Args: + kwargs_name (`str`, defaults to `"joint_attention_kwargs"`): + The name of the keyword argument that contains the LoRA scale. Common values include + "joint_attention_kwargs", "attention_kwargs", "cross_attention_kwargs", etc. + """ + + def decorator(forward_fn): + @functools.wraps(forward_fn) + def wrapper(self, *args, **kwargs): + from . import USE_PEFT_BACKEND + + lora_scale = 1.0 + attention_kwargs = kwargs.get(kwargs_name) + + if attention_kwargs is not None: + attention_kwargs = attention_kwargs.copy() + kwargs[kwargs_name] = attention_kwargs + lora_scale = attention_kwargs.pop("scale", 1.0) + + if not USE_PEFT_BACKEND and lora_scale != 1.0: + logger.warning( + f"Passing `scale` via `{kwargs_name}` when not using the PEFT backend is ineffective." + ) + + # Apply LoRA scaling if using PEFT backend + if USE_PEFT_BACKEND: + scale_lora_layers(self, lora_scale) + + try: + # Execute the forward pass + result = forward_fn(self, *args, **kwargs) + return result + finally: + # Always unscale, even if forward pass raises an exception + if USE_PEFT_BACKEND: + unscale_lora_layers(self, lora_scale) + + return wrapper + + return decorator + + +def check_peft_version(min_version: str) -> None: + r""" + Checks if the version of PEFT is compatible. + + Args: + version (`str`): + The version of PEFT to check against. + """ + if not is_peft_available(): + raise ValueError("PEFT is not installed. Please install it with `pip install peft`") + + is_peft_version_compatible = version.parse(importlib.metadata.version("peft")) > version.parse(min_version) + + if not is_peft_version_compatible: + raise ValueError( + f"The version of PEFT you are using is not compatible, please use a version that is greater" + f" than {min_version}" + ) + + +def _create_lora_config( + state_dict, network_alphas, metadata, rank_pattern_dict, is_unet=True, model_state_dict=None, adapter_name=None +): + from peft import LoraConfig + + if metadata is not None: + lora_config_kwargs = metadata + else: + lora_config_kwargs = get_peft_kwargs( + rank_pattern_dict, + network_alpha_dict=network_alphas, + peft_state_dict=state_dict, + is_unet=is_unet, + model_state_dict=model_state_dict, + adapter_name=adapter_name, + ) + + _maybe_raise_error_for_ambiguous_keys(lora_config_kwargs) + + # Version checks for DoRA and lora_bias + if "use_dora" in lora_config_kwargs and lora_config_kwargs["use_dora"]: + if is_peft_version("<", "0.9.0"): + raise ValueError("DoRA requires PEFT >= 0.9.0. Please upgrade.") + + if "lora_bias" in lora_config_kwargs and lora_config_kwargs["lora_bias"]: + if is_peft_version("<=", "0.13.2"): + raise ValueError("lora_bias requires PEFT >= 0.14.0. Please upgrade.") + + try: + return LoraConfig(**lora_config_kwargs) + except TypeError as e: + raise TypeError("`LoraConfig` class could not be instantiated.") from e + + +def _maybe_raise_error_for_ambiguous_keys(config): + rank_pattern = config["rank_pattern"].copy() + target_modules = config["target_modules"] + + for key in list(rank_pattern.keys()): + # try to detect ambiguity + # `target_modules` can also be a str, in which case this loop would loop + # over the chars of the str. The technically correct way to match LoRA keys + # in PEFT is to use LoraModel._check_target_module_exists (lora_config, key). + # But this cuts it for now. + exact_matches = [mod for mod in target_modules if mod == key] + substring_matches = [mod for mod in target_modules if key in mod and mod != key] + + if exact_matches and substring_matches: + if is_peft_version("<", "0.14.1"): + raise ValueError( + "There are ambiguous keys present in this LoRA. To load it, please update your `peft` installation - `pip install -U peft`." + ) + + +def _maybe_warn_for_unhandled_keys(incompatible_keys, adapter_name): + warn_msg = "" + if incompatible_keys is not None: + # Check only for unexpected keys. + unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None) + if unexpected_keys: + lora_unexpected_keys = [k for k in unexpected_keys if "lora_" in k and adapter_name in k] + if lora_unexpected_keys: + warn_msg = ( + f"Loading adapter weights from state_dict led to unexpected keys found in the model:" + f" {', '.join(lora_unexpected_keys)}. " + ) + + # Filter missing keys specific to the current adapter. + missing_keys = getattr(incompatible_keys, "missing_keys", None) + if missing_keys: + lora_missing_keys = [k for k in missing_keys if "lora_" in k and adapter_name in k] + if lora_missing_keys: + warn_msg += ( + f"Loading adapter weights from state_dict led to missing keys in the model:" + f" {', '.join(lora_missing_keys)}." + ) + + if warn_msg: + logger.warning(warn_msg) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/pil_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/pil_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..72d4704fa945f0cc4edfa8e6a15955cd75fd18f1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/pil_utils.py @@ -0,0 +1,65 @@ +import PIL.Image +import PIL.ImageOps +from packaging import version +from PIL import Image + + +if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): + PIL_INTERPOLATION = { + "linear": PIL.Image.Resampling.BILINEAR, + "bilinear": PIL.Image.Resampling.BILINEAR, + "bicubic": PIL.Image.Resampling.BICUBIC, + "lanczos": PIL.Image.Resampling.LANCZOS, + "nearest": PIL.Image.Resampling.NEAREST, + } +else: + PIL_INTERPOLATION = { + "linear": PIL.Image.LINEAR, + "bilinear": PIL.Image.BILINEAR, + "bicubic": PIL.Image.BICUBIC, + "lanczos": PIL.Image.LANCZOS, + "nearest": PIL.Image.NEAREST, + } + + +def pt_to_pil(images): + """ + Convert a torch image to a PIL image. + """ + images = (images / 2 + 0.5).clamp(0, 1) + images = images.cpu().permute(0, 2, 3, 1).float().numpy() + images = numpy_to_pil(images) + return images + + +def numpy_to_pil(images): + """ + Convert a numpy image or a batch of images to a PIL image. + """ + if images.ndim == 3: + images = images[None, ...] + images = (images * 255).round().astype("uint8") + if images.shape[-1] == 1: + # special case for grayscale (single channel) images + pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images] + else: + pil_images = [Image.fromarray(image) for image in images] + + return pil_images + + +def make_image_grid(images: list[PIL.Image.Image], rows: int, cols: int, resize: int = None) -> PIL.Image.Image: + """ + Prepares a single grid of images. Useful for visualization purposes. + """ + assert len(images) == rows * cols + + if resize is not None: + images = [img.resize((resize, resize)) for img in images] + + w, h = images[0].size + grid = Image.new("RGB", size=(cols * w, rows * h)) + + for i, img in enumerate(images): + grid.paste(img, box=(i % cols * w, i // cols * h)) + return grid diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/remote_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/remote_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2580bbf5dda9c8c2fb66d75f2e54c2a50a73100d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/remote_utils.py @@ -0,0 +1,427 @@ +# coding=utf-8 +# Copyright 2025 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import io +import json +from typing import Literal, cast + +import requests + +from .deprecation_utils import deprecate +from .import_utils import is_safetensors_available, is_torch_available + + +if is_torch_available(): + import torch + + from ..image_processor import VaeImageProcessor + from ..video_processor import VideoProcessor + + if is_safetensors_available(): + import safetensors.torch + + DTYPE_MAP = { + "float16": torch.float16, + "float32": torch.float32, + "bfloat16": torch.bfloat16, + "uint8": torch.uint8, + } + + +from PIL import Image + + +def detect_image_type(data: bytes) -> str: + if data.startswith(b"\xff\xd8"): + return "jpeg" + elif data.startswith(b"\x89PNG\r\n\x1a\n"): + return "png" + elif data.startswith(b"GIF87a") or data.startswith(b"GIF89a"): + return "gif" + elif data.startswith(b"BM"): + return "bmp" + return "unknown" + + +def check_inputs_decode( + endpoint: str, + tensor: "torch.Tensor", + processor: "VaeImageProcessor" | "VideoProcessor" | None = None, + do_scaling: bool = True, + scaling_factor: float | None = None, + shift_factor: float | None = None, + output_type: Literal["mp4", "pil", "pt"] = "pil", + return_type: Literal["mp4", "pil", "pt"] = "pil", + image_format: Literal["png", "jpg"] = "jpg", + partial_postprocess: bool = False, + input_tensor_type: Literal["binary"] = "binary", + output_tensor_type: Literal["binary"] = "binary", + height: int | None = None, + width: int | None = None, +): + if tensor.ndim == 3 and height is None and width is None: + raise ValueError("`height` and `width` required for packed latents.") + if ( + output_type == "pt" + and return_type == "pil" + and not partial_postprocess + and not isinstance(processor, (VaeImageProcessor, VideoProcessor)) + ): + raise ValueError("`processor` is required.") + if do_scaling and scaling_factor is None: + deprecate( + "do_scaling", + "1.0.0", + "`do_scaling` is deprecated, pass `scaling_factor` and `shift_factor` if required.", + standard_warn=False, + ) + + +def postprocess_decode( + response: requests.Response, + processor: "VaeImageProcessor" | "VideoProcessor" | None = None, + output_type: Literal["mp4", "pil", "pt"] = "pil", + return_type: Literal["mp4", "pil", "pt"] = "pil", + partial_postprocess: bool = False, +): + if output_type == "pt" or (output_type == "pil" and processor is not None): + output_tensor = response.content + parameters = response.headers + shape = json.loads(parameters["shape"]) + dtype = parameters["dtype"] + torch_dtype = DTYPE_MAP[dtype] + output_tensor = torch.frombuffer(bytearray(output_tensor), dtype=torch_dtype).reshape(shape) + if output_type == "pt": + if partial_postprocess: + if return_type == "pil": + output = [Image.fromarray(image.numpy()) for image in output_tensor] + if len(output) == 1: + output = output[0] + elif return_type == "pt": + output = output_tensor + else: + if processor is None or return_type == "pt": + output = output_tensor + else: + if isinstance(processor, VideoProcessor): + output = cast( + list[Image.Image], + processor.postprocess_video(output_tensor, output_type="pil")[0], + ) + else: + output = cast( + Image.Image, + processor.postprocess(output_tensor, output_type="pil")[0], + ) + elif output_type == "pil" and return_type == "pil" and processor is None: + output = Image.open(io.BytesIO(response.content)).convert("RGB") + detected_format = detect_image_type(response.content) + output.format = detected_format + elif output_type == "pil" and processor is not None: + if return_type == "pil": + output = [ + Image.fromarray(image) + for image in (output_tensor.permute(0, 2, 3, 1).float().numpy() * 255).round().astype("uint8") + ] + elif return_type == "pt": + output = output_tensor + elif output_type == "mp4" and return_type == "mp4": + output = response.content + return output + + +def prepare_decode( + tensor: "torch.Tensor", + processor: "VaeImageProcessor" | "VideoProcessor" | None = None, + do_scaling: bool = True, + scaling_factor: float | None = None, + shift_factor: float | None = None, + output_type: Literal["mp4", "pil", "pt"] = "pil", + image_format: Literal["png", "jpg"] = "jpg", + partial_postprocess: bool = False, + height: int | None = None, + width: int | None = None, +): + headers = {} + parameters = { + "image_format": image_format, + "output_type": output_type, + "partial_postprocess": partial_postprocess, + "shape": list(tensor.shape), + "dtype": str(tensor.dtype).split(".")[-1], + } + if do_scaling and scaling_factor is not None: + parameters["scaling_factor"] = scaling_factor + if do_scaling and shift_factor is not None: + parameters["shift_factor"] = shift_factor + if do_scaling and scaling_factor is None: + parameters["do_scaling"] = do_scaling + elif do_scaling and scaling_factor is None and shift_factor is None: + parameters["do_scaling"] = do_scaling + if height is not None and width is not None: + parameters["height"] = height + parameters["width"] = width + headers["Content-Type"] = "tensor/binary" + headers["Accept"] = "tensor/binary" + if output_type == "pil" and image_format == "jpg" and processor is None: + headers["Accept"] = "image/jpeg" + elif output_type == "pil" and image_format == "png" and processor is None: + headers["Accept"] = "image/png" + elif output_type == "mp4": + headers["Accept"] = "text/plain" + tensor_data = safetensors.torch._tobytes(tensor, "tensor") + return {"data": tensor_data, "params": parameters, "headers": headers} + + +def remote_decode( + endpoint: str, + tensor: "torch.Tensor", + processor: "VaeImageProcessor" | "VideoProcessor" | None = None, + do_scaling: bool = True, + scaling_factor: float | None = None, + shift_factor: float | None = None, + output_type: Literal["mp4", "pil", "pt"] = "pil", + return_type: Literal["mp4", "pil", "pt"] = "pil", + image_format: Literal["png", "jpg"] = "jpg", + partial_postprocess: bool = False, + input_tensor_type: Literal["binary"] = "binary", + output_tensor_type: Literal["binary"] = "binary", + height: int | None = None, + width: int | None = None, +) -> Image.Image | list[Image.Image] | bytes | "torch.Tensor": + """ + Hugging Face Hybrid Inference that allow running VAE decode remotely. + + Args: + endpoint (`str`): + Endpoint for Remote Decode. + tensor (`torch.Tensor`): + Tensor to be decoded. + processor (`VaeImageProcessor` or `VideoProcessor`, *optional*): + Used with `return_type="pt"`, and `return_type="pil"` for Video models. + do_scaling (`bool`, default `True`, *optional*): + **DEPRECATED**. **pass `scaling_factor`/`shift_factor` instead.** **still set + do_scaling=None/do_scaling=False for no scaling until option is removed** When `True` scaling e.g. `latents + / self.vae.config.scaling_factor` is applied remotely. If `False`, input must be passed with scaling + applied. + scaling_factor (`float`, *optional*): + Scaling is applied when passed e.g. [`latents / + self.vae.config.scaling_factor`](https://github.com/huggingface/diffusers/blob/7007febae5cff000d4df9059d9cf35133e8b2ca9/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L1083C37-L1083C77). + - SD v1: 0.18215 + - SD XL: 0.13025 + - Flux: 0.3611 + If `None`, input must be passed with scaling applied. + shift_factor (`float`, *optional*): + Shift is applied when passed e.g. `latents + self.vae.config.shift_factor`. + - Flux: 0.1159 + If `None`, input must be passed with scaling applied. + output_type (`"mp4"` or `"pil"` or `"pt", default `"pil"): + **Endpoint** output type. Subject to change. Report feedback on preferred type. + + `"mp4": Supported by video models. Endpoint returns `bytes` of video. `"pil"`: Supported by image and video + models. + Image models: Endpoint returns `bytes` of an image in `image_format`. Video models: Endpoint returns + `torch.Tensor` with partial `postprocessing` applied. + Requires `processor` as a flag (any `None` value will work). + `"pt"`: Support by image and video models. Endpoint returns `torch.Tensor`. + With `partial_postprocess=True` the tensor is postprocessed `uint8` image tensor. + + Recommendations: + `"pt"` with `partial_postprocess=True` is the smallest transfer for full quality. `"pt"` with + `partial_postprocess=False` is the most compatible with third party code. `"pil"` with + `image_format="jpg"` is the smallest transfer overall. + + return_type (`"mp4"` or `"pil"` or `"pt", default `"pil"): + **Function** return type. + + `"mp4": Function returns `bytes` of video. `"pil"`: Function returns `PIL.Image.Image`. + With `output_type="pil" no further processing is applied. With `output_type="pt" a `PIL.Image.Image` is + created. + `partial_postprocess=False` `processor` is required. `partial_postprocess=True` `processor` is + **not** required. + `"pt"`: Function returns `torch.Tensor`. + `processor` is **not** required. `partial_postprocess=False` tensor is `float16` or `bfloat16`, without + denormalization. `partial_postprocess=True` tensor is `uint8`, denormalized. + + image_format (`"png"` or `"jpg"`, default `jpg`): + Used with `output_type="pil"`. Endpoint returns `jpg` or `png`. + + partial_postprocess (`bool`, default `False`): + Used with `output_type="pt"`. `partial_postprocess=False` tensor is `float16` or `bfloat16`, without + denormalization. `partial_postprocess=True` tensor is `uint8`, denormalized. + + input_tensor_type (`"binary"`, default `"binary"`): + Tensor transfer type. + + output_tensor_type (`"binary"`, default `"binary"`): + Tensor transfer type. + + height (`int`, **optional**): + Required for `"packed"` latents. + + width (`int`, **optional**): + Required for `"packed"` latents. + + Returns: + output (`Image.Image` or `list[Image.Image]` or `bytes` or `torch.Tensor`). + """ + if input_tensor_type == "base64": + deprecate( + "input_tensor_type='base64'", + "1.0.0", + "input_tensor_type='base64' is deprecated. Using `binary`.", + standard_warn=False, + ) + input_tensor_type = "binary" + if output_tensor_type == "base64": + deprecate( + "output_tensor_type='base64'", + "1.0.0", + "output_tensor_type='base64' is deprecated. Using `binary`.", + standard_warn=False, + ) + output_tensor_type = "binary" + check_inputs_decode( + endpoint, + tensor, + processor, + do_scaling, + scaling_factor, + shift_factor, + output_type, + return_type, + image_format, + partial_postprocess, + input_tensor_type, + output_tensor_type, + height, + width, + ) + kwargs = prepare_decode( + tensor=tensor, + processor=processor, + do_scaling=do_scaling, + scaling_factor=scaling_factor, + shift_factor=shift_factor, + output_type=output_type, + image_format=image_format, + partial_postprocess=partial_postprocess, + height=height, + width=width, + ) + response = requests.post(endpoint, **kwargs) + if not response.ok: + raise RuntimeError(response.json()) + output = postprocess_decode( + response=response, + processor=processor, + output_type=output_type, + return_type=return_type, + partial_postprocess=partial_postprocess, + ) + return output + + +def check_inputs_encode( + endpoint: str, + image: "torch.Tensor" | Image.Image, + scaling_factor: float | None = None, + shift_factor: float | None = None, +): + pass + + +def postprocess_encode( + response: requests.Response, +): + output_tensor = response.content + parameters = response.headers + shape = json.loads(parameters["shape"]) + dtype = parameters["dtype"] + torch_dtype = DTYPE_MAP[dtype] + output_tensor = torch.frombuffer(bytearray(output_tensor), dtype=torch_dtype).reshape(shape) + return output_tensor + + +def prepare_encode( + image: "torch.Tensor" | Image.Image, + scaling_factor: float | None = None, + shift_factor: float | None = None, +): + headers = {} + parameters = {} + if scaling_factor is not None: + parameters["scaling_factor"] = scaling_factor + if shift_factor is not None: + parameters["shift_factor"] = shift_factor + if isinstance(image, torch.Tensor): + data = safetensors.torch._tobytes(image.contiguous(), "tensor") + parameters["shape"] = list(image.shape) + parameters["dtype"] = str(image.dtype).split(".")[-1] + else: + buffer = io.BytesIO() + image.save(buffer, format="PNG") + data = buffer.getvalue() + return {"data": data, "params": parameters, "headers": headers} + + +def remote_encode( + endpoint: str, + image: "torch.Tensor" | Image.Image, + scaling_factor: float | None = None, + shift_factor: float | None = None, +) -> "torch.Tensor": + """ + Hugging Face Hybrid Inference that allow running VAE encode remotely. + + Args: + endpoint (`str`): + Endpoint for Remote Decode. + image (`torch.Tensor` or `PIL.Image.Image`): + Image to be encoded. + scaling_factor (`float`, *optional*): + Scaling is applied when passed e.g. [`latents * self.vae.config.scaling_factor`]. + - SD v1: 0.18215 + - SD XL: 0.13025 + - Flux: 0.3611 + If `None`, input must be passed with scaling applied. + shift_factor (`float`, *optional*): + Shift is applied when passed e.g. `latents - self.vae.config.shift_factor`. + - Flux: 0.1159 + If `None`, input must be passed with scaling applied. + + Returns: + output (`torch.Tensor`). + """ + check_inputs_encode( + endpoint, + image, + scaling_factor, + shift_factor, + ) + kwargs = prepare_encode( + image=image, + scaling_factor=scaling_factor, + shift_factor=shift_factor, + ) + response = requests.post(endpoint, **kwargs) + if not response.ok: + raise RuntimeError(response.json()) + output = postprocess_encode( + response=response, + ) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/source_code_parsing_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/source_code_parsing_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5f94711c21d825f4baa0b32bbc37fd94c89aba03 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/source_code_parsing_utils.py @@ -0,0 +1,52 @@ +import ast +import importlib +import inspect +import textwrap + + +class ReturnNameVisitor(ast.NodeVisitor): + """Thanks to ChatGPT for pairing.""" + + def __init__(self): + self.return_names = [] + + def visit_Return(self, node): + # Check if the return value is a tuple. + if isinstance(node.value, ast.Tuple): + for elt in node.value.elts: + if isinstance(elt, ast.Name): + self.return_names.append(elt.id) + else: + try: + self.return_names.append(ast.unparse(elt)) + except Exception: + self.return_names.append(str(elt)) + else: + if isinstance(node.value, ast.Name): + self.return_names.append(node.value.id) + else: + try: + self.return_names.append(ast.unparse(node.value)) + except Exception: + self.return_names.append(str(node.value)) + self.generic_visit(node) + + def _determine_parent_module(self, cls): + from diffusers import DiffusionPipeline + from diffusers.models.modeling_utils import ModelMixin + + if issubclass(cls, DiffusionPipeline): + return "pipelines" + elif issubclass(cls, ModelMixin): + return "models" + else: + raise NotImplementedError + + def get_ast_tree(self, cls, attribute_name="encode_prompt"): + parent_module_name = self._determine_parent_module(cls) + main_module = importlib.import_module(f"diffusers.{parent_module_name}") + current_cls_module = getattr(main_module, cls.__name__) + source_code = inspect.getsource(getattr(current_cls_module, attribute_name)) + source_code = textwrap.dedent(source_code) + tree = ast.parse(source_code) + return tree diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/state_dict_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/state_dict_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c9bf5fec289b4c6b1ed21092662cd06e1672a5db --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/state_dict_utils.py @@ -0,0 +1,545 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +State dict utilities: utility methods for converting state dicts easily +""" + +import enum +import json + +from .import_utils import is_torch_available +from .logging import get_logger + + +if is_torch_available(): + import torch + + +logger = get_logger(__name__) + + +class StateDictType(enum.Enum): + """ + The mode to use when converting state dicts. + """ + + DIFFUSERS_OLD = "diffusers_old" + KOHYA_SS = "kohya_ss" + PEFT = "peft" + DIFFUSERS = "diffusers" + + +# We need to define a proper mapping for Unet since it uses different output keys than text encoder +# e.g. to_q_lora -> q_proj / to_q +UNET_TO_DIFFUSERS = { + ".to_out_lora.up": ".to_out.0.lora_B", + ".to_out_lora.down": ".to_out.0.lora_A", + ".to_q_lora.down": ".to_q.lora_A", + ".to_q_lora.up": ".to_q.lora_B", + ".to_k_lora.down": ".to_k.lora_A", + ".to_k_lora.up": ".to_k.lora_B", + ".to_v_lora.down": ".to_v.lora_A", + ".to_v_lora.up": ".to_v.lora_B", + ".lora.up": ".lora_B", + ".lora.down": ".lora_A", + ".to_out.lora_magnitude_vector": ".to_out.0.lora_magnitude_vector", +} + +CONTROL_LORA_TO_DIFFUSERS = { + ".to_q.down": ".to_q.lora_A.weight", + ".to_q.up": ".to_q.lora_B.weight", + ".to_k.down": ".to_k.lora_A.weight", + ".to_k.up": ".to_k.lora_B.weight", + ".to_v.down": ".to_v.lora_A.weight", + ".to_v.up": ".to_v.lora_B.weight", + ".to_out.0.down": ".to_out.0.lora_A.weight", + ".to_out.0.up": ".to_out.0.lora_B.weight", + ".ff.net.0.proj.down": ".ff.net.0.proj.lora_A.weight", + ".ff.net.0.proj.up": ".ff.net.0.proj.lora_B.weight", + ".ff.net.2.down": ".ff.net.2.lora_A.weight", + ".ff.net.2.up": ".ff.net.2.lora_B.weight", + ".proj_in.down": ".proj_in.lora_A.weight", + ".proj_in.up": ".proj_in.lora_B.weight", + ".proj_out.down": ".proj_out.lora_A.weight", + ".proj_out.up": ".proj_out.lora_B.weight", + ".conv.down": ".conv.lora_A.weight", + ".conv.up": ".conv.lora_B.weight", + **{f".conv{i}.down": f".conv{i}.lora_A.weight" for i in range(1, 3)}, + **{f".conv{i}.up": f".conv{i}.lora_B.weight" for i in range(1, 3)}, + "conv_in.down": "conv_in.lora_A.weight", + "conv_in.up": "conv_in.lora_B.weight", + ".conv_shortcut.down": ".conv_shortcut.lora_A.weight", + ".conv_shortcut.up": ".conv_shortcut.lora_B.weight", + **{f".linear_{i}.down": f".linear_{i}.lora_A.weight" for i in range(1, 3)}, + **{f".linear_{i}.up": f".linear_{i}.lora_B.weight" for i in range(1, 3)}, + "time_emb_proj.down": "time_emb_proj.lora_A.weight", + "time_emb_proj.up": "time_emb_proj.lora_B.weight", +} + +DIFFUSERS_TO_PEFT = { + ".q_proj.lora_linear_layer.up": ".q_proj.lora_B", + ".q_proj.lora_linear_layer.down": ".q_proj.lora_A", + ".k_proj.lora_linear_layer.up": ".k_proj.lora_B", + ".k_proj.lora_linear_layer.down": ".k_proj.lora_A", + ".v_proj.lora_linear_layer.up": ".v_proj.lora_B", + ".v_proj.lora_linear_layer.down": ".v_proj.lora_A", + ".out_proj.lora_linear_layer.up": ".out_proj.lora_B", + ".out_proj.lora_linear_layer.down": ".out_proj.lora_A", + ".lora_linear_layer.up": ".lora_B", + ".lora_linear_layer.down": ".lora_A", + "text_projection.lora.down.weight": "text_projection.lora_A.weight", + "text_projection.lora.up.weight": "text_projection.lora_B.weight", +} + +DIFFUSERS_OLD_TO_PEFT = { + ".to_q_lora.up": ".q_proj.lora_B", + ".to_q_lora.down": ".q_proj.lora_A", + ".to_k_lora.up": ".k_proj.lora_B", + ".to_k_lora.down": ".k_proj.lora_A", + ".to_v_lora.up": ".v_proj.lora_B", + ".to_v_lora.down": ".v_proj.lora_A", + ".to_out_lora.up": ".out_proj.lora_B", + ".to_out_lora.down": ".out_proj.lora_A", + ".lora_linear_layer.up": ".lora_B", + ".lora_linear_layer.down": ".lora_A", +} + +PEFT_TO_DIFFUSERS = { + ".q_proj.lora_B": ".q_proj.lora_linear_layer.up", + ".q_proj.lora_A": ".q_proj.lora_linear_layer.down", + ".k_proj.lora_B": ".k_proj.lora_linear_layer.up", + ".k_proj.lora_A": ".k_proj.lora_linear_layer.down", + ".v_proj.lora_B": ".v_proj.lora_linear_layer.up", + ".v_proj.lora_A": ".v_proj.lora_linear_layer.down", + ".out_proj.lora_B": ".out_proj.lora_linear_layer.up", + ".out_proj.lora_A": ".out_proj.lora_linear_layer.down", + "to_k.lora_A": "to_k.lora.down", + "to_k.lora_B": "to_k.lora.up", + "to_q.lora_A": "to_q.lora.down", + "to_q.lora_B": "to_q.lora.up", + "to_v.lora_A": "to_v.lora.down", + "to_v.lora_B": "to_v.lora.up", + "to_out.0.lora_A": "to_out.0.lora.down", + "to_out.0.lora_B": "to_out.0.lora.up", +} + +DIFFUSERS_OLD_TO_DIFFUSERS = { + ".to_q_lora.up": ".q_proj.lora_linear_layer.up", + ".to_q_lora.down": ".q_proj.lora_linear_layer.down", + ".to_k_lora.up": ".k_proj.lora_linear_layer.up", + ".to_k_lora.down": ".k_proj.lora_linear_layer.down", + ".to_v_lora.up": ".v_proj.lora_linear_layer.up", + ".to_v_lora.down": ".v_proj.lora_linear_layer.down", + ".to_out_lora.up": ".out_proj.lora_linear_layer.up", + ".to_out_lora.down": ".out_proj.lora_linear_layer.down", + ".to_k.lora_magnitude_vector": ".k_proj.lora_magnitude_vector", + ".to_v.lora_magnitude_vector": ".v_proj.lora_magnitude_vector", + ".to_q.lora_magnitude_vector": ".q_proj.lora_magnitude_vector", + ".to_out.lora_magnitude_vector": ".out_proj.lora_magnitude_vector", +} + +PEFT_TO_KOHYA_SS = { + "lora_A": "lora_down", + "lora_B": "lora_up", + # This is not a comprehensive dict as kohya format requires replacing `.` with `_` in keys, + # adding prefixes and adding alpha values + # Check `convert_state_dict_to_kohya` for more +} + +PEFT_STATE_DICT_MAPPINGS = { + StateDictType.DIFFUSERS_OLD: DIFFUSERS_OLD_TO_PEFT, + StateDictType.DIFFUSERS: DIFFUSERS_TO_PEFT, +} + +DIFFUSERS_STATE_DICT_MAPPINGS = { + StateDictType.DIFFUSERS_OLD: DIFFUSERS_OLD_TO_DIFFUSERS, + StateDictType.PEFT: PEFT_TO_DIFFUSERS, +} + +KOHYA_STATE_DICT_MAPPINGS = {StateDictType.PEFT: PEFT_TO_KOHYA_SS} + +KEYS_TO_ALWAYS_REPLACE = { + ".processor.": ".", +} + + +def convert_state_dict(state_dict, mapping): + r""" + Simply iterates over the state dict and replaces the patterns in `mapping` with the corresponding values. + + Args: + state_dict (`dict[str, torch.Tensor]`): + The state dict to convert. + mapping (`dict[str, str]`): + The mapping to use for conversion, the mapping should be a dictionary with the following structure: + - key: the pattern to replace + - value: the pattern to replace with + + Returns: + converted_state_dict (`dict`) + The converted state dict. + """ + converted_state_dict = {} + for k, v in state_dict.items(): + # First, filter out the keys that we always want to replace + for pattern in KEYS_TO_ALWAYS_REPLACE.keys(): + if pattern in k: + new_pattern = KEYS_TO_ALWAYS_REPLACE[pattern] + k = k.replace(pattern, new_pattern) + + for pattern in mapping.keys(): + if pattern in k: + new_pattern = mapping[pattern] + k = k.replace(pattern, new_pattern) + break + converted_state_dict[k] = v + return converted_state_dict + + +def convert_state_dict_to_peft(state_dict, original_type=None, **kwargs): + r""" + Converts a state dict to the PEFT format The state dict can be from previous diffusers format (`OLD_DIFFUSERS`), or + new diffusers format (`DIFFUSERS`). The method only supports the conversion from diffusers old/new to PEFT for now. + + Args: + state_dict (`dict[str, torch.Tensor]`): + The state dict to convert. + original_type (`StateDictType`, *optional*): + The original type of the state dict, if not provided, the method will try to infer it automatically. + """ + if original_type is None: + # Old diffusers to PEFT + if any("to_out_lora" in k for k in state_dict.keys()): + original_type = StateDictType.DIFFUSERS_OLD + elif any("lora_linear_layer" in k for k in state_dict.keys()): + original_type = StateDictType.DIFFUSERS + else: + raise ValueError("Could not automatically infer state dict type") + + if original_type not in PEFT_STATE_DICT_MAPPINGS.keys(): + raise ValueError(f"Original type {original_type} is not supported") + + mapping = PEFT_STATE_DICT_MAPPINGS[original_type] + return convert_state_dict(state_dict, mapping) + + +def convert_state_dict_to_diffusers(state_dict, original_type=None, **kwargs): + r""" + Converts a state dict to new diffusers format. The state dict can be from previous diffusers format + (`OLD_DIFFUSERS`), or PEFT format (`PEFT`) or new diffusers format (`DIFFUSERS`). In the last case the method will + return the state dict as is. + + The method only supports the conversion from diffusers old, PEFT to diffusers new for now. + + Args: + state_dict (`dict[str, torch.Tensor]`): + The state dict to convert. + original_type (`StateDictType`, *optional*): + The original type of the state dict, if not provided, the method will try to infer it automatically. + kwargs (`dict`, *args*): + Additional arguments to pass to the method. + + - **adapter_name**: For example, in case of PEFT, some keys will be prepended + with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in + `get_peft_model_state_dict` method: + https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92 + but we add it here in case we don't want to rely on that method. + """ + peft_adapter_name = kwargs.pop("adapter_name", None) + if peft_adapter_name is not None: + peft_adapter_name = "." + peft_adapter_name + else: + peft_adapter_name = "" + + if original_type is None: + # Old diffusers to PEFT + if any("to_out_lora" in k for k in state_dict.keys()): + original_type = StateDictType.DIFFUSERS_OLD + elif any(f".lora_A{peft_adapter_name}.weight" in k for k in state_dict.keys()): + original_type = StateDictType.PEFT + elif any("lora_linear_layer" in k for k in state_dict.keys()): + # nothing to do + return state_dict + else: + raise ValueError("Could not automatically infer state dict type") + + if original_type not in DIFFUSERS_STATE_DICT_MAPPINGS.keys(): + raise ValueError(f"Original type {original_type} is not supported") + + mapping = DIFFUSERS_STATE_DICT_MAPPINGS[original_type] + return convert_state_dict(state_dict, mapping) + + +def convert_unet_state_dict_to_peft(state_dict): + r""" + Converts a state dict from UNet format to diffusers format - i.e. by removing some keys + """ + mapping = UNET_TO_DIFFUSERS + return convert_state_dict(state_dict, mapping) + + +def convert_sai_sd_control_lora_state_dict_to_peft(state_dict): + def _convert_controlnet_to_diffusers(state_dict): + is_sdxl = "input_blocks.11.0.in_layers.0.weight" not in state_dict + logger.info(f"Using ControlNet lora ({'SDXL' if is_sdxl else 'SD15'})") + + # Retrieves the keys for the input blocks only + num_input_blocks = len({".".join(layer.split(".")[:2]) for layer in state_dict if "input_blocks" in layer}) + input_blocks = { + layer_id: [key for key in state_dict if f"input_blocks.{layer_id}" in key] + for layer_id in range(num_input_blocks) + } + layers_per_block = 2 + + # op blocks + op_blocks = [key for key in state_dict if "0.op" in key] + + converted_state_dict = {} + # Conv in layers + for key in input_blocks[0]: + diffusers_key = key.replace("input_blocks.0.0", "conv_in") + converted_state_dict[diffusers_key] = state_dict.get(key) + + # controlnet time embedding blocks + time_embedding_blocks = [key for key in state_dict if "time_embed" in key] + for key in time_embedding_blocks: + diffusers_key = key.replace("time_embed.0", "time_embedding.linear_1").replace( + "time_embed.2", "time_embedding.linear_2" + ) + converted_state_dict[diffusers_key] = state_dict.get(key) + + # controlnet label embedding blocks + label_embedding_blocks = [key for key in state_dict if "label_emb" in key] + for key in label_embedding_blocks: + diffusers_key = key.replace("label_emb.0.0", "add_embedding.linear_1").replace( + "label_emb.0.2", "add_embedding.linear_2" + ) + converted_state_dict[diffusers_key] = state_dict.get(key) + + # Down blocks + for i in range(1, num_input_blocks): + block_id = (i - 1) // (layers_per_block + 1) + layer_in_block_id = (i - 1) % (layers_per_block + 1) + + resnets = [ + key for key in input_blocks[i] if f"input_blocks.{i}.0" in key and f"input_blocks.{i}.0.op" not in key + ] + for key in resnets: + diffusers_key = ( + key.replace("in_layers.0", "norm1") + .replace("in_layers.2", "conv1") + .replace("out_layers.0", "norm2") + .replace("out_layers.3", "conv2") + .replace("emb_layers.1", "time_emb_proj") + .replace("skip_connection", "conv_shortcut") + ) + diffusers_key = diffusers_key.replace( + f"input_blocks.{i}.0", f"down_blocks.{block_id}.resnets.{layer_in_block_id}" + ) + converted_state_dict[diffusers_key] = state_dict.get(key) + + if f"input_blocks.{i}.0.op.bias" in state_dict: + for key in [key for key in op_blocks if f"input_blocks.{i}.0.op" in key]: + diffusers_key = key.replace( + f"input_blocks.{i}.0.op", f"down_blocks.{block_id}.downsamplers.0.conv" + ) + converted_state_dict[diffusers_key] = state_dict.get(key) + + attentions = [key for key in input_blocks[i] if f"input_blocks.{i}.1" in key] + if attentions: + for key in attentions: + diffusers_key = key.replace( + f"input_blocks.{i}.1", f"down_blocks.{block_id}.attentions.{layer_in_block_id}" + ) + converted_state_dict[diffusers_key] = state_dict.get(key) + + # controlnet down blocks + for i in range(num_input_blocks): + converted_state_dict[f"controlnet_down_blocks.{i}.weight"] = state_dict.get(f"zero_convs.{i}.0.weight") + converted_state_dict[f"controlnet_down_blocks.{i}.bias"] = state_dict.get(f"zero_convs.{i}.0.bias") + + # Retrieves the keys for the middle blocks only + num_middle_blocks = len({".".join(layer.split(".")[:2]) for layer in state_dict if "middle_block" in layer}) + middle_blocks = { + layer_id: [key for key in state_dict if f"middle_block.{layer_id}" in key] + for layer_id in range(num_middle_blocks) + } + + # Mid blocks + for key in middle_blocks.keys(): + diffusers_key = max(key - 1, 0) + if key % 2 == 0: + for k in middle_blocks[key]: + diffusers_key_hf = ( + k.replace("in_layers.0", "norm1") + .replace("in_layers.2", "conv1") + .replace("out_layers.0", "norm2") + .replace("out_layers.3", "conv2") + .replace("emb_layers.1", "time_emb_proj") + .replace("skip_connection", "conv_shortcut") + ) + diffusers_key_hf = diffusers_key_hf.replace( + f"middle_block.{key}", f"mid_block.resnets.{diffusers_key}" + ) + converted_state_dict[diffusers_key_hf] = state_dict.get(k) + else: + for k in middle_blocks[key]: + diffusers_key_hf = k.replace(f"middle_block.{key}", f"mid_block.attentions.{diffusers_key}") + converted_state_dict[diffusers_key_hf] = state_dict.get(k) + + # mid block + converted_state_dict["controlnet_mid_block.weight"] = state_dict.get("middle_block_out.0.weight") + converted_state_dict["controlnet_mid_block.bias"] = state_dict.get("middle_block_out.0.bias") + + # controlnet cond embedding blocks + cond_embedding_blocks = { + ".".join(layer.split(".")[:2]) + for layer in state_dict + if "input_hint_block" in layer + and ("input_hint_block.0" not in layer) + and ("input_hint_block.14" not in layer) + } + num_cond_embedding_blocks = len(cond_embedding_blocks) + + for idx in range(1, num_cond_embedding_blocks + 1): + diffusers_idx = idx - 1 + cond_block_id = 2 * idx + + converted_state_dict[f"controlnet_cond_embedding.blocks.{diffusers_idx}.weight"] = state_dict.get( + f"input_hint_block.{cond_block_id}.weight" + ) + converted_state_dict[f"controlnet_cond_embedding.blocks.{diffusers_idx}.bias"] = state_dict.get( + f"input_hint_block.{cond_block_id}.bias" + ) + + for key in [key for key in state_dict if "input_hint_block.0" in key]: + diffusers_key = key.replace("input_hint_block.0", "controlnet_cond_embedding.conv_in") + converted_state_dict[diffusers_key] = state_dict.get(key) + + for key in [key for key in state_dict if "input_hint_block.14" in key]: + diffusers_key = key.replace("input_hint_block.14", "controlnet_cond_embedding.conv_out") + converted_state_dict[diffusers_key] = state_dict.get(key) + + return converted_state_dict + + state_dict = _convert_controlnet_to_diffusers(state_dict) + mapping = CONTROL_LORA_TO_DIFFUSERS + return convert_state_dict(state_dict, mapping) + + +def convert_all_state_dict_to_peft(state_dict): + r""" + Attempts to first `convert_state_dict_to_peft`, and if it doesn't detect `lora_linear_layer` for a valid + `DIFFUSERS` LoRA for example, attempts to exclusively convert the Unet `convert_unet_state_dict_to_peft` + """ + try: + peft_dict = convert_state_dict_to_peft(state_dict) + except Exception as e: + if str(e) == "Could not automatically infer state dict type": + peft_dict = convert_unet_state_dict_to_peft(state_dict) + else: + raise + + if not any("lora_A" in key or "lora_B" in key for key in peft_dict.keys()): + raise ValueError("Your LoRA was not converted to PEFT") + + return peft_dict + + +def convert_state_dict_to_kohya(state_dict, original_type=None, **kwargs): + r""" + Converts a `PEFT` state dict to `Kohya` format that can be used in AUTOMATIC1111, ComfyUI, SD.Next, InvokeAI, etc. + The method only supports the conversion from PEFT to Kohya for now. + + Args: + state_dict (`dict[str, torch.Tensor]`): + The state dict to convert. + original_type (`StateDictType`, *optional*): + The original type of the state dict, if not provided, the method will try to infer it automatically. + kwargs (`dict`, *args*): + Additional arguments to pass to the method. + + - **adapter_name**: For example, in case of PEFT, some keys will be prepended + with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in + `get_peft_model_state_dict` method: + https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92 + but we add it here in case we don't want to rely on that method. + """ + try: + import torch + except ImportError: + logger.error("Converting PEFT state dicts to Kohya requires torch to be installed.") + raise + + peft_adapter_name = kwargs.pop("adapter_name", None) + if peft_adapter_name is not None: + peft_adapter_name = "." + peft_adapter_name + else: + peft_adapter_name = "" + + if original_type is None: + if any(f".lora_A{peft_adapter_name}.weight" in k for k in state_dict.keys()): + original_type = StateDictType.PEFT + + if original_type not in KOHYA_STATE_DICT_MAPPINGS.keys(): + raise ValueError(f"Original type {original_type} is not supported") + + # Use the convert_state_dict function with the appropriate mapping + kohya_ss_partial_state_dict = convert_state_dict(state_dict, KOHYA_STATE_DICT_MAPPINGS[StateDictType.PEFT]) + kohya_ss_state_dict = {} + + # Additional logic for replacing header, alpha parameters `.` with `_` in all keys + for kohya_key, weight in kohya_ss_partial_state_dict.items(): + if "text_encoder_2." in kohya_key: + kohya_key = kohya_key.replace("text_encoder_2.", "lora_te2.") + elif "text_encoder." in kohya_key: + kohya_key = kohya_key.replace("text_encoder.", "lora_te1.") + elif "unet" in kohya_key: + kohya_key = kohya_key.replace("unet", "lora_unet") + elif "lora_magnitude_vector" in kohya_key: + kohya_key = kohya_key.replace("lora_magnitude_vector", "dora_scale") + + kohya_key = kohya_key.replace(".", "_", kohya_key.count(".") - 2) + kohya_key = kohya_key.replace(peft_adapter_name, "") # Kohya doesn't take names + kohya_ss_state_dict[kohya_key] = weight + if "lora_down" in kohya_key: + alpha_key = f"{kohya_key.split('.')[0]}.alpha" + kohya_ss_state_dict[alpha_key] = torch.tensor(len(weight)) + + return kohya_ss_state_dict + + +def state_dict_all_zero(state_dict, filter_str=None): + if filter_str is not None: + if isinstance(filter_str, str): + filter_str = [filter_str] + state_dict = {k: v for k, v in state_dict.items() if any(f in k for f in filter_str)} + + return all(torch.all(param == 0).item() for param in state_dict.values()) + + +def _load_sft_state_dict_metadata(model_file: str): + import safetensors.torch + + from ..loaders.lora_base import LORA_ADAPTER_METADATA_KEY + + with safetensors.torch.safe_open(model_file, framework="pt", device="cpu") as f: + metadata = f.metadata() or {} + + metadata.pop("format", None) + if metadata: + raw = metadata.get(LORA_ADAPTER_METADATA_KEY) + return json.loads(raw) if raw else None + else: + return None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/testing_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/testing_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cb7bf942c64832b95b4e8306ca9800f63bdaf113 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/testing_utils.py @@ -0,0 +1,1614 @@ +import functools +import glob +import importlib +import importlib.metadata +import inspect +import io +import logging +import multiprocessing +import os +import random +import re +import struct +import sys +import tempfile +import time +import unittest +import urllib.parse +from collections import UserDict +from contextlib import contextmanager +from io import BytesIO, StringIO +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, Set + +import numpy as np +import PIL.Image +import PIL.ImageOps +import requests +from numpy.linalg import norm +from packaging import version + +from .constants import DIFFUSERS_REQUEST_TIMEOUT +from .import_utils import ( + BACKENDS_MAPPING, + is_accelerate_available, + is_bitsandbytes_available, + is_compel_available, + is_flax_available, + is_gguf_available, + is_kernels_available, + is_note_seq_available, + is_nvidia_modelopt_available, + is_onnx_available, + is_opencv_available, + is_optimum_quanto_available, + is_peft_available, + is_timm_available, + is_torch_available, + is_torch_version, + is_torchao_available, + is_torchsde_available, + is_transformers_available, +) +from .logging import get_logger + + +if is_torch_available(): + import torch + + IS_ROCM_SYSTEM = torch.version.hip is not None + IS_CUDA_SYSTEM = torch.version.cuda is not None + IS_XPU_SYSTEM = getattr(torch.version, "xpu", None) is not None +else: + IS_ROCM_SYSTEM = False + IS_CUDA_SYSTEM = False + IS_XPU_SYSTEM = False + +global_rng = random.Random() + +logger = get_logger(__name__) +logger.warning( + "diffusers.utils.testing_utils' is deprecated and will be removed in a future version. " + "Determinism and device backend utilities have been moved to `diffusers.utils.torch_utils`. " +) +_required_peft_version = is_peft_available() and version.parse( + version.parse(importlib.metadata.version("peft")).base_version +) > version.parse("0.5") +_required_transformers_version = is_transformers_available() and version.parse( + version.parse(importlib.metadata.version("transformers")).base_version +) > version.parse("4.33") + +USE_PEFT_BACKEND = _required_peft_version and _required_transformers_version +BIG_GPU_MEMORY = int(os.getenv("BIG_GPU_MEMORY", 40)) + +if is_torch_available(): + import torch + + # Set a backend environment variable for any extra module import required for a custom accelerator + if "DIFFUSERS_TEST_BACKEND" in os.environ: + backend = os.environ["DIFFUSERS_TEST_BACKEND"] + try: + _ = importlib.import_module(backend) + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + f"Failed to import `DIFFUSERS_TEST_BACKEND` '{backend}'! This should be the name of an installed module \ + to enable a specified backend.):\n{e}" + ) from e + + if "DIFFUSERS_TEST_DEVICE" in os.environ: + torch_device = os.environ["DIFFUSERS_TEST_DEVICE"] + try: + # try creating device to see if provided device is valid + _ = torch.device(torch_device) + except RuntimeError as e: + raise RuntimeError( + f"Unknown testing device specified by environment variable `DIFFUSERS_TEST_DEVICE`: {torch_device}" + ) from e + logger.info(f"torch_device overrode to {torch_device}") + else: + if torch.cuda.is_available(): + torch_device = "cuda" + elif torch.xpu.is_available(): + torch_device = "xpu" + else: + torch_device = "cpu" + is_torch_higher_equal_than_1_12 = version.parse( + version.parse(torch.__version__).base_version + ) >= version.parse("1.12") + + if is_torch_higher_equal_than_1_12: + # Some builds of torch 1.12 don't have the mps backend registered. See #892 for more details + mps_backend_registered = hasattr(torch.backends, "mps") + torch_device = "mps" if (mps_backend_registered and torch.backends.mps.is_available()) else torch_device + + from .torch_utils import get_torch_cuda_device_capability + + +def torch_all_close(a, b, *args, **kwargs): + if not is_torch_available(): + raise ValueError("PyTorch needs to be installed to use this function.") + if not torch.allclose(a, b, *args, **kwargs): + assert False, f"Max diff is absolute {(a - b).abs().max()}. Diff tensor is {(a - b).abs()}." + return True + + +def numpy_cosine_similarity_distance(a, b): + similarity = np.dot(a, b) / (norm(a) * norm(b)) + distance = 1.0 - similarity.mean() + + return distance + + +def check_if_dicts_are_equal(dict1, dict2): + dict1, dict2 = dict1.copy(), dict2.copy() + + for key, value in dict1.items(): + if isinstance(value, set): + dict1[key] = sorted(value) + for key, value in dict2.items(): + if isinstance(value, set): + dict2[key] = sorted(value) + + for key in dict1: + if key not in dict2: + return False + if dict1[key] != dict2[key]: + return False + + for key in dict2: + if key not in dict1: + return False + + return True + + +def print_tensor_test( + tensor, + limit_to_slices=None, + max_torch_print=None, + filename="test_corrections.txt", + expected_tensor_name="expected_slice", +): + if max_torch_print: + torch.set_printoptions(threshold=10_000) + + test_name = os.environ.get("PYTEST_CURRENT_TEST") + if not torch.is_tensor(tensor): + tensor = torch.from_numpy(tensor) + if limit_to_slices: + tensor = tensor[0, -3:, -3:, -1] + + tensor_str = str(tensor.detach().cpu().flatten().to(torch.float32)).replace("\n", "") + # format is usually: + # expected_slice = np.array([-0.5713, -0.3018, -0.9814, 0.04663, -0.879, 0.76, -1.734, 0.1044, 1.161]) + output_str = tensor_str.replace("tensor", f"{expected_tensor_name} = np.array") + test_file, test_class, test_fn = test_name.split("::") + test_fn = test_fn.split()[0] + with open(filename, "a") as f: + print("::".join([test_file, test_class, test_fn, output_str]), file=f) + + +def get_tests_dir(append_path=None): + """ + Args: + append_path: optional path to append to the tests dir path + Return: + The full path to the `tests` dir, so that the tests can be invoked from anywhere. Optionally `append_path` is + joined after the `tests` dir the former is provided. + """ + # this function caller's __file__ + caller__file__ = inspect.stack()[1][1] + tests_dir = os.path.abspath(os.path.dirname(caller__file__)) + + while not tests_dir.endswith("tests"): + tests_dir = os.path.dirname(tests_dir) + + if append_path: + return Path(tests_dir, append_path).as_posix() + else: + return tests_dir + + +# Taken from the following PR: +# https://github.com/huggingface/accelerate/pull/1964 +def str_to_bool(value) -> int: + """ + Converts a string representation of truth to `True` (1) or `False` (0). True values are `y`, `yes`, `t`, `true`, + `on`, and `1`; False value are `n`, `no`, `f`, `false`, `off`, and `0`; + """ + value = value.lower() + if value in ("y", "yes", "t", "true", "on", "1"): + return 1 + elif value in ("n", "no", "f", "false", "off", "0"): + return 0 + else: + raise ValueError(f"invalid truth value {value}") + + +def parse_flag_from_env(key, default=False): + try: + value = os.environ[key] + except KeyError: + # KEY isn't set, default to `default`. + _value = default + else: + # KEY is set, convert it to True or False. + try: + _value = str_to_bool(value) + except ValueError: + # More values are supported, but let's keep the message simple. + raise ValueError(f"If set, {key} must be yes or no.") + return _value + + +_run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False) +_run_nightly_tests = parse_flag_from_env("RUN_NIGHTLY", default=False) +_run_compile_tests = parse_flag_from_env("RUN_COMPILE", default=False) + + +def floats_tensor(shape, scale=1.0, rng=None, name=None): + """Creates a random float32 tensor""" + if rng is None: + rng = global_rng + + total_dims = 1 + for dim in shape: + total_dims *= dim + + values = [] + for _ in range(total_dims): + values.append(rng.random() * scale) + + return torch.tensor(data=values, dtype=torch.float).view(shape).contiguous() + + +def slow(test_case): + """ + Decorator marking a test as slow. + + Slow tests are skipped by default. Set the RUN_SLOW environment variable to a truthy value to run them. + + """ + return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case) + + +def nightly(test_case): + """ + Decorator marking a test that runs nightly in the diffusers CI. + + Slow tests are skipped by default. Set the RUN_NIGHTLY environment variable to a truthy value to run them. + + """ + return unittest.skipUnless(_run_nightly_tests, "test is nightly")(test_case) + + +def is_torch_compile(test_case): + """ + Decorator marking a test that runs compile tests in the diffusers CI. + + Compile tests are skipped by default. Set the RUN_COMPILE environment variable to a truthy value to run them. + + """ + return unittest.skipUnless(_run_compile_tests, "test is torch compile")(test_case) + + +def require_torch(test_case): + """ + Decorator marking a test that requires PyTorch. These tests are skipped when PyTorch isn't installed. + """ + return unittest.skipUnless(is_torch_available(), "test requires PyTorch")(test_case) + + +def require_torch_2(test_case): + """ + Decorator marking a test that requires PyTorch 2. These tests are skipped when it isn't installed. + """ + return unittest.skipUnless(is_torch_available() and is_torch_version(">=", "2.0.0"), "test requires PyTorch 2")( + test_case + ) + + +def require_torch_version_greater_equal(torch_version): + """Decorator marking a test that requires torch with a specific version or greater.""" + + def decorator(test_case): + correct_torch_version = is_torch_available() and is_torch_version(">=", torch_version) + return unittest.skipUnless( + correct_torch_version, f"test requires torch with the version greater than or equal to {torch_version}" + )(test_case) + + return decorator + + +def require_torch_version_greater(torch_version): + """Decorator marking a test that requires torch with a specific version greater.""" + + def decorator(test_case): + correct_torch_version = is_torch_available() and is_torch_version(">", torch_version) + return unittest.skipUnless( + correct_torch_version, f"test requires torch with the version greater than {torch_version}" + )(test_case) + + return decorator + + +def require_torch_gpu(test_case): + """Decorator marking a test that requires CUDA and PyTorch.""" + return unittest.skipUnless(is_torch_available() and torch_device == "cuda", "test requires PyTorch+CUDA")( + test_case + ) + + +def require_torch_cuda_compatibility(expected_compute_capability): + def decorator(test_case): + if torch.cuda.is_available(): + current_compute_capability = get_torch_cuda_device_capability() + return unittest.skipUnless( + float(current_compute_capability) == float(expected_compute_capability), + "Test not supported for this compute capability.", + ) + + return decorator + + +# These decorators are for accelerator-specific behaviours that are not GPU-specific +def require_torch_accelerator(test_case): + """Decorator marking a test that requires an accelerator backend and PyTorch.""" + return unittest.skipUnless(is_torch_available() and torch_device != "cpu", "test requires accelerator+PyTorch")( + test_case + ) + + +def require_torch_multi_gpu(test_case): + """ + Decorator marking a test that requires a multi-GPU setup (in PyTorch). These tests are skipped on a machine without + multiple GPUs. To run *only* the multi_gpu tests, assuming all test names contain multi_gpu: $ pytest -sv ./tests + -k "multi_gpu" + """ + if not is_torch_available(): + return unittest.skip("test requires PyTorch")(test_case) + + import torch + + return unittest.skipUnless(torch.cuda.device_count() > 1, "test requires multiple GPUs")(test_case) + + +def require_torch_multi_accelerator(test_case): + """ + Decorator marking a test that requires a multi-accelerator setup (in PyTorch). These tests are skipped on a machine + without multiple hardware accelerators. + """ + if not is_torch_available(): + return unittest.skip("test requires PyTorch")(test_case) + + import torch + + return unittest.skipUnless( + torch.cuda.device_count() > 1 or torch.xpu.device_count() > 1, "test requires multiple hardware accelerators" + )(test_case) + + +def require_torch_accelerator_with_fp16(test_case): + """Decorator marking a test that requires an accelerator with support for the FP16 data type.""" + return unittest.skipUnless(_is_torch_fp16_available(torch_device), "test requires accelerator with fp16 support")( + test_case + ) + + +def require_torch_accelerator_with_fp64(test_case): + """Decorator marking a test that requires an accelerator with support for the FP64 data type.""" + return unittest.skipUnless(_is_torch_fp64_available(torch_device), "test requires accelerator with fp64 support")( + test_case + ) + + +def require_big_gpu_with_torch_cuda(test_case): + """ + Decorator marking a test that requires a bigger GPU (24GB) for execution. Some example pipelines: Flux, SD3, Cog, + etc. + """ + if not is_torch_available(): + return unittest.skip("test requires PyTorch")(test_case) + + import torch + + if not torch.cuda.is_available(): + return unittest.skip("test requires PyTorch CUDA")(test_case) + + device_properties = torch.cuda.get_device_properties(0) + total_memory = device_properties.total_memory / (1024**3) + return unittest.skipUnless( + total_memory >= BIG_GPU_MEMORY, f"test requires a GPU with at least {BIG_GPU_MEMORY} GB memory" + )(test_case) + + +def require_big_accelerator(test_case): + """ + Decorator marking a test that requires a bigger hardware accelerator (24GB) for execution. Some example pipelines: + Flux, SD3, Cog, etc. + """ + import pytest + + test_case = pytest.mark.big_accelerator(test_case) + + if not is_torch_available(): + return unittest.skip("test requires PyTorch")(test_case) + + import torch + + if not (torch.cuda.is_available() or torch.xpu.is_available()): + return unittest.skip("test requires PyTorch CUDA")(test_case) + + if torch.xpu.is_available(): + device_properties = torch.xpu.get_device_properties(0) + else: + device_properties = torch.cuda.get_device_properties(0) + + total_memory = device_properties.total_memory / (1024**3) + return unittest.skipUnless( + total_memory >= BIG_GPU_MEMORY, + f"test requires a hardware accelerator with at least {BIG_GPU_MEMORY} GB memory", + )(test_case) + + +def require_torch_accelerator_with_training(test_case): + """Decorator marking a test that requires an accelerator with support for training.""" + return unittest.skipUnless( + is_torch_available() and backend_supports_training(torch_device), + "test requires accelerator with training support", + )(test_case) + + +def skip_mps(test_case): + """Decorator marking a test to skip if torch_device is 'mps'""" + return unittest.skipUnless(torch_device != "mps", "test requires non 'mps' device")(test_case) + + +def require_flax(test_case): + """ + Decorator marking a test that requires JAX & Flax. These tests are skipped when one / both are not installed + """ + return unittest.skipUnless(is_flax_available(), "test requires JAX & Flax")(test_case) + + +def require_compel(test_case): + """ + Decorator marking a test that requires compel: https://github.com/damian0815/compel. These tests are skipped when + the library is not installed. + """ + return unittest.skipUnless(is_compel_available(), "test requires compel")(test_case) + + +def require_onnxruntime(test_case): + """ + Decorator marking a test that requires onnxruntime. These tests are skipped when onnxruntime isn't installed. + """ + return unittest.skipUnless(is_onnx_available(), "test requires onnxruntime")(test_case) + + +def require_note_seq(test_case): + """ + Decorator marking a test that requires note_seq. These tests are skipped when note_seq isn't installed. + """ + return unittest.skipUnless(is_note_seq_available(), "test requires note_seq")(test_case) + + +def require_accelerator(test_case): + """ + Decorator marking a test that requires a hardware accelerator backend. These tests are skipped when there are no + hardware accelerator available. + """ + return unittest.skipUnless(torch_device != "cpu", "test requires a hardware accelerator")(test_case) + + +def require_torchsde(test_case): + """ + Decorator marking a test that requires torchsde. These tests are skipped when torchsde isn't installed. + """ + return unittest.skipUnless(is_torchsde_available(), "test requires torchsde")(test_case) + + +def require_peft_backend(test_case): + """ + Decorator marking a test that requires PEFT backend, this would require some specific versions of PEFT and + transformers. + """ + return unittest.skipUnless(USE_PEFT_BACKEND, "test requires PEFT backend")(test_case) + + +def require_timm(test_case): + """ + Decorator marking a test that requires timm. These tests are skipped when timm isn't installed. + """ + return unittest.skipUnless(is_timm_available(), "test requires timm")(test_case) + + +def require_bitsandbytes(test_case): + """ + Decorator marking a test that requires bitsandbytes. These tests are skipped when bitsandbytes isn't installed. + """ + return unittest.skipUnless(is_bitsandbytes_available(), "test requires bitsandbytes")(test_case) + + +def require_quanto(test_case): + """ + Decorator marking a test that requires quanto. These tests are skipped when quanto isn't installed. + """ + return unittest.skipUnless(is_optimum_quanto_available(), "test requires quanto")(test_case) + + +def require_accelerate(test_case): + """ + Decorator marking a test that requires accelerate. These tests are skipped when accelerate isn't installed. + """ + return unittest.skipUnless(is_accelerate_available(), "test requires accelerate")(test_case) + + +def require_peft_version_greater(peft_version): + """ + Decorator marking a test that requires PEFT backend with a specific version, this would require some specific + versions of PEFT and transformers. + """ + + def decorator(test_case): + correct_peft_version = is_peft_available() and version.parse( + version.parse(importlib.metadata.version("peft")).base_version + ) > version.parse(peft_version) + return unittest.skipUnless( + correct_peft_version, f"test requires PEFT backend with the version greater than {peft_version}" + )(test_case) + + return decorator + + +def require_transformers_version_greater(transformers_version): + """ + Decorator marking a test that requires transformers with a specific version, this would require some specific + versions of PEFT and transformers. + """ + + def decorator(test_case): + correct_transformers_version = is_transformers_available() and version.parse( + version.parse(importlib.metadata.version("transformers")).base_version + ) > version.parse(transformers_version) + return unittest.skipUnless( + correct_transformers_version, + f"test requires transformers with the version greater than {transformers_version}", + )(test_case) + + return decorator + + +def require_accelerate_version_greater(accelerate_version): + def decorator(test_case): + correct_accelerate_version = is_accelerate_available() and version.parse( + version.parse(importlib.metadata.version("accelerate")).base_version + ) > version.parse(accelerate_version) + return unittest.skipUnless( + correct_accelerate_version, f"Test requires accelerate with the version greater than {accelerate_version}." + )(test_case) + + return decorator + + +def require_bitsandbytes_version_greater(bnb_version): + def decorator(test_case): + correct_bnb_version = is_bitsandbytes_available() and version.parse( + version.parse(importlib.metadata.version("bitsandbytes")).base_version + ) > version.parse(bnb_version) + return unittest.skipUnless( + correct_bnb_version, f"Test requires bitsandbytes with the version greater than {bnb_version}." + )(test_case) + + return decorator + + +def require_hf_hub_version_greater(hf_hub_version): + def decorator(test_case): + correct_hf_hub_version = version.parse( + version.parse(importlib.metadata.version("huggingface_hub")).base_version + ) > version.parse(hf_hub_version) + return unittest.skipUnless( + correct_hf_hub_version, f"Test requires huggingface_hub with the version greater than {hf_hub_version}." + )(test_case) + + return decorator + + +def require_gguf_version_greater_or_equal(gguf_version): + def decorator(test_case): + correct_gguf_version = is_gguf_available() and version.parse( + version.parse(importlib.metadata.version("gguf")).base_version + ) >= version.parse(gguf_version) + return unittest.skipUnless( + correct_gguf_version, f"Test requires gguf with the version greater than {gguf_version}." + )(test_case) + + return decorator + + +def require_torchao_version_greater_or_equal(torchao_version): + def decorator(test_case): + correct_torchao_version = is_torchao_available() and version.parse( + version.parse(importlib.metadata.version("torchao")).base_version + ) >= version.parse(torchao_version) + return unittest.skipUnless( + correct_torchao_version, f"Test requires torchao with version greater than {torchao_version}." + )(test_case) + + return decorator + + +def require_modelopt_version_greater_or_equal(modelopt_version): + def decorator(test_case): + correct_nvidia_modelopt_version = is_nvidia_modelopt_available() and version.parse( + version.parse(importlib.metadata.version("modelopt")).base_version + ) >= version.parse(modelopt_version) + return unittest.skipUnless( + correct_nvidia_modelopt_version, f"Test requires modelopt with version greater than {modelopt_version}." + )(test_case) + + return decorator + + +def require_kernels_version_greater_or_equal(kernels_version): + def decorator(test_case): + correct_kernels_version = is_kernels_available() and version.parse( + version.parse(importlib.metadata.version("kernels")).base_version + ) >= version.parse(kernels_version) + return unittest.skipUnless( + correct_kernels_version, f"Test requires kernels with version greater than {kernels_version}." + )(test_case) + + return decorator + + +def deprecate_after_peft_backend(test_case): + """ + Decorator marking a test that will be skipped after PEFT backend + """ + return unittest.skipUnless(not USE_PEFT_BACKEND, "test skipped in favor of PEFT backend")(test_case) + + +def get_python_version(): + sys_info = sys.version_info + major, minor = sys_info.major, sys_info.minor + return major, minor + + +def load_numpy(arry: str | np.ndarray, local_path: str | None = None) -> np.ndarray: + if isinstance(arry, str): + if local_path is not None: + # local_path can be passed to correct images of tests + return Path(local_path, arry.split("/")[-5], arry.split("/")[-2], arry.split("/")[-1]).as_posix() + elif arry.startswith("http://") or arry.startswith("https://"): + response = requests.get(arry, timeout=DIFFUSERS_REQUEST_TIMEOUT) + response.raise_for_status() + arry = np.load(BytesIO(response.content)) + elif os.path.isfile(arry): + arry = np.load(arry) + else: + raise ValueError( + f"Incorrect path or url, URLs must start with `http://` or `https://`, and {arry} is not a valid path" + ) + elif isinstance(arry, np.ndarray): + pass + else: + raise ValueError( + "Incorrect format used for numpy ndarray. Should be an url linking to an image, a local path, or a" + " ndarray." + ) + + return arry + + +def load_pt(url: str, map_location: str | None = None, weights_only: bool | None = True): + response = requests.get(url, timeout=DIFFUSERS_REQUEST_TIMEOUT) + response.raise_for_status() + arry = torch.load(BytesIO(response.content), map_location=map_location, weights_only=weights_only) + return arry + + +def load_image(image: str | PIL.Image.Image) -> PIL.Image.Image: + """ + Loads `image` to a PIL Image. + + Args: + image (`str` or `PIL.Image.Image`): + The image to convert to the PIL Image format. + Returns: + `PIL.Image.Image`: + A PIL Image. + """ + if isinstance(image, str): + if image.startswith("http://") or image.startswith("https://"): + image = PIL.Image.open(requests.get(image, stream=True, timeout=DIFFUSERS_REQUEST_TIMEOUT).raw) + elif os.path.isfile(image): + image = PIL.Image.open(image) + else: + raise ValueError( + f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path" + ) + elif isinstance(image, PIL.Image.Image): + image = image + else: + raise ValueError( + "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image." + ) + image = PIL.ImageOps.exif_transpose(image) + image = image.convert("RGB") + return image + + +def preprocess_image(image: PIL.Image, batch_size: int): + w, h = image.size + w, h = (x - x % 8 for x in (w, h)) # resize to integer multiple of 8 + image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = np.array(image).astype(np.float32) / 255.0 + image = np.vstack([image[None].transpose(0, 3, 1, 2)] * batch_size) + image = torch.from_numpy(image) + return 2.0 * image - 1.0 + + +def export_to_gif(image: list[PIL.Image.Image], output_gif_path: str = None) -> str: + if output_gif_path is None: + output_gif_path = tempfile.NamedTemporaryFile(suffix=".gif").name + + image[0].save( + output_gif_path, + save_all=True, + append_images=image[1:], + optimize=False, + duration=100, + loop=0, + ) + return output_gif_path + + +@contextmanager +def buffered_writer(raw_f): + f = io.BufferedWriter(raw_f) + yield f + f.flush() + + +def export_to_ply(mesh, output_ply_path: str = None): + """ + Write a PLY file for a mesh. + """ + if output_ply_path is None: + output_ply_path = tempfile.NamedTemporaryFile(suffix=".ply").name + + coords = mesh.verts.detach().cpu().numpy() + faces = mesh.faces.cpu().numpy() + rgb = np.stack([mesh.vertex_channels[x].detach().cpu().numpy() for x in "RGB"], axis=1) + + with buffered_writer(open(output_ply_path, "wb")) as f: + f.write(b"ply\n") + f.write(b"format binary_little_endian 1.0\n") + f.write(bytes(f"element vertex {len(coords)}\n", "ascii")) + f.write(b"property float x\n") + f.write(b"property float y\n") + f.write(b"property float z\n") + if rgb is not None: + f.write(b"property uchar red\n") + f.write(b"property uchar green\n") + f.write(b"property uchar blue\n") + if faces is not None: + f.write(bytes(f"element face {len(faces)}\n", "ascii")) + f.write(b"property list uchar int vertex_index\n") + f.write(b"end_header\n") + + if rgb is not None: + rgb = (rgb * 255.499).round().astype(int) + vertices = [ + (*coord, *rgb) + for coord, rgb in zip( + coords.tolist(), + rgb.tolist(), + ) + ] + format = struct.Struct("<3f3B") + for item in vertices: + f.write(format.pack(*item)) + else: + format = struct.Struct("<3f") + for vertex in coords.tolist(): + f.write(format.pack(*vertex)) + + if faces is not None: + for tri in faces.tolist(): + f.write(format.pack(len(tri), *tri)) + format = struct.Struct(" str: + if is_opencv_available(): + import cv2 + else: + raise ImportError(BACKENDS_MAPPING["opencv"][1].format("export_to_video")) + if output_video_path is None: + output_video_path = tempfile.NamedTemporaryFile(suffix=".mp4").name + + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + h, w, c = video_frames[0].shape + video_writer = cv2.VideoWriter(output_video_path, fourcc, fps=8, frameSize=(w, h)) + for i in range(len(video_frames)): + img = cv2.cvtColor(video_frames[i], cv2.COLOR_RGB2BGR) + video_writer.write(img) + return output_video_path + + +def load_hf_numpy(path) -> np.ndarray: + base_url = "https://huggingface.co/datasets/fusing/diffusers-testing/resolve/main" + + if not path.startswith("http://") and not path.startswith("https://"): + path = os.path.join(base_url, urllib.parse.quote(path)) + + return load_numpy(path) + + +# --- pytest conf functions --- # + +# to avoid multiple invocation from tests/conftest.py and examples/conftest.py - make sure it's called only once +pytest_opt_registered = {} + + +def pytest_addoption_shared(parser): + """ + This function is to be called from `conftest.py` via `pytest_addoption` wrapper that has to be defined there. + + It allows loading both `conftest.py` files at once without causing a failure due to adding the same `pytest` + option. + + """ + option = "--make-reports" + if option not in pytest_opt_registered: + parser.addoption( + option, + action="store", + default=False, + help="generate report files. The value of this option is used as a prefix to report names", + ) + pytest_opt_registered[option] = 1 + + +def pytest_terminal_summary_main(tr, id): + """ + Generate multiple reports at the end of test suite run - each report goes into a dedicated file in the current + directory. The report files are prefixed with the test suite name. + + This function emulates --duration and -rA pytest arguments. + + This function is to be called from `conftest.py` via `pytest_terminal_summary` wrapper that has to be defined + there. + + Args: + - tr: `terminalreporter` passed from `conftest.py` + - id: unique id like `tests` or `examples` that will be incorporated into the final reports filenames - this is + needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other. + + NB: this functions taps into a private _pytest API and while unlikely, it could break should + pytest do internal changes - also it calls default internal methods of terminalreporter which + can be hijacked by various `pytest-` plugins and interfere. + + """ + from _pytest.config import create_terminal_writer + + if not len(id): + id = "tests" + + config = tr.config + orig_writer = config.get_terminal_writer() + orig_tbstyle = config.option.tbstyle + orig_reportchars = tr.reportchars + + dir = "reports" + Path(dir).mkdir(parents=True, exist_ok=True) + report_files = { + k: f"{dir}/{id}_{k}.txt" + for k in [ + "durations", + "errors", + "failures_long", + "failures_short", + "failures_line", + "passes", + "stats", + "summary_short", + "warnings", + ] + } + + # custom durations report + # note: there is no need to call pytest --durations=XX to get this separate report + # adapted from https://github.com/pytest-dev/pytest/blob/897f151e/src/_pytest/runner.py#L66 + dlist = [] + for replist in tr.stats.values(): + for rep in replist: + if hasattr(rep, "duration"): + dlist.append(rep) + if dlist: + dlist.sort(key=lambda x: x.duration, reverse=True) + with open(report_files["durations"], "w") as f: + durations_min = 0.05 # sec + f.write("slowest durations\n") + for i, rep in enumerate(dlist): + if rep.duration < durations_min: + f.write(f"{len(dlist) - i} durations < {durations_min} secs were omitted") + break + f.write(f"{rep.duration:02.2f}s {rep.when:<8} {rep.nodeid}\n") + + def summary_failures_short(tr): + # expecting that the reports were --tb=long (default) so we chop them off here to the last frame + reports = tr.getreports("failed") + if not reports: + return + tr.write_sep("=", "FAILURES SHORT STACK") + for rep in reports: + msg = tr._getfailureheadline(rep) + tr.write_sep("_", msg, red=True, bold=True) + # chop off the optional leading extra frames, leaving only the last one + longrepr = re.sub(r".*_ _ _ (_ ){10,}_ _ ", "", rep.longreprtext, 0, re.M | re.S) + tr._tw.line(longrepr) + # note: not printing out any rep.sections to keep the report short + + # use ready-made report funcs, we are just hijacking the filehandle to log to a dedicated file each + # adapted from https://github.com/pytest-dev/pytest/blob/897f151e/src/_pytest/terminal.py#L814 + # note: some pytest plugins may interfere by hijacking the default `terminalreporter` (e.g. + # pytest-instafail does that) + + # report failures with line/short/long styles + config.option.tbstyle = "auto" # full tb + with open(report_files["failures_long"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.summary_failures() + + # config.option.tbstyle = "short" # short tb + with open(report_files["failures_short"], "w") as f: + tr._tw = create_terminal_writer(config, f) + summary_failures_short(tr) + + config.option.tbstyle = "line" # one line per error + with open(report_files["failures_line"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.summary_failures() + + with open(report_files["errors"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.summary_errors() + + with open(report_files["warnings"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.summary_warnings() # normal warnings + tr.summary_warnings() # final warnings + + tr.reportchars = "wPpsxXEf" # emulate -rA (used in summary_passes() and short_test_summary()) + with open(report_files["passes"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.summary_passes() + + with open(report_files["summary_short"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.short_test_summary() + + with open(report_files["stats"], "w") as f: + tr._tw = create_terminal_writer(config, f) + tr.summary_stats() + + # restore: + tr._tw = orig_writer + tr.reportchars = orig_reportchars + config.option.tbstyle = orig_tbstyle + + +# Adapted from https://github.com/huggingface/transformers/blob/000e52aec8850d3fe2f360adc6fd256e5b47fe4c/src/transformers/testing_utils.py#L1905 +def is_flaky(max_attempts: int = 5, wait_before_retry: float | None = None, description: str | None = None): + """ + To decorate flaky tests (methods or entire classes). They will be retried on failures. + + Args: + max_attempts (`int`, *optional*, defaults to 5): + The maximum number of attempts to retry the flaky test. + wait_before_retry (`float`, *optional*): + If provided, will wait that number of seconds before retrying the test. + description (`str`, *optional*): + A string to describe the situation (what / where / why is flaky, link to GH issue/PR comments, errors, + etc.) + """ + + def decorator(obj): + # If decorating a class, wrap each test method on it + if inspect.isclass(obj): + for attr_name, attr_value in list(obj.__dict__.items()): + if callable(attr_value) and attr_name.startswith("test"): + # recursively decorate the method + setattr(obj, attr_name, decorator(attr_value)) + return obj + + # Otherwise we're decorating a single test function / method + @functools.wraps(obj) + def wrapper(*args, **kwargs): + retry_count = 1 + while retry_count < max_attempts: + try: + return obj(*args, **kwargs) + except Exception as err: + msg = ( + f"[FLAKY] {description or obj.__name__!r} " + f"failed on attempt {retry_count}/{max_attempts}: {err}" + ) + print(msg, file=sys.stderr) + if wait_before_retry is not None: + time.sleep(wait_before_retry) + retry_count += 1 + + return obj(*args, **kwargs) + + return wrapper + + return decorator + + +# Taken from: https://github.com/huggingface/transformers/blob/3658488ff77ff8d45101293e749263acf437f4d5/src/transformers/testing_utils.py#L1787 +def run_test_in_subprocess(test_case, target_func, inputs=None, timeout=None): + """ + To run a test in a subprocess. In particular, this can avoid (GPU) memory issue. + + Args: + test_case (`unittest.TestCase`): + The test that will run `target_func`. + target_func (`Callable`): + The function implementing the actual testing logic. + inputs (`dict`, *optional*, defaults to `None`): + The inputs that will be passed to `target_func` through an (input) queue. + timeout (`int`, *optional*, defaults to `None`): + The timeout (in seconds) that will be passed to the input and output queues. If not specified, the env. + variable `PYTEST_TIMEOUT` will be checked. If still `None`, its value will be set to `600`. + """ + if timeout is None: + timeout = int(os.environ.get("PYTEST_TIMEOUT", 600)) + + start_methohd = "spawn" + ctx = multiprocessing.get_context(start_methohd) + + input_queue = ctx.Queue(1) + output_queue = ctx.JoinableQueue(1) + + # We can't send `unittest.TestCase` to the child, otherwise we get issues regarding pickle. + input_queue.put(inputs, timeout=timeout) + + process = ctx.Process(target=target_func, args=(input_queue, output_queue, timeout)) + process.start() + # Kill the child process if we can't get outputs from it in time: otherwise, the hanging subprocess prevents + # the test to exit properly. + try: + results = output_queue.get(timeout=timeout) + output_queue.task_done() + except Exception as e: + process.terminate() + test_case.fail(e) + process.join(timeout=timeout) + + if results["error"] is not None: + test_case.fail(f"{results['error']}") + + +class CaptureLogger: + """ + Args: + Context manager to capture `logging` streams + logger: 'logging` logger object + Returns: + The captured output is available via `self.out` + Example: + ```python + >>> from diffusers import logging + >>> from diffusers.testing_utils import CaptureLogger + + >>> msg = "Testing 1, 2, 3" + >>> logging.set_verbosity_info() + >>> logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.py") + >>> with CaptureLogger(logger) as cl: + ... logger.info(msg) + >>> assert cl.out, msg + "\n" + ``` + """ + + def __init__(self, logger): + self.logger = logger + self.io = StringIO() + self.sh = logging.StreamHandler(self.io) + self.out = "" + + def __enter__(self): + self.logger.addHandler(self.sh) + return self + + def __exit__(self, *exc): + self.logger.removeHandler(self.sh) + self.out = self.io.getvalue() + + def __repr__(self): + return f"captured: {self.out}\n" + + +def enable_full_determinism(): + """ + Helper function for reproducible behavior during distributed training. See + - https://pytorch.org/docs/stable/notes/randomness.html for pytorch + """ + from .torch_utils import enable_full_determinism as _enable_full_determinism + + logger.warning( + "enable_full_determinism has been moved to diffusers.utils.torch_utils. " + "Importing from diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _enable_full_determinism() + + +def disable_full_determinism(): + from .torch_utils import disable_full_determinism as _disable_full_determinism + + logger.warning( + "disable_full_determinism has been moved to diffusers.utils.torch_utils. " + "Importing from diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _disable_full_determinism() + + +# Utils for custom and alternative accelerator devices +def _is_torch_fp16_available(device): + if not is_torch_available(): + return False + + import torch + + device = torch.device(device) + + try: + x = torch.zeros((2, 2), dtype=torch.float16).to(device) + _ = torch.mul(x, x) + return True + + except Exception as e: + if device.type == "cuda": + raise ValueError( + f"You have passed a device of type 'cuda' which should work with 'fp16', but 'cuda' does not seem to be correctly installed on your machine: {e}" + ) + + return False + + +def _is_torch_fp64_available(device): + if not is_torch_available(): + return False + + import torch + + device = torch.device(device) + + try: + x = torch.zeros((2, 2), dtype=torch.float64).to(device) + _ = torch.mul(x, x) + return True + + except Exception as e: + if device.type == "cuda": + raise ValueError( + f"You have passed a device of type 'cuda' which should work with 'fp64', but 'cuda' does not seem to be correctly installed on your machine: {e}" + ) + + return False + + +# Guard these lookups for when Torch is not used - alternative accelerator support is for PyTorch +if is_torch_available(): + # Behaviour flags + BACKEND_SUPPORTS_TRAINING = {"cuda": True, "xpu": True, "cpu": True, "mps": False, "default": True} + + # Function definitions + BACKEND_EMPTY_CACHE = { + "cuda": torch.cuda.empty_cache, + "xpu": torch.xpu.empty_cache, + "cpu": None, + "mps": torch.mps.empty_cache, + "default": None, + } + BACKEND_DEVICE_COUNT = { + "cuda": torch.cuda.device_count, + "xpu": torch.xpu.device_count, + "cpu": lambda: 0, + "mps": lambda: 0, + "default": 0, + } + BACKEND_MANUAL_SEED = { + "cuda": torch.cuda.manual_seed, + "xpu": torch.xpu.manual_seed, + "cpu": torch.manual_seed, + "mps": torch.mps.manual_seed, + "default": torch.manual_seed, + } + BACKEND_RESET_PEAK_MEMORY_STATS = { + "cuda": torch.cuda.reset_peak_memory_stats, + "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None), + "cpu": None, + "mps": None, + "default": None, + } + BACKEND_RESET_MAX_MEMORY_ALLOCATED = { + "cuda": torch.cuda.reset_max_memory_allocated, + "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None), + "cpu": None, + "mps": None, + "default": None, + } + BACKEND_MAX_MEMORY_ALLOCATED = { + "cuda": torch.cuda.max_memory_allocated, + "xpu": getattr(torch.xpu, "max_memory_allocated", None), + "cpu": 0, + "mps": 0, + "default": 0, + } + BACKEND_SYNCHRONIZE = { + "cuda": torch.cuda.synchronize, + "xpu": getattr(torch.xpu, "synchronize", None), + "cpu": None, + "mps": None, + "default": None, + } + + +# This dispatches a defined function according to the accelerator from the function definitions. +def _device_agnostic_dispatch(device: str, dispatch_table: dict[str, Callable], *args, **kwargs): + if device not in dispatch_table: + return dispatch_table["default"](*args, **kwargs) + + fn = dispatch_table[device] + + # Some device agnostic functions return values. Need to guard against 'None' instead at + # user level + if not callable(fn): + return fn + + return fn(*args, **kwargs) + + +# These are callables which automatically dispatch the function specific to the accelerator +def backend_manual_seed(device: str, seed: int): + from .torch_utils import backend_manual_seed as _backend_manual_seed + + logger.warning( + "backend_manual_seed has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_manual_seed(device, seed) + + +def backend_synchronize(device: str): + from .torch_utils import backend_synchronize as _backend_synchronize + + logger.warning( + "backend_synchronize has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_synchronize(device) + + +def backend_empty_cache(device: str): + from .torch_utils import backend_empty_cache as _backend_empty_cache + + logger.warning( + "backend_empty_cache has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_empty_cache(device) + + +def backend_device_count(device: str): + from .torch_utils import backend_device_count as _backend_device_count + + logger.warning( + "backend_device_count has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_device_count(device) + + +def backend_reset_peak_memory_stats(device: str): + from .torch_utils import backend_reset_peak_memory_stats as _backend_reset_peak_memory_stats + + logger.warning( + "backend_reset_peak_memory_stats has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_reset_peak_memory_stats(device) + + +def backend_reset_max_memory_allocated(device: str): + from .torch_utils import backend_reset_max_memory_allocated as _backend_reset_max_memory_allocated + + logger.warning( + "backend_reset_max_memory_allocated has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_reset_max_memory_allocated(device) + + +def backend_max_memory_allocated(device: str): + from .torch_utils import backend_max_memory_allocated as _backend_max_memory_allocated + + logger.warning( + "backend_max_memory_allocated has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_max_memory_allocated(device) + + +# These are callables which return boolean behaviour flags and can be used to specify some +# device agnostic alternative where the feature is unsupported. +def backend_supports_training(device: str): + from .torch_utils import backend_supports_training as _backend_supports_training + + logger.warning( + "backend_supports_training has been moved to diffusers.utils.torch_utils. " + "diffusers.utils.testing_utils is deprecated and will be removed in a future version." + ) + return _backend_supports_training(device) + + +# Guard for when Torch is not available +if is_torch_available(): + # Update device function dict mapping + def update_mapping_from_spec(device_fn_dict: dict[str, Callable], attribute_name: str): + try: + # Try to import the function directly + spec_fn = getattr(device_spec_module, attribute_name) + device_fn_dict[torch_device] = spec_fn + except AttributeError as e: + # If the function doesn't exist, and there is no default, throw an error + if "default" not in device_fn_dict: + raise AttributeError( + f"`{attribute_name}` not found in '{device_spec_path}' and no default fallback function found." + ) from e + + if "DIFFUSERS_TEST_DEVICE_SPEC" in os.environ: + device_spec_path = os.environ["DIFFUSERS_TEST_DEVICE_SPEC"] + if not Path(device_spec_path).is_file(): + raise ValueError(f"Specified path to device specification file is not found. Received {device_spec_path}") + + try: + import_name = device_spec_path[: device_spec_path.index(".py")] + except ValueError as e: + raise ValueError(f"Provided device spec file is not a Python file! Received {device_spec_path}") from e + + device_spec_module = importlib.import_module(import_name) + + try: + device_name = device_spec_module.DEVICE_NAME + except AttributeError: + raise AttributeError("Device spec file did not contain `DEVICE_NAME`") + + if "DIFFUSERS_TEST_DEVICE" in os.environ and torch_device != device_name: + msg = f"Mismatch between environment variable `DIFFUSERS_TEST_DEVICE` '{torch_device}' and device found in spec '{device_name}'\n" + msg += "Either unset `DIFFUSERS_TEST_DEVICE` or ensure it matches device spec name." + raise ValueError(msg) + + torch_device = device_name + + # Add one entry here for each `BACKEND_*` dictionary. + update_mapping_from_spec(BACKEND_MANUAL_SEED, "MANUAL_SEED_FN") + update_mapping_from_spec(BACKEND_EMPTY_CACHE, "EMPTY_CACHE_FN") + update_mapping_from_spec(BACKEND_DEVICE_COUNT, "DEVICE_COUNT_FN") + update_mapping_from_spec(BACKEND_SUPPORTS_TRAINING, "SUPPORTS_TRAINING") + update_mapping_from_spec(BACKEND_RESET_PEAK_MEMORY_STATS, "RESET_PEAK_MEMORY_STATS_FN") + update_mapping_from_spec(BACKEND_RESET_MAX_MEMORY_ALLOCATED, "RESET_MAX_MEMORY_ALLOCATED_FN") + update_mapping_from_spec(BACKEND_MAX_MEMORY_ALLOCATED, "MAX_MEMORY_ALLOCATED_FN") + + +# Modified from https://github.com/huggingface/transformers/blob/cdfb018d0300fef3b07d9220f3efe9c2a9974662/src/transformers/testing_utils.py#L3090 + +# Type definition of key used in `Expectations` class. +DeviceProperties = tuple[str | None, int | None] + + +@functools.lru_cache +def get_device_properties() -> DeviceProperties: + """ + Get environment device properties. + """ + if IS_CUDA_SYSTEM or IS_ROCM_SYSTEM: + import torch + + major, _ = torch.cuda.get_device_capability() + if IS_ROCM_SYSTEM: + return ("rocm", major) + else: + return ("cuda", major) + elif IS_XPU_SYSTEM: + import torch + + # To get more info of the architecture meaning and bit allocation, refer to https://github.com/intel/llvm/blob/sycl/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def + arch = torch.xpu.get_device_capability()["architecture"] + gen_mask = 0x000000FF00000000 + gen = (arch & gen_mask) >> 32 + return ("xpu", gen) + else: + return (torch_device, None) + + +if TYPE_CHECKING: + DevicePropertiesUserDict = UserDict[DeviceProperties, Any] +else: + DevicePropertiesUserDict = UserDict + +if is_torch_available(): + from diffusers.hooks._common import _GO_LC_SUPPORTED_PYTORCH_LAYERS + from diffusers.hooks.group_offloading import ( + _GROUP_ID_LAZY_LEAF, + _compute_group_hash, + _find_parent_module_in_module_dict, + _gather_buffers_with_no_group_offloading_parent, + _gather_parameters_with_no_group_offloading_parent, + ) + + def _get_expected_safetensors_files( + module: torch.nn.Module, + offload_to_disk_path: str, + offload_type: str, + num_blocks_per_group: int | None = None, + ) -> Set[str]: + expected_files = set() + + def get_hashed_filename(group_id: str) -> str: + short_hash = _compute_group_hash(group_id) + return os.path.join(offload_to_disk_path, f"group_{short_hash}.safetensors") + + if offload_type == "block_level": + if num_blocks_per_group is None: + raise ValueError("num_blocks_per_group must be provided for 'block_level' offloading.") + + # Handle groups of ModuleList and Sequential blocks + unmatched_modules = [] + for name, submodule in module.named_children(): + if not isinstance(submodule, (torch.nn.ModuleList, torch.nn.Sequential)): + unmatched_modules.append(module) + continue + + for i in range(0, len(submodule), num_blocks_per_group): + current_modules = submodule[i : i + num_blocks_per_group] + if not current_modules: + continue + group_id = f"{name}_{i}_{i + len(current_modules) - 1}" + expected_files.add(get_hashed_filename(group_id)) + + # Handle the group for unmatched top-level modules and parameters + for module in unmatched_modules: + expected_files.add(get_hashed_filename(f"{module.__class__.__name__}_unmatched_group")) + + elif offload_type == "leaf_level": + # Handle leaf-level module groups + for name, submodule in module.named_modules(): + if isinstance(submodule, _GO_LC_SUPPORTED_PYTORCH_LAYERS): + # These groups will always have parameters, so a file is expected + expected_files.add(get_hashed_filename(name)) + + # Handle groups for non-leaf parameters/buffers + modules_with_group_offloading = { + name for name, sm in module.named_modules() if isinstance(sm, _GO_LC_SUPPORTED_PYTORCH_LAYERS) + } + parameters = _gather_parameters_with_no_group_offloading_parent(module, modules_with_group_offloading) + buffers = _gather_buffers_with_no_group_offloading_parent(module, modules_with_group_offloading) + + all_orphans = parameters + buffers + if all_orphans: + parent_to_tensors = {} + module_dict = dict(module.named_modules()) + for tensor_name, _ in all_orphans: + parent_name = _find_parent_module_in_module_dict(tensor_name, module_dict) + if parent_name not in parent_to_tensors: + parent_to_tensors[parent_name] = [] + parent_to_tensors[parent_name].append(tensor_name) + + for parent_name in parent_to_tensors: + # A file is expected for each parent that gathers orphaned tensors + expected_files.add(get_hashed_filename(parent_name)) + expected_files.add(get_hashed_filename(_GROUP_ID_LAZY_LEAF)) + + else: + raise ValueError(f"Unsupported offload_type: {offload_type}") + + return expected_files + + def _check_safetensors_serialization( + module: torch.nn.Module, + offload_to_disk_path: str, + offload_type: str, + num_blocks_per_group: int | None = None, + ) -> bool: + if not os.path.isdir(offload_to_disk_path): + return False, None, None + + expected_files = _get_expected_safetensors_files( + module, offload_to_disk_path, offload_type, num_blocks_per_group + ) + actual_files = set(glob.glob(os.path.join(offload_to_disk_path, "*.safetensors"))) + missing_files = expected_files - actual_files + extra_files = actual_files - expected_files + + is_correct = not missing_files and not extra_files + return is_correct, extra_files, missing_files + + +class Expectations(DevicePropertiesUserDict): + def get_expectation(self) -> Any: + """ + Find best matching expectation based on environment device properties. + """ + return self.find_expectation(get_device_properties()) + + @staticmethod + def is_default(key: DeviceProperties) -> bool: + return all(p is None for p in key) + + @staticmethod + def score(key: DeviceProperties, other: DeviceProperties) -> int: + """ + Returns score indicating how similar two instances of the `Properties` tuple are. Points are calculated using + bits, but documented as int. Rules are as follows: + * Matching `type` gives 8 points. + * Semi-matching `type`, for example cuda and rocm, gives 4 points. + * Matching `major` (compute capability major version) gives 2 points. + * Default expectation (if present) gives 1 points. + """ + (device_type, major) = key + (other_device_type, other_major) = other + + score = 0b0 + if device_type == other_device_type: + score |= 0b1000 + elif device_type in ["cuda", "rocm"] and other_device_type in ["cuda", "rocm"]: + score |= 0b100 + + if major == other_major and other_major is not None: + score |= 0b10 + + if Expectations.is_default(other): + score |= 0b1 + + return int(score) + + def find_expectation(self, key: DeviceProperties = (None, None)) -> Any: + """ + Find best matching expectation based on provided device properties. + """ + (result_key, result) = max(self.data.items(), key=lambda x: Expectations.score(key, x[0])) + + if Expectations.score(key, result_key) == 0: + raise ValueError(f"No matching expectation found for {key}") + + return result + + def __repr__(self): + return f"{self.data}" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/torch_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7f4cb3e12766a51fef8a87b9de0a1f2a9f2cc989 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/torch_utils.py @@ -0,0 +1,337 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +PyTorch utilities: Utilities related to PyTorch +""" + +from __future__ import annotations + +import functools +import os + +from . import logging +from .import_utils import is_torch_available, is_torch_mlu_available, is_torch_npu_available, is_torch_version + + +if is_torch_available(): + import torch + from torch.fft import fftn, fftshift, ifftn, ifftshift + + BACKEND_SUPPORTS_TRAINING = {"cuda": True, "xpu": True, "cpu": True, "mps": False, "default": True} + BACKEND_EMPTY_CACHE = { + "cuda": torch.cuda.empty_cache, + "xpu": torch.xpu.empty_cache, + "cpu": None, + "mps": torch.mps.empty_cache, + "default": None, + } + BACKEND_DEVICE_COUNT = { + "cuda": torch.cuda.device_count, + "xpu": torch.xpu.device_count, + "cpu": lambda: 0, + "mps": lambda: 0, + "default": 0, + } + BACKEND_MANUAL_SEED = { + "cuda": torch.cuda.manual_seed, + "xpu": torch.xpu.manual_seed, + "cpu": torch.manual_seed, + "mps": torch.mps.manual_seed, + "default": torch.manual_seed, + } + BACKEND_RESET_PEAK_MEMORY_STATS = { + "cuda": torch.cuda.reset_peak_memory_stats, + "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None), + "cpu": None, + "mps": None, + "default": None, + } + BACKEND_RESET_MAX_MEMORY_ALLOCATED = { + "cuda": torch.cuda.reset_max_memory_allocated, + "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None), + "cpu": None, + "mps": None, + "default": None, + } + BACKEND_MAX_MEMORY_ALLOCATED = { + "cuda": torch.cuda.max_memory_allocated, + "xpu": getattr(torch.xpu, "max_memory_allocated", None), + "cpu": 0, + "mps": 0, + "default": 0, + } + BACKEND_SYNCHRONIZE = { + "cuda": torch.cuda.synchronize, + "xpu": getattr(torch.xpu, "synchronize", None), + "cpu": None, + "mps": None, + "default": None, + } +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +try: + from torch._dynamo import allow_in_graph as maybe_allow_in_graph +except (ImportError, ModuleNotFoundError): + + def maybe_allow_in_graph(cls): + return cls + + +# This dispatches a defined function according to the accelerator from the function definitions. +def _device_agnostic_dispatch(device: str, dispatch_table: dict[str, callable], *args, **kwargs): + if device not in dispatch_table: + return dispatch_table["default"](*args, **kwargs) + + fn = dispatch_table[device] + + # Some device agnostic functions return values. Need to guard against 'None' instead at + # user level + if not callable(fn): + return fn + + return fn(*args, **kwargs) + + +# These are callables which automatically dispatch the function specific to the accelerator +def backend_manual_seed(device: str, seed: int): + return _device_agnostic_dispatch(device, BACKEND_MANUAL_SEED, seed) + + +def backend_synchronize(device: str): + return _device_agnostic_dispatch(device, BACKEND_SYNCHRONIZE) + + +def backend_empty_cache(device: str): + return _device_agnostic_dispatch(device, BACKEND_EMPTY_CACHE) + + +def backend_device_count(device: str): + return _device_agnostic_dispatch(device, BACKEND_DEVICE_COUNT) + + +def backend_reset_peak_memory_stats(device: str): + return _device_agnostic_dispatch(device, BACKEND_RESET_PEAK_MEMORY_STATS) + + +def backend_reset_max_memory_allocated(device: str): + return _device_agnostic_dispatch(device, BACKEND_RESET_MAX_MEMORY_ALLOCATED) + + +def backend_max_memory_allocated(device: str): + return _device_agnostic_dispatch(device, BACKEND_MAX_MEMORY_ALLOCATED) + + +# These are callables which return boolean behaviour flags and can be used to specify some +# device agnostic alternative where the feature is unsupported. +def backend_supports_training(device: str): + if not is_torch_available(): + return False + + if device not in BACKEND_SUPPORTS_TRAINING: + device = "default" + + return BACKEND_SUPPORTS_TRAINING[device] + + +def randn_tensor( + shape: tuple | list, + generator: list["torch.Generator"] | "torch.Generator" | None = None, + device: str | "torch.device" | None = None, + dtype: "torch.dtype" | None = None, + layout: "torch.layout" | None = None, +): + """A helper function to create random tensors on the desired `device` with the desired `dtype`. When + passing a list of generators, you can seed each batch size individually. If CPU generators are passed, the tensor + is always created on the CPU. + """ + # device on which tensor is created defaults to device + if isinstance(device, str): + device = torch.device(device) + rand_device = device + batch_size = shape[0] + + layout = layout or torch.strided + device = device or torch.device("cpu") + + if generator is not None: + gen_device_type = generator.device.type if not isinstance(generator, list) else generator[0].device.type + if gen_device_type != device.type and gen_device_type == "cpu": + rand_device = "cpu" + if device != "mps": + logger.info( + f"The passed generator was created on 'cpu' even though a tensor on {device} was expected." + f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably" + f" slightly speed up this function by passing a generator that was created on the {device} device." + ) + elif gen_device_type != device.type and gen_device_type == "cuda": + raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.") + + # make sure generator list of length 1 is treated like a non-list + if isinstance(generator, list) and len(generator) == 1: + generator = generator[0] + + if isinstance(generator, list): + shape = (1,) + shape[1:] + latents = [ + torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype, layout=layout) + for i in range(batch_size) + ] + latents = torch.cat(latents, dim=0).to(device) + else: + latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype, layout=layout).to(device) + + return latents + + +def is_compiled_module(module) -> bool: + """Check whether the module was compiled with torch.compile()""" + if is_torch_version("<", "2.0.0") or not hasattr(torch, "_dynamo"): + return False + return isinstance(module, torch._dynamo.eval_frame.OptimizedModule) + + +def unwrap_module(module): + """Unwraps a module if it was compiled with torch.compile()""" + return module._orig_mod if is_compiled_module(module) else module + + +def fourier_filter(x_in: "torch.Tensor", threshold: int, scale: int) -> "torch.Tensor": + """Fourier filter as introduced in FreeU (https://huggingface.co/papers/2309.11497). + + This version of the method comes from here: + https://github.com/huggingface/diffusers/pull/5164#issuecomment-1732638706 + """ + x = x_in + B, C, H, W = x.shape + + # Non-power of 2 images must be float32 + if (W & (W - 1)) != 0 or (H & (H - 1)) != 0: + x = x.to(dtype=torch.float32) + # fftn does not support bfloat16 + elif x.dtype == torch.bfloat16: + x = x.to(dtype=torch.float32) + + # FFT + x_freq = fftn(x, dim=(-2, -1)) + x_freq = fftshift(x_freq, dim=(-2, -1)) + + B, C, H, W = x_freq.shape + mask = torch.ones((B, C, H, W), device=x.device) + + crow, ccol = H // 2, W // 2 + mask[..., crow - threshold : crow + threshold, ccol - threshold : ccol + threshold] = scale + x_freq = x_freq * mask + + # IFFT + x_freq = ifftshift(x_freq, dim=(-2, -1)) + x_filtered = ifftn(x_freq, dim=(-2, -1)).real + + return x_filtered.to(dtype=x_in.dtype) + + +def apply_freeu( + resolution_idx: int, hidden_states: "torch.Tensor", res_hidden_states: "torch.Tensor", **freeu_kwargs +) -> tuple["torch.Tensor", "torch.Tensor"]: + """Applies the FreeU mechanism as introduced in https: + //arxiv.org/abs/2309.11497. Adapted from the official code repository: https://github.com/ChenyangSi/FreeU. + + Args: + resolution_idx (`int`): Integer denoting the UNet block where FreeU is being applied. + hidden_states (`torch.Tensor`): Inputs to the underlying block. + res_hidden_states (`torch.Tensor`): Features from the skip block corresponding to the underlying block. + s1 (`float`): Scaling factor for stage 1 to attenuate the contributions of the skip features. + s2 (`float`): Scaling factor for stage 2 to attenuate the contributions of the skip features. + b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. + b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. + """ + if resolution_idx == 0: + num_half_channels = hidden_states.shape[1] // 2 + hidden_states[:, :num_half_channels] = hidden_states[:, :num_half_channels] * freeu_kwargs["b1"] + res_hidden_states = fourier_filter(res_hidden_states, threshold=1, scale=freeu_kwargs["s1"]) + if resolution_idx == 1: + num_half_channels = hidden_states.shape[1] // 2 + hidden_states[:, :num_half_channels] = hidden_states[:, :num_half_channels] * freeu_kwargs["b2"] + res_hidden_states = fourier_filter(res_hidden_states, threshold=1, scale=freeu_kwargs["s2"]) + + return hidden_states, res_hidden_states + + +def get_torch_cuda_device_capability(): + if torch.cuda.is_available(): + device = torch.device("cuda") + compute_capability = torch.cuda.get_device_capability(device) + compute_capability = f"{compute_capability[0]}.{compute_capability[1]}" + return float(compute_capability) + else: + return None + + +@functools.lru_cache +def get_device(): + if torch.cuda.is_available(): + return "cuda" + elif is_torch_npu_available(): + return "npu" + elif hasattr(torch, "xpu") and torch.xpu.is_available(): + return "xpu" + elif torch.backends.mps.is_available(): + return "mps" + elif is_torch_mlu_available(): + return "mlu" + else: + return "cpu" + + +def empty_device_cache(device_type: str | None = None): + if device_type is None: + device_type = get_device() + if device_type in ["cpu"]: + return + device_mod = getattr(torch, device_type, torch.cuda) + device_mod.empty_cache() + + +def device_synchronize(device_type: str | None = None): + if device_type is None: + device_type = get_device() + device_mod = getattr(torch, device_type, torch.cuda) + device_mod.synchronize() + + +def enable_full_determinism(): + """ + Helper function for reproducible behavior during distributed training. See + - https://pytorch.org/docs/stable/notes/randomness.html for pytorch + """ + # Enable PyTorch deterministic mode. This potentially requires either the environment + # variable 'CUDA_LAUNCH_BLOCKING' or 'CUBLAS_WORKSPACE_CONFIG' to be set, + # depending on the CUDA version, so we set them both here + os.environ["CUDA_LAUNCH_BLOCKING"] = "1" + os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8" + torch.use_deterministic_algorithms(True) + + # Enable CUDNN deterministic mode + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.backends.cuda.matmul.allow_tf32 = False + + +def disable_full_determinism(): + os.environ["CUDA_LAUNCH_BLOCKING"] = "0" + os.environ["CUBLAS_WORKSPACE_CONFIG"] = "" + torch.use_deterministic_algorithms(False) + + +if is_torch_available(): + torch_device = get_device() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/typing_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/typing_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e910932f33d93f0b2f8919227eda9042687e323f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/typing_utils.py @@ -0,0 +1,91 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Typing utilities: Utilities related to type checking and validation +""" + +from typing import Any, Dict, List, Set, Tuple, Type, Union, get_args, get_origin + + +def _is_valid_type(obj: Any, class_or_tuple: Type | tuple[Type, ...]) -> bool: + """ + Checks if an object is an instance of any of the provided types. For collections, it checks if every element is of + the correct type as well. + """ + if not isinstance(class_or_tuple, tuple): + class_or_tuple = (class_or_tuple,) + + # Unpack unions + unpacked_class_or_tuple = [] + for t in class_or_tuple: + if get_origin(t) is Union: + unpacked_class_or_tuple.extend(get_args(t)) + else: + unpacked_class_or_tuple.append(t) + class_or_tuple = tuple(unpacked_class_or_tuple) + + if Any in class_or_tuple: + return True + + obj_type = type(obj) + # Classes with obj's type + class_or_tuple = {t for t in class_or_tuple if isinstance(obj, get_origin(t) or t)} + + # Singular types (e.g. int, ControlNet, ...) + # Untyped collections (e.g. List, but not List[int]) + elem_class_or_tuple = {get_args(t) for t in class_or_tuple} + if () in elem_class_or_tuple: + return True + # Typed lists or sets + elif obj_type in (list, set): + return any(all(_is_valid_type(x, t) for x in obj) for t in elem_class_or_tuple) + # Typed tuples + elif obj_type is tuple: + return any( + # Tuples with any length and single type (e.g. Tuple[int, ...]) + (len(t) == 2 and t[-1] is Ellipsis and all(_is_valid_type(x, t[0]) for x in obj)) + or + # Tuples with fixed length and any types (e.g. Tuple[int, str]) + (len(obj) == len(t) and all(_is_valid_type(x, tt) for x, tt in zip(obj, t))) + for t in elem_class_or_tuple + ) + # Typed dicts + elif obj_type is dict: + return any( + all(_is_valid_type(k, kt) and _is_valid_type(v, vt) for k, v in obj.items()) + for kt, vt in elem_class_or_tuple + ) + + else: + return False + + +def _get_detailed_type(obj: Any) -> Type: + """ + Gets a detailed type for an object, including nested types for collections. + """ + obj_type = type(obj) + + if obj_type in (list, set): + obj_origin_type = List if obj_type is list else Set + elems_type = Union[tuple({_get_detailed_type(x) for x in obj})] + return obj_origin_type[elems_type] + elif obj_type is tuple: + return Tuple[tuple(_get_detailed_type(x) for x in obj)] + elif obj_type is dict: + keys_type = Union[tuple({_get_detailed_type(k) for k in obj.keys()})] + values_type = Union[tuple({_get_detailed_type(k) for k in obj.values()})] + return Dict[keys_type, values_type] + else: + return obj_type diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/versions.py b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/versions.py new file mode 100644 index 0000000000000000000000000000000000000000..ccede7b5ee5737f7e76fe9e00f9f836eb695d16a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers/utils/versions.py @@ -0,0 +1,116 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Utilities for working with package versions +""" + +import importlib.metadata +import operator +import re +import sys + +from packaging import version + + +ops = { + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} + + +def _compare_versions(op, got_ver, want_ver, requirement, pkg, hint): + if got_ver is None or want_ver is None: + raise ValueError( + f"Unable to compare versions for {requirement}: need={want_ver} found={got_ver}. This is unusual. Consider" + f" reinstalling {pkg}." + ) + if not ops[op](version.parse(got_ver), version.parse(want_ver)): + raise ImportError( + f"{requirement} is required for a normal functioning of this module, but found {pkg}=={got_ver}.{hint}" + ) + + +def require_version(requirement: str, hint: str | None = None) -> None: + """ + Perform a runtime check of the dependency versions, using the exact same syntax used by pip. + + The installed module version comes from the *site-packages* dir via *importlib.metadata*. + + Args: + requirement (`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy" + hint (`str`, *optional*): what suggestion to print in case of requirements not being met + + Example: + + ```python + require_version("pandas>1.1.2") + require_version("numpy>1.18.5", "this is important to have for whatever reason") + ```""" + + hint = f"\n{hint}" if hint is not None else "" + + # non-versioned check + if re.match(r"^[\w_\-\d]+$", requirement): + pkg, op, want_ver = requirement, None, None + else: + match = re.findall(r"^([^!=<>\s]+)([\s!=<>]{1,2}.+)", requirement) + if not match: + raise ValueError( + "requirement needs to be in the pip package format, .e.g., package_a==1.23, or package_b>=1.23, but" + f" got {requirement}" + ) + pkg, want_full = match[0] + want_range = want_full.split(",") # there could be multiple requirements + wanted = {} + for w in want_range: + match = re.findall(r"^([\s!=<>]{1,2})(.+)", w) + if not match: + raise ValueError( + "requirement needs to be in the pip package format, .e.g., package_a==1.23, or package_b>=1.23," + f" but got {requirement}" + ) + op, want_ver = match[0] + wanted[op] = want_ver + if op not in ops: + raise ValueError(f"{requirement}: need one of {list(ops.keys())}, but got {op}") + + # special case + if pkg == "python": + got_ver = ".".join([str(x) for x in sys.version_info[:3]]) + for op, want_ver in wanted.items(): + _compare_versions(op, got_ver, want_ver, requirement, pkg, hint) + return + + # check if any version is installed + try: + got_ver = importlib.metadata.version(pkg) + except importlib.metadata.PackageNotFoundError: + raise importlib.metadata.PackageNotFoundError( + f"The '{requirement}' distribution was not found and is required by this application. {hint}" + ) + + # check that the right version is installed if version number or a range was provided + if want_ver is not None: + for op, want_ver in wanted.items(): + _compare_versions(op, got_ver, want_ver, requirement, pkg, hint) + + +def require_version_core(requirement): + """require_version wrapper which emits a core-specific hint on failure""" + hint = "Try: pip install transformers -U or pip install -e '.[dev]' if you're working with git main" + return require_version(requirement, hint) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6674c5d18cc1ad7b365de26c14957b45bd194e66 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/licenses/LICENSE.md b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/licenses/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..311b2b56c53f678ab95fc0def708c675d521a807 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/licenses/LICENSE.md @@ -0,0 +1,27 @@ +Copyright © 2020, [Encode OSS Ltd](https://www.encode.io/). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91436a20b9a048842a670c5f6a2f31286eee856a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_api.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_api.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd52a7015fc9cd5df1e4b49757d9952f16a75c97 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_api.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_exceptions.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_exceptions.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36b4c6e89a624e3d631ac13832a3707945ed9a48 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_exceptions.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_models.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_models.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49f4af293d31dc1b22ba3926b6dba30b188cd67e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_models.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_ssl.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_ssl.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eea8951330f05159da1778adbf1d3379bafdce0f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_ssl.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_synchronization.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_synchronization.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3be5c8615245abd040e5c5e42698267ffeae621b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_synchronization.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_trace.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_trace.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bb861c29fc3f8bd8025c3b7534b319705edb6c64 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_trace.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d535651763c04237ea049482055929640f5a7d76 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/__pycache__/_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..88dc7f01e132933728cbcf45c88ce82e85ddf65f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__init__.py @@ -0,0 +1,39 @@ +from .connection import AsyncHTTPConnection +from .connection_pool import AsyncConnectionPool +from .http11 import AsyncHTTP11Connection +from .http_proxy import AsyncHTTPProxy +from .interfaces import AsyncConnectionInterface + +try: + from .http2 import AsyncHTTP2Connection +except ImportError: # pragma: nocover + + class AsyncHTTP2Connection: # type: ignore + def __init__(self, *args, **kwargs) -> None: # type: ignore + raise RuntimeError( + "Attempted to use http2 support, but the `h2` package is not " + "installed. Use 'pip install httpcore[http2]'." + ) + + +try: + from .socks_proxy import AsyncSOCKSProxy +except ImportError: # pragma: nocover + + class AsyncSOCKSProxy: # type: ignore + def __init__(self, *args, **kwargs) -> None: # type: ignore + raise RuntimeError( + "Attempted to use SOCKS support, but the `socksio` package is not " + "installed. Use 'pip install httpcore[socks]'." + ) + + +__all__ = [ + "AsyncHTTPConnection", + "AsyncConnectionPool", + "AsyncHTTPProxy", + "AsyncHTTP11Connection", + "AsyncHTTP2Connection", + "AsyncConnectionInterface", + "AsyncSOCKSProxy", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f82a291780caf448d1f4ea7488311c946724cc31 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/connection.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/connection.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd20a68526a870d4739781156213edb65060fe83 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/connection.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/connection_pool.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/connection_pool.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70f80672bb726f2178eb1781ff5531fa25b22b0d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/connection_pool.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http11.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http11.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c88b92c2d0ebd9ca85335d9214ca375dd7d5c6f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http11.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3215f0a831917115117d3014105f382c3b5bccf7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http_proxy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http_proxy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad463a59f9df6479721e088906e8906155c546b6 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/http_proxy.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/interfaces.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/interfaces.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20748e5ec8c0a7a85297d3e1f2324de59526dabf Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/interfaces.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/socks_proxy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/socks_proxy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6cc076173653d073b4c4c499a7fed4fe06829bde Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/__pycache__/socks_proxy.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/connection.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/connection.py new file mode 100644 index 0000000000000000000000000000000000000000..b42581dff8aabf4c2ef80ffda26296e1b368d693 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/connection.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +import itertools +import logging +import ssl +import types +import typing + +from .._backends.auto import AutoBackend +from .._backends.base import SOCKET_OPTION, AsyncNetworkBackend, AsyncNetworkStream +from .._exceptions import ConnectError, ConnectTimeout +from .._models import Origin, Request, Response +from .._ssl import default_ssl_context +from .._synchronization import AsyncLock +from .._trace import Trace +from .http11 import AsyncHTTP11Connection +from .interfaces import AsyncConnectionInterface + +RETRIES_BACKOFF_FACTOR = 0.5 # 0s, 0.5s, 1s, 2s, 4s, etc. + + +logger = logging.getLogger("httpcore.connection") + + +def exponential_backoff(factor: float) -> typing.Iterator[float]: + """ + Generate a geometric sequence that has a ratio of 2 and starts with 0. + + For example: + - `factor = 2`: `0, 2, 4, 8, 16, 32, 64, ...` + - `factor = 3`: `0, 3, 6, 12, 24, 48, 96, ...` + """ + yield 0 + for n in itertools.count(): + yield factor * 2**n + + +class AsyncHTTPConnection(AsyncConnectionInterface): + def __init__( + self, + origin: Origin, + ssl_context: ssl.SSLContext | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + local_address: str | None = None, + uds: str | None = None, + network_backend: AsyncNetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + self._origin = origin + self._ssl_context = ssl_context + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + self._retries = retries + self._local_address = local_address + self._uds = uds + + self._network_backend: AsyncNetworkBackend = ( + AutoBackend() if network_backend is None else network_backend + ) + self._connection: AsyncConnectionInterface | None = None + self._connect_failed: bool = False + self._request_lock = AsyncLock() + self._socket_options = socket_options + + async def handle_async_request(self, request: Request) -> Response: + if not self.can_handle_request(request.url.origin): + raise RuntimeError( + f"Attempted to send request to {request.url.origin} on connection to {self._origin}" + ) + + try: + async with self._request_lock: + if self._connection is None: + stream = await self._connect(request) + + ssl_object = stream.get_extra_info("ssl_object") + http2_negotiated = ( + ssl_object is not None + and ssl_object.selected_alpn_protocol() == "h2" + ) + if http2_negotiated or (self._http2 and not self._http1): + from .http2 import AsyncHTTP2Connection + + self._connection = AsyncHTTP2Connection( + origin=self._origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + else: + self._connection = AsyncHTTP11Connection( + origin=self._origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + except BaseException as exc: + self._connect_failed = True + raise exc + + return await self._connection.handle_async_request(request) + + async def _connect(self, request: Request) -> AsyncNetworkStream: + timeouts = request.extensions.get("timeout", {}) + sni_hostname = request.extensions.get("sni_hostname", None) + timeout = timeouts.get("connect", None) + + retries_left = self._retries + delays = exponential_backoff(factor=RETRIES_BACKOFF_FACTOR) + + while True: + try: + if self._uds is None: + kwargs = { + "host": self._origin.host.decode("ascii"), + "port": self._origin.port, + "local_address": self._local_address, + "timeout": timeout, + "socket_options": self._socket_options, + } + async with Trace("connect_tcp", logger, request, kwargs) as trace: + stream = await self._network_backend.connect_tcp(**kwargs) + trace.return_value = stream + else: + kwargs = { + "path": self._uds, + "timeout": timeout, + "socket_options": self._socket_options, + } + async with Trace( + "connect_unix_socket", logger, request, kwargs + ) as trace: + stream = await self._network_backend.connect_unix_socket( + **kwargs + ) + trace.return_value = stream + + if self._origin.scheme in (b"https", b"wss"): + ssl_context = ( + default_ssl_context() + if self._ssl_context is None + else self._ssl_context + ) + alpn_protocols = ["http/1.1", "h2"] if self._http2 else ["http/1.1"] + ssl_context.set_alpn_protocols(alpn_protocols) + + kwargs = { + "ssl_context": ssl_context, + "server_hostname": sni_hostname + or self._origin.host.decode("ascii"), + "timeout": timeout, + } + async with Trace("start_tls", logger, request, kwargs) as trace: + stream = await stream.start_tls(**kwargs) + trace.return_value = stream + return stream + except (ConnectError, ConnectTimeout): + if retries_left <= 0: + raise + retries_left -= 1 + delay = next(delays) + async with Trace("retry", logger, request, kwargs) as trace: + await self._network_backend.sleep(delay) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._origin + + async def aclose(self) -> None: + if self._connection is not None: + async with Trace("close", logger, None, {}): + await self._connection.aclose() + + def is_available(self) -> bool: + if self._connection is None: + # If HTTP/2 support is enabled, and the resulting connection could + # end up as HTTP/2 then we should indicate the connection as being + # available to service multiple requests. + return ( + self._http2 + and (self._origin.scheme == b"https" or not self._http1) + and not self._connect_failed + ) + return self._connection.is_available() + + def has_expired(self) -> bool: + if self._connection is None: + return self._connect_failed + return self._connection.has_expired() + + def is_idle(self) -> bool: + if self._connection is None: + return self._connect_failed + return self._connection.is_idle() + + def is_closed(self) -> bool: + if self._connection is None: + return self._connect_failed + return self._connection.is_closed() + + def info(self) -> str: + if self._connection is None: + return "CONNECTION FAILED" if self._connect_failed else "CONNECTING" + return self._connection.info() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" + + # These context managers are not used in the standard flow, but are + # useful for testing or working with connection instances directly. + + async def __aenter__(self) -> AsyncHTTPConnection: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + await self.aclose() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/connection_pool.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/connection_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..96e973d0ce223f6bed9be9e6a6a2f3c01622c611 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/connection_pool.py @@ -0,0 +1,420 @@ +from __future__ import annotations + +import ssl +import sys +import types +import typing + +from .._backends.auto import AutoBackend +from .._backends.base import SOCKET_OPTION, AsyncNetworkBackend +from .._exceptions import ConnectionNotAvailable, UnsupportedProtocol +from .._models import Origin, Proxy, Request, Response +from .._synchronization import AsyncEvent, AsyncShieldCancellation, AsyncThreadLock +from .connection import AsyncHTTPConnection +from .interfaces import AsyncConnectionInterface, AsyncRequestInterface + + +class AsyncPoolRequest: + def __init__(self, request: Request) -> None: + self.request = request + self.connection: AsyncConnectionInterface | None = None + self._connection_acquired = AsyncEvent() + + def assign_to_connection(self, connection: AsyncConnectionInterface | None) -> None: + self.connection = connection + self._connection_acquired.set() + + def clear_connection(self) -> None: + self.connection = None + self._connection_acquired = AsyncEvent() + + async def wait_for_connection( + self, timeout: float | None = None + ) -> AsyncConnectionInterface: + if self.connection is None: + await self._connection_acquired.wait(timeout=timeout) + assert self.connection is not None + return self.connection + + def is_queued(self) -> bool: + return self.connection is None + + +class AsyncConnectionPool(AsyncRequestInterface): + """ + A connection pool for making HTTP requests. + """ + + def __init__( + self, + ssl_context: ssl.SSLContext | None = None, + proxy: Proxy | None = None, + max_connections: int | None = 10, + max_keepalive_connections: int | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + local_address: str | None = None, + uds: str | None = None, + network_backend: AsyncNetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + """ + A connection pool for making HTTP requests. + + Parameters: + ssl_context: An SSL context to use for verifying connections. + If not specified, the default `httpcore.default_ssl_context()` + will be used. + max_connections: The maximum number of concurrent HTTP connections that + the pool should allow. Any attempt to send a request on a pool that + would exceed this amount will block until a connection is available. + max_keepalive_connections: The maximum number of idle HTTP connections + that will be maintained in the pool. + keepalive_expiry: The duration in seconds that an idle HTTP connection + may be maintained for before being expired from the pool. + http1: A boolean indicating if HTTP/1.1 requests should be supported + by the connection pool. Defaults to True. + http2: A boolean indicating if HTTP/2 requests should be supported by + the connection pool. Defaults to False. + retries: The maximum number of retries when trying to establish a + connection. + local_address: Local address to connect from. Can also be used to connect + using a particular address family. Using `local_address="0.0.0.0"` + will connect using an `AF_INET` address (IPv4), while using + `local_address="::"` will connect using an `AF_INET6` address (IPv6). + uds: Path to a Unix Domain Socket to use instead of TCP sockets. + network_backend: A backend instance to use for handling network I/O. + socket_options: Socket options that have to be included + in the TCP socket when the connection was established. + """ + self._ssl_context = ssl_context + self._proxy = proxy + self._max_connections = ( + sys.maxsize if max_connections is None else max_connections + ) + self._max_keepalive_connections = ( + sys.maxsize + if max_keepalive_connections is None + else max_keepalive_connections + ) + self._max_keepalive_connections = min( + self._max_connections, self._max_keepalive_connections + ) + + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + self._retries = retries + self._local_address = local_address + self._uds = uds + + self._network_backend = ( + AutoBackend() if network_backend is None else network_backend + ) + self._socket_options = socket_options + + # The mutable state on a connection pool is the queue of incoming requests, + # and the set of connections that are servicing those requests. + self._connections: list[AsyncConnectionInterface] = [] + self._requests: list[AsyncPoolRequest] = [] + + # We only mutate the state of the connection pool within an 'optional_thread_lock' + # context. This holds a threading lock unless we're running in async mode, + # in which case it is a no-op. + self._optional_thread_lock = AsyncThreadLock() + + def create_connection(self, origin: Origin) -> AsyncConnectionInterface: + if self._proxy is not None: + if self._proxy.url.scheme in (b"socks5", b"socks5h"): + from .socks_proxy import AsyncSocks5Connection + + return AsyncSocks5Connection( + proxy_origin=self._proxy.url.origin, + proxy_auth=self._proxy.auth, + remote_origin=origin, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + elif origin.scheme == b"http": + from .http_proxy import AsyncForwardHTTPConnection + + return AsyncForwardHTTPConnection( + proxy_origin=self._proxy.url.origin, + proxy_headers=self._proxy.headers, + proxy_ssl_context=self._proxy.ssl_context, + remote_origin=origin, + keepalive_expiry=self._keepalive_expiry, + network_backend=self._network_backend, + ) + from .http_proxy import AsyncTunnelHTTPConnection + + return AsyncTunnelHTTPConnection( + proxy_origin=self._proxy.url.origin, + proxy_headers=self._proxy.headers, + proxy_ssl_context=self._proxy.ssl_context, + remote_origin=origin, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + + return AsyncHTTPConnection( + origin=origin, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + retries=self._retries, + local_address=self._local_address, + uds=self._uds, + network_backend=self._network_backend, + socket_options=self._socket_options, + ) + + @property + def connections(self) -> list[AsyncConnectionInterface]: + """ + Return a list of the connections currently in the pool. + + For example: + + ```python + >>> pool.connections + [ + , + , + , + ] + ``` + """ + return list(self._connections) + + async def handle_async_request(self, request: Request) -> Response: + """ + Send an HTTP request, and return an HTTP response. + + This is the core implementation that is called into by `.request()` or `.stream()`. + """ + scheme = request.url.scheme.decode() + if scheme == "": + raise UnsupportedProtocol( + "Request URL is missing an 'http://' or 'https://' protocol." + ) + if scheme not in ("http", "https", "ws", "wss"): + raise UnsupportedProtocol( + f"Request URL has an unsupported protocol '{scheme}://'." + ) + + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("pool", None) + + with self._optional_thread_lock: + # Add the incoming request to our request queue. + pool_request = AsyncPoolRequest(request) + self._requests.append(pool_request) + + try: + while True: + with self._optional_thread_lock: + # Assign incoming requests to available connections, + # closing or creating new connections as required. + closing = self._assign_requests_to_connections() + await self._close_connections(closing) + + # Wait until this request has an assigned connection. + connection = await pool_request.wait_for_connection(timeout=timeout) + + try: + # Send the request on the assigned connection. + response = await connection.handle_async_request( + pool_request.request + ) + except ConnectionNotAvailable: + # In some cases a connection may initially be available to + # handle a request, but then become unavailable. + # + # In this case we clear the connection and try again. + pool_request.clear_connection() + else: + break # pragma: nocover + + except BaseException as exc: + with self._optional_thread_lock: + # For any exception or cancellation we remove the request from + # the queue, and then re-assign requests to connections. + self._requests.remove(pool_request) + closing = self._assign_requests_to_connections() + + await self._close_connections(closing) + raise exc from None + + # Return the response. Note that in this case we still have to manage + # the point at which the response is closed. + assert isinstance(response.stream, typing.AsyncIterable) + return Response( + status=response.status, + headers=response.headers, + content=PoolByteStream( + stream=response.stream, pool_request=pool_request, pool=self + ), + extensions=response.extensions, + ) + + def _assign_requests_to_connections(self) -> list[AsyncConnectionInterface]: + """ + Manage the state of the connection pool, assigning incoming + requests to connections as available. + + Called whenever a new request is added or removed from the pool. + + Any closing connections are returned, allowing the I/O for closing + those connections to be handled seperately. + """ + closing_connections = [] + + # First we handle cleaning up any connections that are closed, + # have expired their keep-alive, or surplus idle connections. + for connection in list(self._connections): + if connection.is_closed(): + # log: "removing closed connection" + self._connections.remove(connection) + elif connection.has_expired(): + # log: "closing expired connection" + self._connections.remove(connection) + closing_connections.append(connection) + elif ( + connection.is_idle() + and len([connection.is_idle() for connection in self._connections]) + > self._max_keepalive_connections + ): + # log: "closing idle connection" + self._connections.remove(connection) + closing_connections.append(connection) + + # Assign queued requests to connections. + queued_requests = [request for request in self._requests if request.is_queued()] + for pool_request in queued_requests: + origin = pool_request.request.url.origin + available_connections = [ + connection + for connection in self._connections + if connection.can_handle_request(origin) and connection.is_available() + ] + idle_connections = [ + connection for connection in self._connections if connection.is_idle() + ] + + # There are three cases for how we may be able to handle the request: + # + # 1. There is an existing connection that can handle the request. + # 2. We can create a new connection to handle the request. + # 3. We can close an idle connection and then create a new connection + # to handle the request. + if available_connections: + # log: "reusing existing connection" + connection = available_connections[0] + pool_request.assign_to_connection(connection) + elif len(self._connections) < self._max_connections: + # log: "creating new connection" + connection = self.create_connection(origin) + self._connections.append(connection) + pool_request.assign_to_connection(connection) + elif idle_connections: + # log: "closing idle connection" + connection = idle_connections[0] + self._connections.remove(connection) + closing_connections.append(connection) + # log: "creating new connection" + connection = self.create_connection(origin) + self._connections.append(connection) + pool_request.assign_to_connection(connection) + + return closing_connections + + async def _close_connections(self, closing: list[AsyncConnectionInterface]) -> None: + # Close connections which have been removed from the pool. + with AsyncShieldCancellation(): + for connection in closing: + await connection.aclose() + + async def aclose(self) -> None: + # Explicitly close the connection pool. + # Clears all existing requests and connections. + with self._optional_thread_lock: + closing_connections = list(self._connections) + self._connections = [] + await self._close_connections(closing_connections) + + async def __aenter__(self) -> AsyncConnectionPool: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + await self.aclose() + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + with self._optional_thread_lock: + request_is_queued = [request.is_queued() for request in self._requests] + connection_is_idle = [ + connection.is_idle() for connection in self._connections + ] + + num_active_requests = request_is_queued.count(False) + num_queued_requests = request_is_queued.count(True) + num_active_connections = connection_is_idle.count(False) + num_idle_connections = connection_is_idle.count(True) + + requests_info = ( + f"Requests: {num_active_requests} active, {num_queued_requests} queued" + ) + connection_info = ( + f"Connections: {num_active_connections} active, {num_idle_connections} idle" + ) + + return f"<{class_name} [{requests_info} | {connection_info}]>" + + +class PoolByteStream: + def __init__( + self, + stream: typing.AsyncIterable[bytes], + pool_request: AsyncPoolRequest, + pool: AsyncConnectionPool, + ) -> None: + self._stream = stream + self._pool_request = pool_request + self._pool = pool + self._closed = False + + async def __aiter__(self) -> typing.AsyncIterator[bytes]: + try: + async for part in self._stream: + yield part + except BaseException as exc: + await self.aclose() + raise exc from None + + async def aclose(self) -> None: + if not self._closed: + self._closed = True + with AsyncShieldCancellation(): + if hasattr(self._stream, "aclose"): + await self._stream.aclose() + + with self._pool._optional_thread_lock: + self._pool._requests.remove(self._pool_request) + closing = self._pool._assign_requests_to_connections() + + await self._pool._close_connections(closing) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http11.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http11.py new file mode 100644 index 0000000000000000000000000000000000000000..e6d6d709852b137a862cfe2b3af42dc790fa705d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http11.py @@ -0,0 +1,379 @@ +from __future__ import annotations + +import enum +import logging +import ssl +import time +import types +import typing + +import h11 + +from .._backends.base import AsyncNetworkStream +from .._exceptions import ( + ConnectionNotAvailable, + LocalProtocolError, + RemoteProtocolError, + WriteError, + map_exceptions, +) +from .._models import Origin, Request, Response +from .._synchronization import AsyncLock, AsyncShieldCancellation +from .._trace import Trace +from .interfaces import AsyncConnectionInterface + +logger = logging.getLogger("httpcore.http11") + + +# A subset of `h11.Event` types supported by `_send_event` +H11SendEvent = typing.Union[ + h11.Request, + h11.Data, + h11.EndOfMessage, +] + + +class HTTPConnectionState(enum.IntEnum): + NEW = 0 + ACTIVE = 1 + IDLE = 2 + CLOSED = 3 + + +class AsyncHTTP11Connection(AsyncConnectionInterface): + READ_NUM_BYTES = 64 * 1024 + MAX_INCOMPLETE_EVENT_SIZE = 100 * 1024 + + def __init__( + self, + origin: Origin, + stream: AsyncNetworkStream, + keepalive_expiry: float | None = None, + ) -> None: + self._origin = origin + self._network_stream = stream + self._keepalive_expiry: float | None = keepalive_expiry + self._expire_at: float | None = None + self._state = HTTPConnectionState.NEW + self._state_lock = AsyncLock() + self._request_count = 0 + self._h11_state = h11.Connection( + our_role=h11.CLIENT, + max_incomplete_event_size=self.MAX_INCOMPLETE_EVENT_SIZE, + ) + + async def handle_async_request(self, request: Request) -> Response: + if not self.can_handle_request(request.url.origin): + raise RuntimeError( + f"Attempted to send request to {request.url.origin} on connection " + f"to {self._origin}" + ) + + async with self._state_lock: + if self._state in (HTTPConnectionState.NEW, HTTPConnectionState.IDLE): + self._request_count += 1 + self._state = HTTPConnectionState.ACTIVE + self._expire_at = None + else: + raise ConnectionNotAvailable() + + try: + kwargs = {"request": request} + try: + async with Trace( + "send_request_headers", logger, request, kwargs + ) as trace: + await self._send_request_headers(**kwargs) + async with Trace("send_request_body", logger, request, kwargs) as trace: + await self._send_request_body(**kwargs) + except WriteError: + # If we get a write error while we're writing the request, + # then we supress this error and move on to attempting to + # read the response. Servers can sometimes close the request + # pre-emptively and then respond with a well formed HTTP + # error response. + pass + + async with Trace( + "receive_response_headers", logger, request, kwargs + ) as trace: + ( + http_version, + status, + reason_phrase, + headers, + trailing_data, + ) = await self._receive_response_headers(**kwargs) + trace.return_value = ( + http_version, + status, + reason_phrase, + headers, + ) + + network_stream = self._network_stream + + # CONNECT or Upgrade request + if (status == 101) or ( + (request.method == b"CONNECT") and (200 <= status < 300) + ): + network_stream = AsyncHTTP11UpgradeStream(network_stream, trailing_data) + + return Response( + status=status, + headers=headers, + content=HTTP11ConnectionByteStream(self, request), + extensions={ + "http_version": http_version, + "reason_phrase": reason_phrase, + "network_stream": network_stream, + }, + ) + except BaseException as exc: + with AsyncShieldCancellation(): + async with Trace("response_closed", logger, request) as trace: + await self._response_closed() + raise exc + + # Sending the request... + + async def _send_request_headers(self, request: Request) -> None: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("write", None) + + with map_exceptions({h11.LocalProtocolError: LocalProtocolError}): + event = h11.Request( + method=request.method, + target=request.url.target, + headers=request.headers, + ) + await self._send_event(event, timeout=timeout) + + async def _send_request_body(self, request: Request) -> None: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("write", None) + + assert isinstance(request.stream, typing.AsyncIterable) + async for chunk in request.stream: + event = h11.Data(data=chunk) + await self._send_event(event, timeout=timeout) + + await self._send_event(h11.EndOfMessage(), timeout=timeout) + + async def _send_event(self, event: h11.Event, timeout: float | None = None) -> None: + bytes_to_send = self._h11_state.send(event) + if bytes_to_send is not None: + await self._network_stream.write(bytes_to_send, timeout=timeout) + + # Receiving the response... + + async def _receive_response_headers( + self, request: Request + ) -> tuple[bytes, int, bytes, list[tuple[bytes, bytes]], bytes]: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("read", None) + + while True: + event = await self._receive_event(timeout=timeout) + if isinstance(event, h11.Response): + break + if ( + isinstance(event, h11.InformationalResponse) + and event.status_code == 101 + ): + break + + http_version = b"HTTP/" + event.http_version + + # h11 version 0.11+ supports a `raw_items` interface to get the + # raw header casing, rather than the enforced lowercase headers. + headers = event.headers.raw_items() + + trailing_data, _ = self._h11_state.trailing_data + + return http_version, event.status_code, event.reason, headers, trailing_data + + async def _receive_response_body( + self, request: Request + ) -> typing.AsyncIterator[bytes]: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("read", None) + + while True: + event = await self._receive_event(timeout=timeout) + if isinstance(event, h11.Data): + yield bytes(event.data) + elif isinstance(event, (h11.EndOfMessage, h11.PAUSED)): + break + + async def _receive_event( + self, timeout: float | None = None + ) -> h11.Event | type[h11.PAUSED]: + while True: + with map_exceptions({h11.RemoteProtocolError: RemoteProtocolError}): + event = self._h11_state.next_event() + + if event is h11.NEED_DATA: + data = await self._network_stream.read( + self.READ_NUM_BYTES, timeout=timeout + ) + + # If we feed this case through h11 we'll raise an exception like: + # + # httpcore.RemoteProtocolError: can't handle event type + # ConnectionClosed when role=SERVER and state=SEND_RESPONSE + # + # Which is accurate, but not very informative from an end-user + # perspective. Instead we handle this case distinctly and treat + # it as a ConnectError. + if data == b"" and self._h11_state.their_state == h11.SEND_RESPONSE: + msg = "Server disconnected without sending a response." + raise RemoteProtocolError(msg) + + self._h11_state.receive_data(data) + else: + # mypy fails to narrow the type in the above if statement above + return event # type: ignore[return-value] + + async def _response_closed(self) -> None: + async with self._state_lock: + if ( + self._h11_state.our_state is h11.DONE + and self._h11_state.their_state is h11.DONE + ): + self._state = HTTPConnectionState.IDLE + self._h11_state.start_next_cycle() + if self._keepalive_expiry is not None: + now = time.monotonic() + self._expire_at = now + self._keepalive_expiry + else: + await self.aclose() + + # Once the connection is no longer required... + + async def aclose(self) -> None: + # Note that this method unilaterally closes the connection, and does + # not have any kind of locking in place around it. + self._state = HTTPConnectionState.CLOSED + await self._network_stream.aclose() + + # The AsyncConnectionInterface methods provide information about the state of + # the connection, allowing for a connection pooling implementation to + # determine when to reuse and when to close the connection... + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._origin + + def is_available(self) -> bool: + # Note that HTTP/1.1 connections in the "NEW" state are not treated as + # being "available". The control flow which created the connection will + # be able to send an outgoing request, but the connection will not be + # acquired from the connection pool for any other request. + return self._state == HTTPConnectionState.IDLE + + def has_expired(self) -> bool: + now = time.monotonic() + keepalive_expired = self._expire_at is not None and now > self._expire_at + + # If the HTTP connection is idle but the socket is readable, then the + # only valid state is that the socket is about to return b"", indicating + # a server-initiated disconnect. + server_disconnected = ( + self._state == HTTPConnectionState.IDLE + and self._network_stream.get_extra_info("is_readable") + ) + + return keepalive_expired or server_disconnected + + def is_idle(self) -> bool: + return self._state == HTTPConnectionState.IDLE + + def is_closed(self) -> bool: + return self._state == HTTPConnectionState.CLOSED + + def info(self) -> str: + origin = str(self._origin) + return ( + f"{origin!r}, HTTP/1.1, {self._state.name}, " + f"Request Count: {self._request_count}" + ) + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + origin = str(self._origin) + return ( + f"<{class_name} [{origin!r}, {self._state.name}, " + f"Request Count: {self._request_count}]>" + ) + + # These context managers are not used in the standard flow, but are + # useful for testing or working with connection instances directly. + + async def __aenter__(self) -> AsyncHTTP11Connection: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + await self.aclose() + + +class HTTP11ConnectionByteStream: + def __init__(self, connection: AsyncHTTP11Connection, request: Request) -> None: + self._connection = connection + self._request = request + self._closed = False + + async def __aiter__(self) -> typing.AsyncIterator[bytes]: + kwargs = {"request": self._request} + try: + async with Trace("receive_response_body", logger, self._request, kwargs): + async for chunk in self._connection._receive_response_body(**kwargs): + yield chunk + except BaseException as exc: + # If we get an exception while streaming the response, + # we want to close the response (and possibly the connection) + # before raising that exception. + with AsyncShieldCancellation(): + await self.aclose() + raise exc + + async def aclose(self) -> None: + if not self._closed: + self._closed = True + async with Trace("response_closed", logger, self._request): + await self._connection._response_closed() + + +class AsyncHTTP11UpgradeStream(AsyncNetworkStream): + def __init__(self, stream: AsyncNetworkStream, leading_data: bytes) -> None: + self._stream = stream + self._leading_data = leading_data + + async def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + if self._leading_data: + buffer = self._leading_data[:max_bytes] + self._leading_data = self._leading_data[max_bytes:] + return buffer + else: + return await self._stream.read(max_bytes, timeout) + + async def write(self, buffer: bytes, timeout: float | None = None) -> None: + await self._stream.write(buffer, timeout) + + async def aclose(self) -> None: + await self._stream.aclose() + + async def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> AsyncNetworkStream: + return await self._stream.start_tls(ssl_context, server_hostname, timeout) + + def get_extra_info(self, info: str) -> typing.Any: + return self._stream.get_extra_info(info) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http2.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http2.py new file mode 100644 index 0000000000000000000000000000000000000000..c6434a049696dd43b8c0dac7fcbca2e4cb0394b1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http2.py @@ -0,0 +1,583 @@ +from __future__ import annotations + +import enum +import logging +import time +import types +import typing + +import h2.config +import h2.connection +import h2.events +import h2.exceptions +import h2.settings + +from .._backends.base import AsyncNetworkStream +from .._exceptions import ( + ConnectionNotAvailable, + LocalProtocolError, + RemoteProtocolError, +) +from .._models import Origin, Request, Response +from .._synchronization import AsyncLock, AsyncSemaphore, AsyncShieldCancellation +from .._trace import Trace +from .interfaces import AsyncConnectionInterface + +logger = logging.getLogger("httpcore.http2") + + +def has_body_headers(request: Request) -> bool: + return any( + k.lower() == b"content-length" or k.lower() == b"transfer-encoding" + for k, v in request.headers + ) + + +class HTTPConnectionState(enum.IntEnum): + ACTIVE = 1 + IDLE = 2 + CLOSED = 3 + + +class AsyncHTTP2Connection(AsyncConnectionInterface): + READ_NUM_BYTES = 64 * 1024 + CONFIG = h2.config.H2Configuration(validate_inbound_headers=False) + + def __init__( + self, + origin: Origin, + stream: AsyncNetworkStream, + keepalive_expiry: float | None = None, + ): + self._origin = origin + self._network_stream = stream + self._keepalive_expiry: float | None = keepalive_expiry + self._h2_state = h2.connection.H2Connection(config=self.CONFIG) + self._state = HTTPConnectionState.IDLE + self._expire_at: float | None = None + self._request_count = 0 + self._init_lock = AsyncLock() + self._state_lock = AsyncLock() + self._read_lock = AsyncLock() + self._write_lock = AsyncLock() + self._sent_connection_init = False + self._used_all_stream_ids = False + self._connection_error = False + + # Mapping from stream ID to response stream events. + self._events: dict[ + int, + h2.events.ResponseReceived + | h2.events.DataReceived + | h2.events.StreamEnded + | h2.events.StreamReset, + ] = {} + + # Connection terminated events are stored as state since + # we need to handle them for all streams. + self._connection_terminated: h2.events.ConnectionTerminated | None = None + + self._read_exception: Exception | None = None + self._write_exception: Exception | None = None + + async def handle_async_request(self, request: Request) -> Response: + if not self.can_handle_request(request.url.origin): + # This cannot occur in normal operation, since the connection pool + # will only send requests on connections that handle them. + # It's in place simply for resilience as a guard against incorrect + # usage, for anyone working directly with httpcore connections. + raise RuntimeError( + f"Attempted to send request to {request.url.origin} on connection " + f"to {self._origin}" + ) + + async with self._state_lock: + if self._state in (HTTPConnectionState.ACTIVE, HTTPConnectionState.IDLE): + self._request_count += 1 + self._expire_at = None + self._state = HTTPConnectionState.ACTIVE + else: + raise ConnectionNotAvailable() + + async with self._init_lock: + if not self._sent_connection_init: + try: + kwargs = {"request": request} + async with Trace("send_connection_init", logger, request, kwargs): + await self._send_connection_init(**kwargs) + except BaseException as exc: + with AsyncShieldCancellation(): + await self.aclose() + raise exc + + self._sent_connection_init = True + + # Initially start with just 1 until the remote server provides + # its max_concurrent_streams value + self._max_streams = 1 + + local_settings_max_streams = ( + self._h2_state.local_settings.max_concurrent_streams + ) + self._max_streams_semaphore = AsyncSemaphore(local_settings_max_streams) + + for _ in range(local_settings_max_streams - self._max_streams): + await self._max_streams_semaphore.acquire() + + await self._max_streams_semaphore.acquire() + + try: + stream_id = self._h2_state.get_next_available_stream_id() + self._events[stream_id] = [] + except h2.exceptions.NoAvailableStreamIDError: # pragma: nocover + self._used_all_stream_ids = True + self._request_count -= 1 + raise ConnectionNotAvailable() + + try: + kwargs = {"request": request, "stream_id": stream_id} + async with Trace("send_request_headers", logger, request, kwargs): + await self._send_request_headers(request=request, stream_id=stream_id) + async with Trace("send_request_body", logger, request, kwargs): + await self._send_request_body(request=request, stream_id=stream_id) + async with Trace( + "receive_response_headers", logger, request, kwargs + ) as trace: + status, headers = await self._receive_response( + request=request, stream_id=stream_id + ) + trace.return_value = (status, headers) + + return Response( + status=status, + headers=headers, + content=HTTP2ConnectionByteStream(self, request, stream_id=stream_id), + extensions={ + "http_version": b"HTTP/2", + "network_stream": self._network_stream, + "stream_id": stream_id, + }, + ) + except BaseException as exc: # noqa: PIE786 + with AsyncShieldCancellation(): + kwargs = {"stream_id": stream_id} + async with Trace("response_closed", logger, request, kwargs): + await self._response_closed(stream_id=stream_id) + + if isinstance(exc, h2.exceptions.ProtocolError): + # One case where h2 can raise a protocol error is when a + # closed frame has been seen by the state machine. + # + # This happens when one stream is reading, and encounters + # a GOAWAY event. Other flows of control may then raise + # a protocol error at any point they interact with the 'h2_state'. + # + # In this case we'll have stored the event, and should raise + # it as a RemoteProtocolError. + if self._connection_terminated: # pragma: nocover + raise RemoteProtocolError(self._connection_terminated) + # If h2 raises a protocol error in some other state then we + # must somehow have made a protocol violation. + raise LocalProtocolError(exc) # pragma: nocover + + raise exc + + async def _send_connection_init(self, request: Request) -> None: + """ + The HTTP/2 connection requires some initial setup before we can start + using individual request/response streams on it. + """ + # Need to set these manually here instead of manipulating via + # __setitem__() otherwise the H2Connection will emit SettingsUpdate + # frames in addition to sending the undesired defaults. + self._h2_state.local_settings = h2.settings.Settings( + client=True, + initial_values={ + # Disable PUSH_PROMISE frames from the server since we don't do anything + # with them for now. Maybe when we support caching? + h2.settings.SettingCodes.ENABLE_PUSH: 0, + # These two are taken from h2 for safe defaults + h2.settings.SettingCodes.MAX_CONCURRENT_STREAMS: 100, + h2.settings.SettingCodes.MAX_HEADER_LIST_SIZE: 65536, + }, + ) + + # Some websites (*cough* Yahoo *cough*) balk at this setting being + # present in the initial handshake since it's not defined in the original + # RFC despite the RFC mandating ignoring settings you don't know about. + del self._h2_state.local_settings[ + h2.settings.SettingCodes.ENABLE_CONNECT_PROTOCOL + ] + + self._h2_state.initiate_connection() + self._h2_state.increment_flow_control_window(2**24) + await self._write_outgoing_data(request) + + # Sending the request... + + async def _send_request_headers(self, request: Request, stream_id: int) -> None: + """ + Send the request headers to a given stream ID. + """ + end_stream = not has_body_headers(request) + + # In HTTP/2 the ':authority' pseudo-header is used instead of 'Host'. + # In order to gracefully handle HTTP/1.1 and HTTP/2 we always require + # HTTP/1.1 style headers, and map them appropriately if we end up on + # an HTTP/2 connection. + authority = [v for k, v in request.headers if k.lower() == b"host"][0] + + headers = [ + (b":method", request.method), + (b":authority", authority), + (b":scheme", request.url.scheme), + (b":path", request.url.target), + ] + [ + (k.lower(), v) + for k, v in request.headers + if k.lower() + not in ( + b"host", + b"transfer-encoding", + ) + ] + + self._h2_state.send_headers(stream_id, headers, end_stream=end_stream) + self._h2_state.increment_flow_control_window(2**24, stream_id=stream_id) + await self._write_outgoing_data(request) + + async def _send_request_body(self, request: Request, stream_id: int) -> None: + """ + Iterate over the request body sending it to a given stream ID. + """ + if not has_body_headers(request): + return + + assert isinstance(request.stream, typing.AsyncIterable) + async for data in request.stream: + await self._send_stream_data(request, stream_id, data) + await self._send_end_stream(request, stream_id) + + async def _send_stream_data( + self, request: Request, stream_id: int, data: bytes + ) -> None: + """ + Send a single chunk of data in one or more data frames. + """ + while data: + max_flow = await self._wait_for_outgoing_flow(request, stream_id) + chunk_size = min(len(data), max_flow) + chunk, data = data[:chunk_size], data[chunk_size:] + self._h2_state.send_data(stream_id, chunk) + await self._write_outgoing_data(request) + + async def _send_end_stream(self, request: Request, stream_id: int) -> None: + """ + Send an empty data frame on on a given stream ID with the END_STREAM flag set. + """ + self._h2_state.end_stream(stream_id) + await self._write_outgoing_data(request) + + # Receiving the response... + + async def _receive_response( + self, request: Request, stream_id: int + ) -> tuple[int, list[tuple[bytes, bytes]]]: + """ + Return the response status code and headers for a given stream ID. + """ + while True: + event = await self._receive_stream_event(request, stream_id) + if isinstance(event, h2.events.ResponseReceived): + break + + status_code = 200 + headers = [] + for k, v in event.headers: + if k == b":status": + status_code = int(v.decode("ascii", errors="ignore")) + elif not k.startswith(b":"): + headers.append((k, v)) + + return (status_code, headers) + + async def _receive_response_body( + self, request: Request, stream_id: int + ) -> typing.AsyncIterator[bytes]: + """ + Iterator that returns the bytes of the response body for a given stream ID. + """ + while True: + event = await self._receive_stream_event(request, stream_id) + if isinstance(event, h2.events.DataReceived): + amount = event.flow_controlled_length + self._h2_state.acknowledge_received_data(amount, stream_id) + await self._write_outgoing_data(request) + yield event.data + elif isinstance(event, h2.events.StreamEnded): + break + + async def _receive_stream_event( + self, request: Request, stream_id: int + ) -> h2.events.ResponseReceived | h2.events.DataReceived | h2.events.StreamEnded: + """ + Return the next available event for a given stream ID. + + Will read more data from the network if required. + """ + while not self._events.get(stream_id): + await self._receive_events(request, stream_id) + event = self._events[stream_id].pop(0) + if isinstance(event, h2.events.StreamReset): + raise RemoteProtocolError(event) + return event + + async def _receive_events( + self, request: Request, stream_id: int | None = None + ) -> None: + """ + Read some data from the network until we see one or more events + for a given stream ID. + """ + async with self._read_lock: + if self._connection_terminated is not None: + last_stream_id = self._connection_terminated.last_stream_id + if stream_id and last_stream_id and stream_id > last_stream_id: + self._request_count -= 1 + raise ConnectionNotAvailable() + raise RemoteProtocolError(self._connection_terminated) + + # This conditional is a bit icky. We don't want to block reading if we've + # actually got an event to return for a given stream. We need to do that + # check *within* the atomic read lock. Though it also need to be optional, + # because when we call it from `_wait_for_outgoing_flow` we *do* want to + # block until we've available flow control, event when we have events + # pending for the stream ID we're attempting to send on. + if stream_id is None or not self._events.get(stream_id): + events = await self._read_incoming_data(request) + for event in events: + if isinstance(event, h2.events.RemoteSettingsChanged): + async with Trace( + "receive_remote_settings", logger, request + ) as trace: + await self._receive_remote_settings_change(event) + trace.return_value = event + + elif isinstance( + event, + ( + h2.events.ResponseReceived, + h2.events.DataReceived, + h2.events.StreamEnded, + h2.events.StreamReset, + ), + ): + if event.stream_id in self._events: + self._events[event.stream_id].append(event) + + elif isinstance(event, h2.events.ConnectionTerminated): + self._connection_terminated = event + + await self._write_outgoing_data(request) + + async def _receive_remote_settings_change(self, event: h2.events.Event) -> None: + max_concurrent_streams = event.changed_settings.get( + h2.settings.SettingCodes.MAX_CONCURRENT_STREAMS + ) + if max_concurrent_streams: + new_max_streams = min( + max_concurrent_streams.new_value, + self._h2_state.local_settings.max_concurrent_streams, + ) + if new_max_streams and new_max_streams != self._max_streams: + while new_max_streams > self._max_streams: + await self._max_streams_semaphore.release() + self._max_streams += 1 + while new_max_streams < self._max_streams: + await self._max_streams_semaphore.acquire() + self._max_streams -= 1 + + async def _response_closed(self, stream_id: int) -> None: + await self._max_streams_semaphore.release() + del self._events[stream_id] + async with self._state_lock: + if self._connection_terminated and not self._events: + await self.aclose() + + elif self._state == HTTPConnectionState.ACTIVE and not self._events: + self._state = HTTPConnectionState.IDLE + if self._keepalive_expiry is not None: + now = time.monotonic() + self._expire_at = now + self._keepalive_expiry + if self._used_all_stream_ids: # pragma: nocover + await self.aclose() + + async def aclose(self) -> None: + # Note that this method unilaterally closes the connection, and does + # not have any kind of locking in place around it. + self._h2_state.close_connection() + self._state = HTTPConnectionState.CLOSED + await self._network_stream.aclose() + + # Wrappers around network read/write operations... + + async def _read_incoming_data(self, request: Request) -> list[h2.events.Event]: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("read", None) + + if self._read_exception is not None: + raise self._read_exception # pragma: nocover + + try: + data = await self._network_stream.read(self.READ_NUM_BYTES, timeout) + if data == b"": + raise RemoteProtocolError("Server disconnected") + except Exception as exc: + # If we get a network error we should: + # + # 1. Save the exception and just raise it immediately on any future reads. + # (For example, this means that a single read timeout or disconnect will + # immediately close all pending streams. Without requiring multiple + # sequential timeouts.) + # 2. Mark the connection as errored, so that we don't accept any other + # incoming requests. + self._read_exception = exc + self._connection_error = True + raise exc + + events: list[h2.events.Event] = self._h2_state.receive_data(data) + + return events + + async def _write_outgoing_data(self, request: Request) -> None: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("write", None) + + async with self._write_lock: + data_to_send = self._h2_state.data_to_send() + + if self._write_exception is not None: + raise self._write_exception # pragma: nocover + + try: + await self._network_stream.write(data_to_send, timeout) + except Exception as exc: # pragma: nocover + # If we get a network error we should: + # + # 1. Save the exception and just raise it immediately on any future write. + # (For example, this means that a single write timeout or disconnect will + # immediately close all pending streams. Without requiring multiple + # sequential timeouts.) + # 2. Mark the connection as errored, so that we don't accept any other + # incoming requests. + self._write_exception = exc + self._connection_error = True + raise exc + + # Flow control... + + async def _wait_for_outgoing_flow(self, request: Request, stream_id: int) -> int: + """ + Returns the maximum allowable outgoing flow for a given stream. + + If the allowable flow is zero, then waits on the network until + WindowUpdated frames have increased the flow rate. + https://tools.ietf.org/html/rfc7540#section-6.9 + """ + local_flow: int = self._h2_state.local_flow_control_window(stream_id) + max_frame_size: int = self._h2_state.max_outbound_frame_size + flow = min(local_flow, max_frame_size) + while flow == 0: + await self._receive_events(request) + local_flow = self._h2_state.local_flow_control_window(stream_id) + max_frame_size = self._h2_state.max_outbound_frame_size + flow = min(local_flow, max_frame_size) + return flow + + # Interface for connection pooling... + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._origin + + def is_available(self) -> bool: + return ( + self._state != HTTPConnectionState.CLOSED + and not self._connection_error + and not self._used_all_stream_ids + and not ( + self._h2_state.state_machine.state + == h2.connection.ConnectionState.CLOSED + ) + ) + + def has_expired(self) -> bool: + now = time.monotonic() + return self._expire_at is not None and now > self._expire_at + + def is_idle(self) -> bool: + return self._state == HTTPConnectionState.IDLE + + def is_closed(self) -> bool: + return self._state == HTTPConnectionState.CLOSED + + def info(self) -> str: + origin = str(self._origin) + return ( + f"{origin!r}, HTTP/2, {self._state.name}, " + f"Request Count: {self._request_count}" + ) + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + origin = str(self._origin) + return ( + f"<{class_name} [{origin!r}, {self._state.name}, " + f"Request Count: {self._request_count}]>" + ) + + # These context managers are not used in the standard flow, but are + # useful for testing or working with connection instances directly. + + async def __aenter__(self) -> AsyncHTTP2Connection: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + await self.aclose() + + +class HTTP2ConnectionByteStream: + def __init__( + self, connection: AsyncHTTP2Connection, request: Request, stream_id: int + ) -> None: + self._connection = connection + self._request = request + self._stream_id = stream_id + self._closed = False + + async def __aiter__(self) -> typing.AsyncIterator[bytes]: + kwargs = {"request": self._request, "stream_id": self._stream_id} + try: + async with Trace("receive_response_body", logger, self._request, kwargs): + async for chunk in self._connection._receive_response_body( + request=self._request, stream_id=self._stream_id + ): + yield chunk + except BaseException as exc: + # If we get an exception while streaming the response, + # we want to close the response (and possibly the connection) + # before raising that exception. + with AsyncShieldCancellation(): + await self.aclose() + raise exc + + async def aclose(self) -> None: + if not self._closed: + self._closed = True + kwargs = {"stream_id": self._stream_id} + async with Trace("response_closed", logger, self._request, kwargs): + await self._connection._response_closed(stream_id=self._stream_id) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http_proxy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http_proxy.py new file mode 100644 index 0000000000000000000000000000000000000000..cc9d92066e1680576846e46ccdf645a2b1dd5718 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/http_proxy.py @@ -0,0 +1,367 @@ +from __future__ import annotations + +import base64 +import logging +import ssl +import typing + +from .._backends.base import SOCKET_OPTION, AsyncNetworkBackend +from .._exceptions import ProxyError +from .._models import ( + URL, + Origin, + Request, + Response, + enforce_bytes, + enforce_headers, + enforce_url, +) +from .._ssl import default_ssl_context +from .._synchronization import AsyncLock +from .._trace import Trace +from .connection import AsyncHTTPConnection +from .connection_pool import AsyncConnectionPool +from .http11 import AsyncHTTP11Connection +from .interfaces import AsyncConnectionInterface + +ByteOrStr = typing.Union[bytes, str] +HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]] +HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr] + + +logger = logging.getLogger("httpcore.proxy") + + +def merge_headers( + default_headers: typing.Sequence[tuple[bytes, bytes]] | None = None, + override_headers: typing.Sequence[tuple[bytes, bytes]] | None = None, +) -> list[tuple[bytes, bytes]]: + """ + Append default_headers and override_headers, de-duplicating if a key exists + in both cases. + """ + default_headers = [] if default_headers is None else list(default_headers) + override_headers = [] if override_headers is None else list(override_headers) + has_override = set(key.lower() for key, value in override_headers) + default_headers = [ + (key, value) + for key, value in default_headers + if key.lower() not in has_override + ] + return default_headers + override_headers + + +class AsyncHTTPProxy(AsyncConnectionPool): # pragma: nocover + """ + A connection pool that sends requests via an HTTP proxy. + """ + + def __init__( + self, + proxy_url: URL | bytes | str, + proxy_auth: tuple[bytes | str, bytes | str] | None = None, + proxy_headers: HeadersAsMapping | HeadersAsSequence | None = None, + ssl_context: ssl.SSLContext | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + max_connections: int | None = 10, + max_keepalive_connections: int | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + local_address: str | None = None, + uds: str | None = None, + network_backend: AsyncNetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + """ + A connection pool for making HTTP requests. + + Parameters: + proxy_url: The URL to use when connecting to the proxy server. + For example `"http://127.0.0.1:8080/"`. + proxy_auth: Any proxy authentication as a two-tuple of + (username, password). May be either bytes or ascii-only str. + proxy_headers: Any HTTP headers to use for the proxy requests. + For example `{"Proxy-Authorization": "Basic :"}`. + ssl_context: An SSL context to use for verifying connections. + If not specified, the default `httpcore.default_ssl_context()` + will be used. + proxy_ssl_context: The same as `ssl_context`, but for a proxy server rather than a remote origin. + max_connections: The maximum number of concurrent HTTP connections that + the pool should allow. Any attempt to send a request on a pool that + would exceed this amount will block until a connection is available. + max_keepalive_connections: The maximum number of idle HTTP connections + that will be maintained in the pool. + keepalive_expiry: The duration in seconds that an idle HTTP connection + may be maintained for before being expired from the pool. + http1: A boolean indicating if HTTP/1.1 requests should be supported + by the connection pool. Defaults to True. + http2: A boolean indicating if HTTP/2 requests should be supported by + the connection pool. Defaults to False. + retries: The maximum number of retries when trying to establish + a connection. + local_address: Local address to connect from. Can also be used to + connect using a particular address family. Using + `local_address="0.0.0.0"` will connect using an `AF_INET` address + (IPv4), while using `local_address="::"` will connect using an + `AF_INET6` address (IPv6). + uds: Path to a Unix Domain Socket to use instead of TCP sockets. + network_backend: A backend instance to use for handling network I/O. + """ + super().__init__( + ssl_context=ssl_context, + max_connections=max_connections, + max_keepalive_connections=max_keepalive_connections, + keepalive_expiry=keepalive_expiry, + http1=http1, + http2=http2, + network_backend=network_backend, + retries=retries, + local_address=local_address, + uds=uds, + socket_options=socket_options, + ) + + self._proxy_url = enforce_url(proxy_url, name="proxy_url") + if ( + self._proxy_url.scheme == b"http" and proxy_ssl_context is not None + ): # pragma: no cover + raise RuntimeError( + "The `proxy_ssl_context` argument is not allowed for the http scheme" + ) + + self._ssl_context = ssl_context + self._proxy_ssl_context = proxy_ssl_context + self._proxy_headers = enforce_headers(proxy_headers, name="proxy_headers") + if proxy_auth is not None: + username = enforce_bytes(proxy_auth[0], name="proxy_auth") + password = enforce_bytes(proxy_auth[1], name="proxy_auth") + userpass = username + b":" + password + authorization = b"Basic " + base64.b64encode(userpass) + self._proxy_headers = [ + (b"Proxy-Authorization", authorization) + ] + self._proxy_headers + + def create_connection(self, origin: Origin) -> AsyncConnectionInterface: + if origin.scheme == b"http": + return AsyncForwardHTTPConnection( + proxy_origin=self._proxy_url.origin, + proxy_headers=self._proxy_headers, + remote_origin=origin, + keepalive_expiry=self._keepalive_expiry, + network_backend=self._network_backend, + proxy_ssl_context=self._proxy_ssl_context, + ) + return AsyncTunnelHTTPConnection( + proxy_origin=self._proxy_url.origin, + proxy_headers=self._proxy_headers, + remote_origin=origin, + ssl_context=self._ssl_context, + proxy_ssl_context=self._proxy_ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + + +class AsyncForwardHTTPConnection(AsyncConnectionInterface): + def __init__( + self, + proxy_origin: Origin, + remote_origin: Origin, + proxy_headers: HeadersAsMapping | HeadersAsSequence | None = None, + keepalive_expiry: float | None = None, + network_backend: AsyncNetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + ) -> None: + self._connection = AsyncHTTPConnection( + origin=proxy_origin, + keepalive_expiry=keepalive_expiry, + network_backend=network_backend, + socket_options=socket_options, + ssl_context=proxy_ssl_context, + ) + self._proxy_origin = proxy_origin + self._proxy_headers = enforce_headers(proxy_headers, name="proxy_headers") + self._remote_origin = remote_origin + + async def handle_async_request(self, request: Request) -> Response: + headers = merge_headers(self._proxy_headers, request.headers) + url = URL( + scheme=self._proxy_origin.scheme, + host=self._proxy_origin.host, + port=self._proxy_origin.port, + target=bytes(request.url), + ) + proxy_request = Request( + method=request.method, + url=url, + headers=headers, + content=request.stream, + extensions=request.extensions, + ) + return await self._connection.handle_async_request(proxy_request) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._remote_origin + + async def aclose(self) -> None: + await self._connection.aclose() + + def info(self) -> str: + return self._connection.info() + + def is_available(self) -> bool: + return self._connection.is_available() + + def has_expired(self) -> bool: + return self._connection.has_expired() + + def is_idle(self) -> bool: + return self._connection.is_idle() + + def is_closed(self) -> bool: + return self._connection.is_closed() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" + + +class AsyncTunnelHTTPConnection(AsyncConnectionInterface): + def __init__( + self, + proxy_origin: Origin, + remote_origin: Origin, + ssl_context: ssl.SSLContext | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + proxy_headers: typing.Sequence[tuple[bytes, bytes]] | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + network_backend: AsyncNetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + self._connection: AsyncConnectionInterface = AsyncHTTPConnection( + origin=proxy_origin, + keepalive_expiry=keepalive_expiry, + network_backend=network_backend, + socket_options=socket_options, + ssl_context=proxy_ssl_context, + ) + self._proxy_origin = proxy_origin + self._remote_origin = remote_origin + self._ssl_context = ssl_context + self._proxy_ssl_context = proxy_ssl_context + self._proxy_headers = enforce_headers(proxy_headers, name="proxy_headers") + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + self._connect_lock = AsyncLock() + self._connected = False + + async def handle_async_request(self, request: Request) -> Response: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("connect", None) + + async with self._connect_lock: + if not self._connected: + target = b"%b:%d" % (self._remote_origin.host, self._remote_origin.port) + + connect_url = URL( + scheme=self._proxy_origin.scheme, + host=self._proxy_origin.host, + port=self._proxy_origin.port, + target=target, + ) + connect_headers = merge_headers( + [(b"Host", target), (b"Accept", b"*/*")], self._proxy_headers + ) + connect_request = Request( + method=b"CONNECT", + url=connect_url, + headers=connect_headers, + extensions=request.extensions, + ) + connect_response = await self._connection.handle_async_request( + connect_request + ) + + if connect_response.status < 200 or connect_response.status > 299: + reason_bytes = connect_response.extensions.get("reason_phrase", b"") + reason_str = reason_bytes.decode("ascii", errors="ignore") + msg = "%d %s" % (connect_response.status, reason_str) + await self._connection.aclose() + raise ProxyError(msg) + + stream = connect_response.extensions["network_stream"] + + # Upgrade the stream to SSL + ssl_context = ( + default_ssl_context() + if self._ssl_context is None + else self._ssl_context + ) + alpn_protocols = ["http/1.1", "h2"] if self._http2 else ["http/1.1"] + ssl_context.set_alpn_protocols(alpn_protocols) + + kwargs = { + "ssl_context": ssl_context, + "server_hostname": self._remote_origin.host.decode("ascii"), + "timeout": timeout, + } + async with Trace("start_tls", logger, request, kwargs) as trace: + stream = await stream.start_tls(**kwargs) + trace.return_value = stream + + # Determine if we should be using HTTP/1.1 or HTTP/2 + ssl_object = stream.get_extra_info("ssl_object") + http2_negotiated = ( + ssl_object is not None + and ssl_object.selected_alpn_protocol() == "h2" + ) + + # Create the HTTP/1.1 or HTTP/2 connection + if http2_negotiated or (self._http2 and not self._http1): + from .http2 import AsyncHTTP2Connection + + self._connection = AsyncHTTP2Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + else: + self._connection = AsyncHTTP11Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + + self._connected = True + return await self._connection.handle_async_request(request) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._remote_origin + + async def aclose(self) -> None: + await self._connection.aclose() + + def info(self) -> str: + return self._connection.info() + + def is_available(self) -> bool: + return self._connection.is_available() + + def has_expired(self) -> bool: + return self._connection.has_expired() + + def is_idle(self) -> bool: + return self._connection.is_idle() + + def is_closed(self) -> bool: + return self._connection.is_closed() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/interfaces.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/interfaces.py new file mode 100644 index 0000000000000000000000000000000000000000..361583bede6b2b84088b38054d5d8116ef9f1597 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/interfaces.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import contextlib +import typing + +from .._models import ( + URL, + Extensions, + HeaderTypes, + Origin, + Request, + Response, + enforce_bytes, + enforce_headers, + enforce_url, + include_request_headers, +) + + +class AsyncRequestInterface: + async def request( + self, + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes | typing.AsyncIterator[bytes] | None = None, + extensions: Extensions | None = None, + ) -> Response: + # Strict type checking on our parameters. + method = enforce_bytes(method, name="method") + url = enforce_url(url, name="url") + headers = enforce_headers(headers, name="headers") + + # Include Host header, and optionally Content-Length or Transfer-Encoding. + headers = include_request_headers(headers, url=url, content=content) + + request = Request( + method=method, + url=url, + headers=headers, + content=content, + extensions=extensions, + ) + response = await self.handle_async_request(request) + try: + await response.aread() + finally: + await response.aclose() + return response + + @contextlib.asynccontextmanager + async def stream( + self, + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes | typing.AsyncIterator[bytes] | None = None, + extensions: Extensions | None = None, + ) -> typing.AsyncIterator[Response]: + # Strict type checking on our parameters. + method = enforce_bytes(method, name="method") + url = enforce_url(url, name="url") + headers = enforce_headers(headers, name="headers") + + # Include Host header, and optionally Content-Length or Transfer-Encoding. + headers = include_request_headers(headers, url=url, content=content) + + request = Request( + method=method, + url=url, + headers=headers, + content=content, + extensions=extensions, + ) + response = await self.handle_async_request(request) + try: + yield response + finally: + await response.aclose() + + async def handle_async_request(self, request: Request) -> Response: + raise NotImplementedError() # pragma: nocover + + +class AsyncConnectionInterface(AsyncRequestInterface): + async def aclose(self) -> None: + raise NotImplementedError() # pragma: nocover + + def info(self) -> str: + raise NotImplementedError() # pragma: nocover + + def can_handle_request(self, origin: Origin) -> bool: + raise NotImplementedError() # pragma: nocover + + def is_available(self) -> bool: + """ + Return `True` if the connection is currently able to accept an + outgoing request. + + An HTTP/1.1 connection will only be available if it is currently idle. + + An HTTP/2 connection will be available so long as the stream ID space is + not yet exhausted, and the connection is not in an error state. + + While the connection is being established we may not yet know if it is going + to result in an HTTP/1.1 or HTTP/2 connection. The connection should be + treated as being available, but might ultimately raise `NewConnectionRequired` + required exceptions if multiple requests are attempted over a connection + that ends up being established as HTTP/1.1. + """ + raise NotImplementedError() # pragma: nocover + + def has_expired(self) -> bool: + """ + Return `True` if the connection is in a state where it should be closed. + + This either means that the connection is idle and it has passed the + expiry time on its keep-alive, or that server has sent an EOF. + """ + raise NotImplementedError() # pragma: nocover + + def is_idle(self) -> bool: + """ + Return `True` if the connection is currently idle. + """ + raise NotImplementedError() # pragma: nocover + + def is_closed(self) -> bool: + """ + Return `True` if the connection has been closed. + + Used when a response is closed to determine if the connection may be + returned to the connection pool or not. + """ + raise NotImplementedError() # pragma: nocover diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/socks_proxy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/socks_proxy.py new file mode 100644 index 0000000000000000000000000000000000000000..b363f55a0b071de6c5f377726be82dc2110e373c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_async/socks_proxy.py @@ -0,0 +1,341 @@ +from __future__ import annotations + +import logging +import ssl + +import socksio + +from .._backends.auto import AutoBackend +from .._backends.base import AsyncNetworkBackend, AsyncNetworkStream +from .._exceptions import ConnectionNotAvailable, ProxyError +from .._models import URL, Origin, Request, Response, enforce_bytes, enforce_url +from .._ssl import default_ssl_context +from .._synchronization import AsyncLock +from .._trace import Trace +from .connection_pool import AsyncConnectionPool +from .http11 import AsyncHTTP11Connection +from .interfaces import AsyncConnectionInterface + +logger = logging.getLogger("httpcore.socks") + + +AUTH_METHODS = { + b"\x00": "NO AUTHENTICATION REQUIRED", + b"\x01": "GSSAPI", + b"\x02": "USERNAME/PASSWORD", + b"\xff": "NO ACCEPTABLE METHODS", +} + +REPLY_CODES = { + b"\x00": "Succeeded", + b"\x01": "General SOCKS server failure", + b"\x02": "Connection not allowed by ruleset", + b"\x03": "Network unreachable", + b"\x04": "Host unreachable", + b"\x05": "Connection refused", + b"\x06": "TTL expired", + b"\x07": "Command not supported", + b"\x08": "Address type not supported", +} + + +async def _init_socks5_connection( + stream: AsyncNetworkStream, + *, + host: bytes, + port: int, + auth: tuple[bytes, bytes] | None = None, +) -> None: + conn = socksio.socks5.SOCKS5Connection() + + # Auth method request + auth_method = ( + socksio.socks5.SOCKS5AuthMethod.NO_AUTH_REQUIRED + if auth is None + else socksio.socks5.SOCKS5AuthMethod.USERNAME_PASSWORD + ) + conn.send(socksio.socks5.SOCKS5AuthMethodsRequest([auth_method])) + outgoing_bytes = conn.data_to_send() + await stream.write(outgoing_bytes) + + # Auth method response + incoming_bytes = await stream.read(max_bytes=4096) + response = conn.receive_data(incoming_bytes) + assert isinstance(response, socksio.socks5.SOCKS5AuthReply) + if response.method != auth_method: + requested = AUTH_METHODS.get(auth_method, "UNKNOWN") + responded = AUTH_METHODS.get(response.method, "UNKNOWN") + raise ProxyError( + f"Requested {requested} from proxy server, but got {responded}." + ) + + if response.method == socksio.socks5.SOCKS5AuthMethod.USERNAME_PASSWORD: + # Username/password request + assert auth is not None + username, password = auth + conn.send(socksio.socks5.SOCKS5UsernamePasswordRequest(username, password)) + outgoing_bytes = conn.data_to_send() + await stream.write(outgoing_bytes) + + # Username/password response + incoming_bytes = await stream.read(max_bytes=4096) + response = conn.receive_data(incoming_bytes) + assert isinstance(response, socksio.socks5.SOCKS5UsernamePasswordReply) + if not response.success: + raise ProxyError("Invalid username/password") + + # Connect request + conn.send( + socksio.socks5.SOCKS5CommandRequest.from_address( + socksio.socks5.SOCKS5Command.CONNECT, (host, port) + ) + ) + outgoing_bytes = conn.data_to_send() + await stream.write(outgoing_bytes) + + # Connect response + incoming_bytes = await stream.read(max_bytes=4096) + response = conn.receive_data(incoming_bytes) + assert isinstance(response, socksio.socks5.SOCKS5Reply) + if response.reply_code != socksio.socks5.SOCKS5ReplyCode.SUCCEEDED: + reply_code = REPLY_CODES.get(response.reply_code, "UNKOWN") + raise ProxyError(f"Proxy Server could not connect: {reply_code}.") + + +class AsyncSOCKSProxy(AsyncConnectionPool): # pragma: nocover + """ + A connection pool that sends requests via an HTTP proxy. + """ + + def __init__( + self, + proxy_url: URL | bytes | str, + proxy_auth: tuple[bytes | str, bytes | str] | None = None, + ssl_context: ssl.SSLContext | None = None, + max_connections: int | None = 10, + max_keepalive_connections: int | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + network_backend: AsyncNetworkBackend | None = None, + ) -> None: + """ + A connection pool for making HTTP requests. + + Parameters: + proxy_url: The URL to use when connecting to the proxy server. + For example `"http://127.0.0.1:8080/"`. + ssl_context: An SSL context to use for verifying connections. + If not specified, the default `httpcore.default_ssl_context()` + will be used. + max_connections: The maximum number of concurrent HTTP connections that + the pool should allow. Any attempt to send a request on a pool that + would exceed this amount will block until a connection is available. + max_keepalive_connections: The maximum number of idle HTTP connections + that will be maintained in the pool. + keepalive_expiry: The duration in seconds that an idle HTTP connection + may be maintained for before being expired from the pool. + http1: A boolean indicating if HTTP/1.1 requests should be supported + by the connection pool. Defaults to True. + http2: A boolean indicating if HTTP/2 requests should be supported by + the connection pool. Defaults to False. + retries: The maximum number of retries when trying to establish + a connection. + local_address: Local address to connect from. Can also be used to + connect using a particular address family. Using + `local_address="0.0.0.0"` will connect using an `AF_INET` address + (IPv4), while using `local_address="::"` will connect using an + `AF_INET6` address (IPv6). + uds: Path to a Unix Domain Socket to use instead of TCP sockets. + network_backend: A backend instance to use for handling network I/O. + """ + super().__init__( + ssl_context=ssl_context, + max_connections=max_connections, + max_keepalive_connections=max_keepalive_connections, + keepalive_expiry=keepalive_expiry, + http1=http1, + http2=http2, + network_backend=network_backend, + retries=retries, + ) + self._ssl_context = ssl_context + self._proxy_url = enforce_url(proxy_url, name="proxy_url") + if proxy_auth is not None: + username, password = proxy_auth + username_bytes = enforce_bytes(username, name="proxy_auth") + password_bytes = enforce_bytes(password, name="proxy_auth") + self._proxy_auth: tuple[bytes, bytes] | None = ( + username_bytes, + password_bytes, + ) + else: + self._proxy_auth = None + + def create_connection(self, origin: Origin) -> AsyncConnectionInterface: + return AsyncSocks5Connection( + proxy_origin=self._proxy_url.origin, + remote_origin=origin, + proxy_auth=self._proxy_auth, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + + +class AsyncSocks5Connection(AsyncConnectionInterface): + def __init__( + self, + proxy_origin: Origin, + remote_origin: Origin, + proxy_auth: tuple[bytes, bytes] | None = None, + ssl_context: ssl.SSLContext | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + network_backend: AsyncNetworkBackend | None = None, + ) -> None: + self._proxy_origin = proxy_origin + self._remote_origin = remote_origin + self._proxy_auth = proxy_auth + self._ssl_context = ssl_context + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + + self._network_backend: AsyncNetworkBackend = ( + AutoBackend() if network_backend is None else network_backend + ) + self._connect_lock = AsyncLock() + self._connection: AsyncConnectionInterface | None = None + self._connect_failed = False + + async def handle_async_request(self, request: Request) -> Response: + timeouts = request.extensions.get("timeout", {}) + sni_hostname = request.extensions.get("sni_hostname", None) + timeout = timeouts.get("connect", None) + + async with self._connect_lock: + if self._connection is None: + try: + # Connect to the proxy + kwargs = { + "host": self._proxy_origin.host.decode("ascii"), + "port": self._proxy_origin.port, + "timeout": timeout, + } + async with Trace("connect_tcp", logger, request, kwargs) as trace: + stream = await self._network_backend.connect_tcp(**kwargs) + trace.return_value = stream + + # Connect to the remote host using socks5 + kwargs = { + "stream": stream, + "host": self._remote_origin.host.decode("ascii"), + "port": self._remote_origin.port, + "auth": self._proxy_auth, + } + async with Trace( + "setup_socks5_connection", logger, request, kwargs + ) as trace: + await _init_socks5_connection(**kwargs) + trace.return_value = stream + + # Upgrade the stream to SSL + if self._remote_origin.scheme == b"https": + ssl_context = ( + default_ssl_context() + if self._ssl_context is None + else self._ssl_context + ) + alpn_protocols = ( + ["http/1.1", "h2"] if self._http2 else ["http/1.1"] + ) + ssl_context.set_alpn_protocols(alpn_protocols) + + kwargs = { + "ssl_context": ssl_context, + "server_hostname": sni_hostname + or self._remote_origin.host.decode("ascii"), + "timeout": timeout, + } + async with Trace("start_tls", logger, request, kwargs) as trace: + stream = await stream.start_tls(**kwargs) + trace.return_value = stream + + # Determine if we should be using HTTP/1.1 or HTTP/2 + ssl_object = stream.get_extra_info("ssl_object") + http2_negotiated = ( + ssl_object is not None + and ssl_object.selected_alpn_protocol() == "h2" + ) + + # Create the HTTP/1.1 or HTTP/2 connection + if http2_negotiated or ( + self._http2 and not self._http1 + ): # pragma: nocover + from .http2 import AsyncHTTP2Connection + + self._connection = AsyncHTTP2Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + else: + self._connection = AsyncHTTP11Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + except Exception as exc: + self._connect_failed = True + raise exc + elif not self._connection.is_available(): # pragma: nocover + raise ConnectionNotAvailable() + + return await self._connection.handle_async_request(request) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._remote_origin + + async def aclose(self) -> None: + if self._connection is not None: + await self._connection.aclose() + + def is_available(self) -> bool: + if self._connection is None: # pragma: nocover + # If HTTP/2 support is enabled, and the resulting connection could + # end up as HTTP/2 then we should indicate the connection as being + # available to service multiple requests. + return ( + self._http2 + and (self._remote_origin.scheme == b"https" or not self._http1) + and not self._connect_failed + ) + return self._connection.is_available() + + def has_expired(self) -> bool: + if self._connection is None: # pragma: nocover + return self._connect_failed + return self._connection.has_expired() + + def is_idle(self) -> bool: + if self._connection is None: # pragma: nocover + return self._connect_failed + return self._connection.is_idle() + + def is_closed(self) -> bool: + if self._connection is None: # pragma: nocover + return self._connect_failed + return self._connection.is_closed() + + def info(self) -> str: + if self._connection is None: # pragma: nocover + return "CONNECTION FAILED" if self._connect_failed else "CONNECTING" + return self._connection.info() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7aaf8809c113822de1b534d66b36ba027b998949 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/anyio.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/anyio.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..839dd6514558bec43d290ff96054bdb3a1d6975b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/anyio.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/auto.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/auto.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a35070927220e19ca5eb2012b20555de5b58a91c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/auto.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/base.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de9a11b6f5a04c489e7d7325f169709468e3e330 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/base.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/mock.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/mock.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ead524c94c8e74f94138e7dbaabd2fda91cda47 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/mock.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/sync.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/sync.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5eedb70f84178715f1872a7d3071bf061c9e2168 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/sync.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/trio.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/trio.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a74472b6f3d24ca0533c16a8b930b418da0fb39b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/__pycache__/trio.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/anyio.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/anyio.py new file mode 100644 index 0000000000000000000000000000000000000000..a140095e1b8de022f321a41c0125e0e5febc0749 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/anyio.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import ssl +import typing + +import anyio + +from .._exceptions import ( + ConnectError, + ConnectTimeout, + ReadError, + ReadTimeout, + WriteError, + WriteTimeout, + map_exceptions, +) +from .._utils import is_socket_readable +from .base import SOCKET_OPTION, AsyncNetworkBackend, AsyncNetworkStream + + +class AnyIOStream(AsyncNetworkStream): + def __init__(self, stream: anyio.abc.ByteStream) -> None: + self._stream = stream + + async def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + exc_map = { + TimeoutError: ReadTimeout, + anyio.BrokenResourceError: ReadError, + anyio.ClosedResourceError: ReadError, + anyio.EndOfStream: ReadError, + } + with map_exceptions(exc_map): + with anyio.fail_after(timeout): + try: + return await self._stream.receive(max_bytes=max_bytes) + except anyio.EndOfStream: # pragma: nocover + return b"" + + async def write(self, buffer: bytes, timeout: float | None = None) -> None: + if not buffer: + return + + exc_map = { + TimeoutError: WriteTimeout, + anyio.BrokenResourceError: WriteError, + anyio.ClosedResourceError: WriteError, + } + with map_exceptions(exc_map): + with anyio.fail_after(timeout): + await self._stream.send(item=buffer) + + async def aclose(self) -> None: + await self._stream.aclose() + + async def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> AsyncNetworkStream: + exc_map = { + TimeoutError: ConnectTimeout, + anyio.BrokenResourceError: ConnectError, + anyio.EndOfStream: ConnectError, + ssl.SSLError: ConnectError, + } + with map_exceptions(exc_map): + try: + with anyio.fail_after(timeout): + ssl_stream = await anyio.streams.tls.TLSStream.wrap( + self._stream, + ssl_context=ssl_context, + hostname=server_hostname, + standard_compatible=False, + server_side=False, + ) + except Exception as exc: # pragma: nocover + await self.aclose() + raise exc + return AnyIOStream(ssl_stream) + + def get_extra_info(self, info: str) -> typing.Any: + if info == "ssl_object": + return self._stream.extra(anyio.streams.tls.TLSAttribute.ssl_object, None) + if info == "client_addr": + return self._stream.extra(anyio.abc.SocketAttribute.local_address, None) + if info == "server_addr": + return self._stream.extra(anyio.abc.SocketAttribute.remote_address, None) + if info == "socket": + return self._stream.extra(anyio.abc.SocketAttribute.raw_socket, None) + if info == "is_readable": + sock = self._stream.extra(anyio.abc.SocketAttribute.raw_socket, None) + return is_socket_readable(sock) + return None + + +class AnyIOBackend(AsyncNetworkBackend): + async def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: # pragma: nocover + if socket_options is None: + socket_options = [] + exc_map = { + TimeoutError: ConnectTimeout, + OSError: ConnectError, + anyio.BrokenResourceError: ConnectError, + } + with map_exceptions(exc_map): + with anyio.fail_after(timeout): + stream: anyio.abc.ByteStream = await anyio.connect_tcp( + remote_host=host, + remote_port=port, + local_host=local_address, + ) + # By default TCP sockets opened in `asyncio` include TCP_NODELAY. + for option in socket_options: + stream._raw_socket.setsockopt(*option) # type: ignore[attr-defined] # pragma: no cover + return AnyIOStream(stream) + + async def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: # pragma: nocover + if socket_options is None: + socket_options = [] + exc_map = { + TimeoutError: ConnectTimeout, + OSError: ConnectError, + anyio.BrokenResourceError: ConnectError, + } + with map_exceptions(exc_map): + with anyio.fail_after(timeout): + stream: anyio.abc.ByteStream = await anyio.connect_unix(path) + for option in socket_options: + stream._raw_socket.setsockopt(*option) # type: ignore[attr-defined] # pragma: no cover + return AnyIOStream(stream) + + async def sleep(self, seconds: float) -> None: + await anyio.sleep(seconds) # pragma: nocover diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/auto.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/auto.py new file mode 100644 index 0000000000000000000000000000000000000000..49f0e698c97ad5623f376d8182675352e21c2c3c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/auto.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import typing + +from .._synchronization import current_async_library +from .base import SOCKET_OPTION, AsyncNetworkBackend, AsyncNetworkStream + + +class AutoBackend(AsyncNetworkBackend): + async def _init_backend(self) -> None: + if not (hasattr(self, "_backend")): + backend = current_async_library() + if backend == "trio": + from .trio import TrioBackend + + self._backend: AsyncNetworkBackend = TrioBackend() + else: + from .anyio import AnyIOBackend + + self._backend = AnyIOBackend() + + async def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: + await self._init_backend() + return await self._backend.connect_tcp( + host, + port, + timeout=timeout, + local_address=local_address, + socket_options=socket_options, + ) + + async def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: # pragma: nocover + await self._init_backend() + return await self._backend.connect_unix_socket( + path, timeout=timeout, socket_options=socket_options + ) + + async def sleep(self, seconds: float) -> None: # pragma: nocover + await self._init_backend() + return await self._backend.sleep(seconds) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/base.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/base.py new file mode 100644 index 0000000000000000000000000000000000000000..cf55c8b10eb543872550be863206fe2f760d0d8d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/base.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import ssl +import time +import typing + +SOCKET_OPTION = typing.Union[ + typing.Tuple[int, int, int], + typing.Tuple[int, int, typing.Union[bytes, bytearray]], + typing.Tuple[int, int, None, int], +] + + +class NetworkStream: + def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + raise NotImplementedError() # pragma: nocover + + def write(self, buffer: bytes, timeout: float | None = None) -> None: + raise NotImplementedError() # pragma: nocover + + def close(self) -> None: + raise NotImplementedError() # pragma: nocover + + def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> NetworkStream: + raise NotImplementedError() # pragma: nocover + + def get_extra_info(self, info: str) -> typing.Any: + return None # pragma: nocover + + +class NetworkBackend: + def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> NetworkStream: + raise NotImplementedError() # pragma: nocover + + def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> NetworkStream: + raise NotImplementedError() # pragma: nocover + + def sleep(self, seconds: float) -> None: + time.sleep(seconds) # pragma: nocover + + +class AsyncNetworkStream: + async def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + raise NotImplementedError() # pragma: nocover + + async def write(self, buffer: bytes, timeout: float | None = None) -> None: + raise NotImplementedError() # pragma: nocover + + async def aclose(self) -> None: + raise NotImplementedError() # pragma: nocover + + async def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> AsyncNetworkStream: + raise NotImplementedError() # pragma: nocover + + def get_extra_info(self, info: str) -> typing.Any: + return None # pragma: nocover + + +class AsyncNetworkBackend: + async def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: + raise NotImplementedError() # pragma: nocover + + async def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: + raise NotImplementedError() # pragma: nocover + + async def sleep(self, seconds: float) -> None: + raise NotImplementedError() # pragma: nocover diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/mock.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/mock.py new file mode 100644 index 0000000000000000000000000000000000000000..9b6edca03d4d4b34f355fd53e49d4b4c699c972c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/mock.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import ssl +import typing + +from .._exceptions import ReadError +from .base import ( + SOCKET_OPTION, + AsyncNetworkBackend, + AsyncNetworkStream, + NetworkBackend, + NetworkStream, +) + + +class MockSSLObject: + def __init__(self, http2: bool): + self._http2 = http2 + + def selected_alpn_protocol(self) -> str: + return "h2" if self._http2 else "http/1.1" + + +class MockStream(NetworkStream): + def __init__(self, buffer: list[bytes], http2: bool = False) -> None: + self._buffer = buffer + self._http2 = http2 + self._closed = False + + def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + if self._closed: + raise ReadError("Connection closed") + if not self._buffer: + return b"" + return self._buffer.pop(0) + + def write(self, buffer: bytes, timeout: float | None = None) -> None: + pass + + def close(self) -> None: + self._closed = True + + def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> NetworkStream: + return self + + def get_extra_info(self, info: str) -> typing.Any: + return MockSSLObject(http2=self._http2) if info == "ssl_object" else None + + def __repr__(self) -> str: + return "" + + +class MockBackend(NetworkBackend): + def __init__(self, buffer: list[bytes], http2: bool = False) -> None: + self._buffer = buffer + self._http2 = http2 + + def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> NetworkStream: + return MockStream(list(self._buffer), http2=self._http2) + + def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> NetworkStream: + return MockStream(list(self._buffer), http2=self._http2) + + def sleep(self, seconds: float) -> None: + pass + + +class AsyncMockStream(AsyncNetworkStream): + def __init__(self, buffer: list[bytes], http2: bool = False) -> None: + self._buffer = buffer + self._http2 = http2 + self._closed = False + + async def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + if self._closed: + raise ReadError("Connection closed") + if not self._buffer: + return b"" + return self._buffer.pop(0) + + async def write(self, buffer: bytes, timeout: float | None = None) -> None: + pass + + async def aclose(self) -> None: + self._closed = True + + async def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> AsyncNetworkStream: + return self + + def get_extra_info(self, info: str) -> typing.Any: + return MockSSLObject(http2=self._http2) if info == "ssl_object" else None + + def __repr__(self) -> str: + return "" + + +class AsyncMockBackend(AsyncNetworkBackend): + def __init__(self, buffer: list[bytes], http2: bool = False) -> None: + self._buffer = buffer + self._http2 = http2 + + async def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: + return AsyncMockStream(list(self._buffer), http2=self._http2) + + async def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: + return AsyncMockStream(list(self._buffer), http2=self._http2) + + async def sleep(self, seconds: float) -> None: + pass diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/sync.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/sync.py new file mode 100644 index 0000000000000000000000000000000000000000..4018a09c6fb1e0ef1b03ab8d84b13ebef4031f7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/sync.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +import functools +import socket +import ssl +import sys +import typing + +from .._exceptions import ( + ConnectError, + ConnectTimeout, + ExceptionMapping, + ReadError, + ReadTimeout, + WriteError, + WriteTimeout, + map_exceptions, +) +from .._utils import is_socket_readable +from .base import SOCKET_OPTION, NetworkBackend, NetworkStream + + +class TLSinTLSStream(NetworkStream): # pragma: no cover + """ + Because the standard `SSLContext.wrap_socket` method does + not work for `SSLSocket` objects, we need this class + to implement TLS stream using an underlying `SSLObject` + instance in order to support TLS on top of TLS. + """ + + # Defined in RFC 8449 + TLS_RECORD_SIZE = 16384 + + def __init__( + self, + sock: socket.socket, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ): + self._sock = sock + self._incoming = ssl.MemoryBIO() + self._outgoing = ssl.MemoryBIO() + + self.ssl_obj = ssl_context.wrap_bio( + incoming=self._incoming, + outgoing=self._outgoing, + server_hostname=server_hostname, + ) + + self._sock.settimeout(timeout) + self._perform_io(self.ssl_obj.do_handshake) + + def _perform_io( + self, + func: typing.Callable[..., typing.Any], + ) -> typing.Any: + ret = None + + while True: + errno = None + try: + ret = func() + except (ssl.SSLWantReadError, ssl.SSLWantWriteError) as e: + errno = e.errno + + self._sock.sendall(self._outgoing.read()) + + if errno == ssl.SSL_ERROR_WANT_READ: + buf = self._sock.recv(self.TLS_RECORD_SIZE) + + if buf: + self._incoming.write(buf) + else: + self._incoming.write_eof() + if errno is None: + return ret + + def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + exc_map: ExceptionMapping = {socket.timeout: ReadTimeout, OSError: ReadError} + with map_exceptions(exc_map): + self._sock.settimeout(timeout) + return typing.cast( + bytes, self._perform_io(functools.partial(self.ssl_obj.read, max_bytes)) + ) + + def write(self, buffer: bytes, timeout: float | None = None) -> None: + exc_map: ExceptionMapping = {socket.timeout: WriteTimeout, OSError: WriteError} + with map_exceptions(exc_map): + self._sock.settimeout(timeout) + while buffer: + nsent = self._perform_io(functools.partial(self.ssl_obj.write, buffer)) + buffer = buffer[nsent:] + + def close(self) -> None: + self._sock.close() + + def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> NetworkStream: + raise NotImplementedError() + + def get_extra_info(self, info: str) -> typing.Any: + if info == "ssl_object": + return self.ssl_obj + if info == "client_addr": + return self._sock.getsockname() + if info == "server_addr": + return self._sock.getpeername() + if info == "socket": + return self._sock + if info == "is_readable": + return is_socket_readable(self._sock) + return None + + +class SyncStream(NetworkStream): + def __init__(self, sock: socket.socket) -> None: + self._sock = sock + + def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + exc_map: ExceptionMapping = {socket.timeout: ReadTimeout, OSError: ReadError} + with map_exceptions(exc_map): + self._sock.settimeout(timeout) + return self._sock.recv(max_bytes) + + def write(self, buffer: bytes, timeout: float | None = None) -> None: + if not buffer: + return + + exc_map: ExceptionMapping = {socket.timeout: WriteTimeout, OSError: WriteError} + with map_exceptions(exc_map): + while buffer: + self._sock.settimeout(timeout) + n = self._sock.send(buffer) + buffer = buffer[n:] + + def close(self) -> None: + self._sock.close() + + def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> NetworkStream: + exc_map: ExceptionMapping = { + socket.timeout: ConnectTimeout, + OSError: ConnectError, + } + with map_exceptions(exc_map): + try: + if isinstance(self._sock, ssl.SSLSocket): # pragma: no cover + # If the underlying socket has already been upgraded + # to the TLS layer (i.e. is an instance of SSLSocket), + # we need some additional smarts to support TLS-in-TLS. + return TLSinTLSStream( + self._sock, ssl_context, server_hostname, timeout + ) + else: + self._sock.settimeout(timeout) + sock = ssl_context.wrap_socket( + self._sock, server_hostname=server_hostname + ) + except Exception as exc: # pragma: nocover + self.close() + raise exc + return SyncStream(sock) + + def get_extra_info(self, info: str) -> typing.Any: + if info == "ssl_object" and isinstance(self._sock, ssl.SSLSocket): + return self._sock._sslobj # type: ignore + if info == "client_addr": + return self._sock.getsockname() + if info == "server_addr": + return self._sock.getpeername() + if info == "socket": + return self._sock + if info == "is_readable": + return is_socket_readable(self._sock) + return None + + +class SyncBackend(NetworkBackend): + def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> NetworkStream: + # Note that we automatically include `TCP_NODELAY` + # in addition to any other custom socket options. + if socket_options is None: + socket_options = [] # pragma: no cover + address = (host, port) + source_address = None if local_address is None else (local_address, 0) + exc_map: ExceptionMapping = { + socket.timeout: ConnectTimeout, + OSError: ConnectError, + } + + with map_exceptions(exc_map): + sock = socket.create_connection( + address, + timeout, + source_address=source_address, + ) + for option in socket_options: + sock.setsockopt(*option) # pragma: no cover + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + return SyncStream(sock) + + def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> NetworkStream: # pragma: nocover + if sys.platform == "win32": + raise RuntimeError( + "Attempted to connect to a UNIX socket on a Windows system." + ) + if socket_options is None: + socket_options = [] + + exc_map: ExceptionMapping = { + socket.timeout: ConnectTimeout, + OSError: ConnectError, + } + with map_exceptions(exc_map): + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + for option in socket_options: + sock.setsockopt(*option) + sock.settimeout(timeout) + sock.connect(path) + return SyncStream(sock) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/trio.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/trio.py new file mode 100644 index 0000000000000000000000000000000000000000..6f53f5f2a025e01e9949e2530bd9ca6928859251 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_backends/trio.py @@ -0,0 +1,159 @@ +from __future__ import annotations + +import ssl +import typing + +import trio + +from .._exceptions import ( + ConnectError, + ConnectTimeout, + ExceptionMapping, + ReadError, + ReadTimeout, + WriteError, + WriteTimeout, + map_exceptions, +) +from .base import SOCKET_OPTION, AsyncNetworkBackend, AsyncNetworkStream + + +class TrioStream(AsyncNetworkStream): + def __init__(self, stream: trio.abc.Stream) -> None: + self._stream = stream + + async def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + timeout_or_inf = float("inf") if timeout is None else timeout + exc_map: ExceptionMapping = { + trio.TooSlowError: ReadTimeout, + trio.BrokenResourceError: ReadError, + trio.ClosedResourceError: ReadError, + } + with map_exceptions(exc_map): + with trio.fail_after(timeout_or_inf): + data: bytes = await self._stream.receive_some(max_bytes=max_bytes) + return data + + async def write(self, buffer: bytes, timeout: float | None = None) -> None: + if not buffer: + return + + timeout_or_inf = float("inf") if timeout is None else timeout + exc_map: ExceptionMapping = { + trio.TooSlowError: WriteTimeout, + trio.BrokenResourceError: WriteError, + trio.ClosedResourceError: WriteError, + } + with map_exceptions(exc_map): + with trio.fail_after(timeout_or_inf): + await self._stream.send_all(data=buffer) + + async def aclose(self) -> None: + await self._stream.aclose() + + async def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> AsyncNetworkStream: + timeout_or_inf = float("inf") if timeout is None else timeout + exc_map: ExceptionMapping = { + trio.TooSlowError: ConnectTimeout, + trio.BrokenResourceError: ConnectError, + } + ssl_stream = trio.SSLStream( + self._stream, + ssl_context=ssl_context, + server_hostname=server_hostname, + https_compatible=True, + server_side=False, + ) + with map_exceptions(exc_map): + try: + with trio.fail_after(timeout_or_inf): + await ssl_stream.do_handshake() + except Exception as exc: # pragma: nocover + await self.aclose() + raise exc + return TrioStream(ssl_stream) + + def get_extra_info(self, info: str) -> typing.Any: + if info == "ssl_object" and isinstance(self._stream, trio.SSLStream): + # Type checkers cannot see `_ssl_object` attribute because trio._ssl.SSLStream uses __getattr__/__setattr__. + # Tracked at https://github.com/python-trio/trio/issues/542 + return self._stream._ssl_object # type: ignore[attr-defined] + if info == "client_addr": + return self._get_socket_stream().socket.getsockname() + if info == "server_addr": + return self._get_socket_stream().socket.getpeername() + if info == "socket": + stream = self._stream + while isinstance(stream, trio.SSLStream): + stream = stream.transport_stream + assert isinstance(stream, trio.SocketStream) + return stream.socket + if info == "is_readable": + socket = self.get_extra_info("socket") + return socket.is_readable() + return None + + def _get_socket_stream(self) -> trio.SocketStream: + stream = self._stream + while isinstance(stream, trio.SSLStream): + stream = stream.transport_stream + assert isinstance(stream, trio.SocketStream) + return stream + + +class TrioBackend(AsyncNetworkBackend): + async def connect_tcp( + self, + host: str, + port: int, + timeout: float | None = None, + local_address: str | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: + # By default for TCP sockets, trio enables TCP_NODELAY. + # https://trio.readthedocs.io/en/stable/reference-io.html#trio.SocketStream + if socket_options is None: + socket_options = [] # pragma: no cover + timeout_or_inf = float("inf") if timeout is None else timeout + exc_map: ExceptionMapping = { + trio.TooSlowError: ConnectTimeout, + trio.BrokenResourceError: ConnectError, + OSError: ConnectError, + } + with map_exceptions(exc_map): + with trio.fail_after(timeout_or_inf): + stream: trio.abc.Stream = await trio.open_tcp_stream( + host=host, port=port, local_address=local_address + ) + for option in socket_options: + stream.setsockopt(*option) # type: ignore[attr-defined] # pragma: no cover + return TrioStream(stream) + + async def connect_unix_socket( + self, + path: str, + timeout: float | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> AsyncNetworkStream: # pragma: nocover + if socket_options is None: + socket_options = [] + timeout_or_inf = float("inf") if timeout is None else timeout + exc_map: ExceptionMapping = { + trio.TooSlowError: ConnectTimeout, + trio.BrokenResourceError: ConnectError, + OSError: ConnectError, + } + with map_exceptions(exc_map): + with trio.fail_after(timeout_or_inf): + stream: trio.abc.Stream = await trio.open_unix_socket(path) + for option in socket_options: + stream.setsockopt(*option) # type: ignore[attr-defined] # pragma: no cover + return TrioStream(stream) + + async def sleep(self, seconds: float) -> None: + await trio.sleep(seconds) # pragma: nocover diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b476d76d9a7ff45de8d18ec22d33d6af2982f92e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__init__.py @@ -0,0 +1,39 @@ +from .connection import HTTPConnection +from .connection_pool import ConnectionPool +from .http11 import HTTP11Connection +from .http_proxy import HTTPProxy +from .interfaces import ConnectionInterface + +try: + from .http2 import HTTP2Connection +except ImportError: # pragma: nocover + + class HTTP2Connection: # type: ignore + def __init__(self, *args, **kwargs) -> None: # type: ignore + raise RuntimeError( + "Attempted to use http2 support, but the `h2` package is not " + "installed. Use 'pip install httpcore[http2]'." + ) + + +try: + from .socks_proxy import SOCKSProxy +except ImportError: # pragma: nocover + + class SOCKSProxy: # type: ignore + def __init__(self, *args, **kwargs) -> None: # type: ignore + raise RuntimeError( + "Attempted to use SOCKS support, but the `socksio` package is not " + "installed. Use 'pip install httpcore[socks]'." + ) + + +__all__ = [ + "HTTPConnection", + "ConnectionPool", + "HTTPProxy", + "HTTP11Connection", + "HTTP2Connection", + "ConnectionInterface", + "SOCKSProxy", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1162d2571fe475cda658ce939494fa879f78277c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/connection.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/connection.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..363c4b1fcd29df4e845d97a8a1f6339f2ba729f4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/connection.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/connection_pool.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/connection_pool.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b41192702761d3448eb25877622f30c9eee1c38f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/connection_pool.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http11.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http11.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..471854d853cf464e7b1c192f8103152ceff05c8f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http11.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89bbd78d7818164e65a8488d936574b03c304799 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http_proxy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http_proxy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40f720be141c383f9d69b605f207bb18e3a2a33e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/http_proxy.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/interfaces.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/interfaces.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6e7e996f9977967f5026e4b1e8a0f340086a58e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/interfaces.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/socks_proxy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/socks_proxy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9999aeb4e1f73b4a74ae31c069981eda42e84124 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/__pycache__/socks_proxy.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/connection.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/connection.py new file mode 100644 index 0000000000000000000000000000000000000000..363f8be819d2576ea65365e625dd1596ea40429a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/connection.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +import itertools +import logging +import ssl +import types +import typing + +from .._backends.sync import SyncBackend +from .._backends.base import SOCKET_OPTION, NetworkBackend, NetworkStream +from .._exceptions import ConnectError, ConnectTimeout +from .._models import Origin, Request, Response +from .._ssl import default_ssl_context +from .._synchronization import Lock +from .._trace import Trace +from .http11 import HTTP11Connection +from .interfaces import ConnectionInterface + +RETRIES_BACKOFF_FACTOR = 0.5 # 0s, 0.5s, 1s, 2s, 4s, etc. + + +logger = logging.getLogger("httpcore.connection") + + +def exponential_backoff(factor: float) -> typing.Iterator[float]: + """ + Generate a geometric sequence that has a ratio of 2 and starts with 0. + + For example: + - `factor = 2`: `0, 2, 4, 8, 16, 32, 64, ...` + - `factor = 3`: `0, 3, 6, 12, 24, 48, 96, ...` + """ + yield 0 + for n in itertools.count(): + yield factor * 2**n + + +class HTTPConnection(ConnectionInterface): + def __init__( + self, + origin: Origin, + ssl_context: ssl.SSLContext | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + local_address: str | None = None, + uds: str | None = None, + network_backend: NetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + self._origin = origin + self._ssl_context = ssl_context + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + self._retries = retries + self._local_address = local_address + self._uds = uds + + self._network_backend: NetworkBackend = ( + SyncBackend() if network_backend is None else network_backend + ) + self._connection: ConnectionInterface | None = None + self._connect_failed: bool = False + self._request_lock = Lock() + self._socket_options = socket_options + + def handle_request(self, request: Request) -> Response: + if not self.can_handle_request(request.url.origin): + raise RuntimeError( + f"Attempted to send request to {request.url.origin} on connection to {self._origin}" + ) + + try: + with self._request_lock: + if self._connection is None: + stream = self._connect(request) + + ssl_object = stream.get_extra_info("ssl_object") + http2_negotiated = ( + ssl_object is not None + and ssl_object.selected_alpn_protocol() == "h2" + ) + if http2_negotiated or (self._http2 and not self._http1): + from .http2 import HTTP2Connection + + self._connection = HTTP2Connection( + origin=self._origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + else: + self._connection = HTTP11Connection( + origin=self._origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + except BaseException as exc: + self._connect_failed = True + raise exc + + return self._connection.handle_request(request) + + def _connect(self, request: Request) -> NetworkStream: + timeouts = request.extensions.get("timeout", {}) + sni_hostname = request.extensions.get("sni_hostname", None) + timeout = timeouts.get("connect", None) + + retries_left = self._retries + delays = exponential_backoff(factor=RETRIES_BACKOFF_FACTOR) + + while True: + try: + if self._uds is None: + kwargs = { + "host": self._origin.host.decode("ascii"), + "port": self._origin.port, + "local_address": self._local_address, + "timeout": timeout, + "socket_options": self._socket_options, + } + with Trace("connect_tcp", logger, request, kwargs) as trace: + stream = self._network_backend.connect_tcp(**kwargs) + trace.return_value = stream + else: + kwargs = { + "path": self._uds, + "timeout": timeout, + "socket_options": self._socket_options, + } + with Trace( + "connect_unix_socket", logger, request, kwargs + ) as trace: + stream = self._network_backend.connect_unix_socket( + **kwargs + ) + trace.return_value = stream + + if self._origin.scheme in (b"https", b"wss"): + ssl_context = ( + default_ssl_context() + if self._ssl_context is None + else self._ssl_context + ) + alpn_protocols = ["http/1.1", "h2"] if self._http2 else ["http/1.1"] + ssl_context.set_alpn_protocols(alpn_protocols) + + kwargs = { + "ssl_context": ssl_context, + "server_hostname": sni_hostname + or self._origin.host.decode("ascii"), + "timeout": timeout, + } + with Trace("start_tls", logger, request, kwargs) as trace: + stream = stream.start_tls(**kwargs) + trace.return_value = stream + return stream + except (ConnectError, ConnectTimeout): + if retries_left <= 0: + raise + retries_left -= 1 + delay = next(delays) + with Trace("retry", logger, request, kwargs) as trace: + self._network_backend.sleep(delay) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._origin + + def close(self) -> None: + if self._connection is not None: + with Trace("close", logger, None, {}): + self._connection.close() + + def is_available(self) -> bool: + if self._connection is None: + # If HTTP/2 support is enabled, and the resulting connection could + # end up as HTTP/2 then we should indicate the connection as being + # available to service multiple requests. + return ( + self._http2 + and (self._origin.scheme == b"https" or not self._http1) + and not self._connect_failed + ) + return self._connection.is_available() + + def has_expired(self) -> bool: + if self._connection is None: + return self._connect_failed + return self._connection.has_expired() + + def is_idle(self) -> bool: + if self._connection is None: + return self._connect_failed + return self._connection.is_idle() + + def is_closed(self) -> bool: + if self._connection is None: + return self._connect_failed + return self._connection.is_closed() + + def info(self) -> str: + if self._connection is None: + return "CONNECTION FAILED" if self._connect_failed else "CONNECTING" + return self._connection.info() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" + + # These context managers are not used in the standard flow, but are + # useful for testing or working with connection instances directly. + + def __enter__(self) -> HTTPConnection: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + self.close() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/connection_pool.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/connection_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..9ccfa53e597a29ee387f9d16f3af4f695ac0d33a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/connection_pool.py @@ -0,0 +1,420 @@ +from __future__ import annotations + +import ssl +import sys +import types +import typing + +from .._backends.sync import SyncBackend +from .._backends.base import SOCKET_OPTION, NetworkBackend +from .._exceptions import ConnectionNotAvailable, UnsupportedProtocol +from .._models import Origin, Proxy, Request, Response +from .._synchronization import Event, ShieldCancellation, ThreadLock +from .connection import HTTPConnection +from .interfaces import ConnectionInterface, RequestInterface + + +class PoolRequest: + def __init__(self, request: Request) -> None: + self.request = request + self.connection: ConnectionInterface | None = None + self._connection_acquired = Event() + + def assign_to_connection(self, connection: ConnectionInterface | None) -> None: + self.connection = connection + self._connection_acquired.set() + + def clear_connection(self) -> None: + self.connection = None + self._connection_acquired = Event() + + def wait_for_connection( + self, timeout: float | None = None + ) -> ConnectionInterface: + if self.connection is None: + self._connection_acquired.wait(timeout=timeout) + assert self.connection is not None + return self.connection + + def is_queued(self) -> bool: + return self.connection is None + + +class ConnectionPool(RequestInterface): + """ + A connection pool for making HTTP requests. + """ + + def __init__( + self, + ssl_context: ssl.SSLContext | None = None, + proxy: Proxy | None = None, + max_connections: int | None = 10, + max_keepalive_connections: int | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + local_address: str | None = None, + uds: str | None = None, + network_backend: NetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + """ + A connection pool for making HTTP requests. + + Parameters: + ssl_context: An SSL context to use for verifying connections. + If not specified, the default `httpcore.default_ssl_context()` + will be used. + max_connections: The maximum number of concurrent HTTP connections that + the pool should allow. Any attempt to send a request on a pool that + would exceed this amount will block until a connection is available. + max_keepalive_connections: The maximum number of idle HTTP connections + that will be maintained in the pool. + keepalive_expiry: The duration in seconds that an idle HTTP connection + may be maintained for before being expired from the pool. + http1: A boolean indicating if HTTP/1.1 requests should be supported + by the connection pool. Defaults to True. + http2: A boolean indicating if HTTP/2 requests should be supported by + the connection pool. Defaults to False. + retries: The maximum number of retries when trying to establish a + connection. + local_address: Local address to connect from. Can also be used to connect + using a particular address family. Using `local_address="0.0.0.0"` + will connect using an `AF_INET` address (IPv4), while using + `local_address="::"` will connect using an `AF_INET6` address (IPv6). + uds: Path to a Unix Domain Socket to use instead of TCP sockets. + network_backend: A backend instance to use for handling network I/O. + socket_options: Socket options that have to be included + in the TCP socket when the connection was established. + """ + self._ssl_context = ssl_context + self._proxy = proxy + self._max_connections = ( + sys.maxsize if max_connections is None else max_connections + ) + self._max_keepalive_connections = ( + sys.maxsize + if max_keepalive_connections is None + else max_keepalive_connections + ) + self._max_keepalive_connections = min( + self._max_connections, self._max_keepalive_connections + ) + + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + self._retries = retries + self._local_address = local_address + self._uds = uds + + self._network_backend = ( + SyncBackend() if network_backend is None else network_backend + ) + self._socket_options = socket_options + + # The mutable state on a connection pool is the queue of incoming requests, + # and the set of connections that are servicing those requests. + self._connections: list[ConnectionInterface] = [] + self._requests: list[PoolRequest] = [] + + # We only mutate the state of the connection pool within an 'optional_thread_lock' + # context. This holds a threading lock unless we're running in async mode, + # in which case it is a no-op. + self._optional_thread_lock = ThreadLock() + + def create_connection(self, origin: Origin) -> ConnectionInterface: + if self._proxy is not None: + if self._proxy.url.scheme in (b"socks5", b"socks5h"): + from .socks_proxy import Socks5Connection + + return Socks5Connection( + proxy_origin=self._proxy.url.origin, + proxy_auth=self._proxy.auth, + remote_origin=origin, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + elif origin.scheme == b"http": + from .http_proxy import ForwardHTTPConnection + + return ForwardHTTPConnection( + proxy_origin=self._proxy.url.origin, + proxy_headers=self._proxy.headers, + proxy_ssl_context=self._proxy.ssl_context, + remote_origin=origin, + keepalive_expiry=self._keepalive_expiry, + network_backend=self._network_backend, + ) + from .http_proxy import TunnelHTTPConnection + + return TunnelHTTPConnection( + proxy_origin=self._proxy.url.origin, + proxy_headers=self._proxy.headers, + proxy_ssl_context=self._proxy.ssl_context, + remote_origin=origin, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + + return HTTPConnection( + origin=origin, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + retries=self._retries, + local_address=self._local_address, + uds=self._uds, + network_backend=self._network_backend, + socket_options=self._socket_options, + ) + + @property + def connections(self) -> list[ConnectionInterface]: + """ + Return a list of the connections currently in the pool. + + For example: + + ```python + >>> pool.connections + [ + , + , + , + ] + ``` + """ + return list(self._connections) + + def handle_request(self, request: Request) -> Response: + """ + Send an HTTP request, and return an HTTP response. + + This is the core implementation that is called into by `.request()` or `.stream()`. + """ + scheme = request.url.scheme.decode() + if scheme == "": + raise UnsupportedProtocol( + "Request URL is missing an 'http://' or 'https://' protocol." + ) + if scheme not in ("http", "https", "ws", "wss"): + raise UnsupportedProtocol( + f"Request URL has an unsupported protocol '{scheme}://'." + ) + + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("pool", None) + + with self._optional_thread_lock: + # Add the incoming request to our request queue. + pool_request = PoolRequest(request) + self._requests.append(pool_request) + + try: + while True: + with self._optional_thread_lock: + # Assign incoming requests to available connections, + # closing or creating new connections as required. + closing = self._assign_requests_to_connections() + self._close_connections(closing) + + # Wait until this request has an assigned connection. + connection = pool_request.wait_for_connection(timeout=timeout) + + try: + # Send the request on the assigned connection. + response = connection.handle_request( + pool_request.request + ) + except ConnectionNotAvailable: + # In some cases a connection may initially be available to + # handle a request, but then become unavailable. + # + # In this case we clear the connection and try again. + pool_request.clear_connection() + else: + break # pragma: nocover + + except BaseException as exc: + with self._optional_thread_lock: + # For any exception or cancellation we remove the request from + # the queue, and then re-assign requests to connections. + self._requests.remove(pool_request) + closing = self._assign_requests_to_connections() + + self._close_connections(closing) + raise exc from None + + # Return the response. Note that in this case we still have to manage + # the point at which the response is closed. + assert isinstance(response.stream, typing.Iterable) + return Response( + status=response.status, + headers=response.headers, + content=PoolByteStream( + stream=response.stream, pool_request=pool_request, pool=self + ), + extensions=response.extensions, + ) + + def _assign_requests_to_connections(self) -> list[ConnectionInterface]: + """ + Manage the state of the connection pool, assigning incoming + requests to connections as available. + + Called whenever a new request is added or removed from the pool. + + Any closing connections are returned, allowing the I/O for closing + those connections to be handled seperately. + """ + closing_connections = [] + + # First we handle cleaning up any connections that are closed, + # have expired their keep-alive, or surplus idle connections. + for connection in list(self._connections): + if connection.is_closed(): + # log: "removing closed connection" + self._connections.remove(connection) + elif connection.has_expired(): + # log: "closing expired connection" + self._connections.remove(connection) + closing_connections.append(connection) + elif ( + connection.is_idle() + and len([connection.is_idle() for connection in self._connections]) + > self._max_keepalive_connections + ): + # log: "closing idle connection" + self._connections.remove(connection) + closing_connections.append(connection) + + # Assign queued requests to connections. + queued_requests = [request for request in self._requests if request.is_queued()] + for pool_request in queued_requests: + origin = pool_request.request.url.origin + available_connections = [ + connection + for connection in self._connections + if connection.can_handle_request(origin) and connection.is_available() + ] + idle_connections = [ + connection for connection in self._connections if connection.is_idle() + ] + + # There are three cases for how we may be able to handle the request: + # + # 1. There is an existing connection that can handle the request. + # 2. We can create a new connection to handle the request. + # 3. We can close an idle connection and then create a new connection + # to handle the request. + if available_connections: + # log: "reusing existing connection" + connection = available_connections[0] + pool_request.assign_to_connection(connection) + elif len(self._connections) < self._max_connections: + # log: "creating new connection" + connection = self.create_connection(origin) + self._connections.append(connection) + pool_request.assign_to_connection(connection) + elif idle_connections: + # log: "closing idle connection" + connection = idle_connections[0] + self._connections.remove(connection) + closing_connections.append(connection) + # log: "creating new connection" + connection = self.create_connection(origin) + self._connections.append(connection) + pool_request.assign_to_connection(connection) + + return closing_connections + + def _close_connections(self, closing: list[ConnectionInterface]) -> None: + # Close connections which have been removed from the pool. + with ShieldCancellation(): + for connection in closing: + connection.close() + + def close(self) -> None: + # Explicitly close the connection pool. + # Clears all existing requests and connections. + with self._optional_thread_lock: + closing_connections = list(self._connections) + self._connections = [] + self._close_connections(closing_connections) + + def __enter__(self) -> ConnectionPool: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + self.close() + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + with self._optional_thread_lock: + request_is_queued = [request.is_queued() for request in self._requests] + connection_is_idle = [ + connection.is_idle() for connection in self._connections + ] + + num_active_requests = request_is_queued.count(False) + num_queued_requests = request_is_queued.count(True) + num_active_connections = connection_is_idle.count(False) + num_idle_connections = connection_is_idle.count(True) + + requests_info = ( + f"Requests: {num_active_requests} active, {num_queued_requests} queued" + ) + connection_info = ( + f"Connections: {num_active_connections} active, {num_idle_connections} idle" + ) + + return f"<{class_name} [{requests_info} | {connection_info}]>" + + +class PoolByteStream: + def __init__( + self, + stream: typing.Iterable[bytes], + pool_request: PoolRequest, + pool: ConnectionPool, + ) -> None: + self._stream = stream + self._pool_request = pool_request + self._pool = pool + self._closed = False + + def __iter__(self) -> typing.Iterator[bytes]: + try: + for part in self._stream: + yield part + except BaseException as exc: + self.close() + raise exc from None + + def close(self) -> None: + if not self._closed: + self._closed = True + with ShieldCancellation(): + if hasattr(self._stream, "close"): + self._stream.close() + + with self._pool._optional_thread_lock: + self._pool._requests.remove(self._pool_request) + closing = self._pool._assign_requests_to_connections() + + self._pool._close_connections(closing) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http11.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http11.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd3a97480c720d418acb1285a7b75da19b62c8c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http11.py @@ -0,0 +1,379 @@ +from __future__ import annotations + +import enum +import logging +import ssl +import time +import types +import typing + +import h11 + +from .._backends.base import NetworkStream +from .._exceptions import ( + ConnectionNotAvailable, + LocalProtocolError, + RemoteProtocolError, + WriteError, + map_exceptions, +) +from .._models import Origin, Request, Response +from .._synchronization import Lock, ShieldCancellation +from .._trace import Trace +from .interfaces import ConnectionInterface + +logger = logging.getLogger("httpcore.http11") + + +# A subset of `h11.Event` types supported by `_send_event` +H11SendEvent = typing.Union[ + h11.Request, + h11.Data, + h11.EndOfMessage, +] + + +class HTTPConnectionState(enum.IntEnum): + NEW = 0 + ACTIVE = 1 + IDLE = 2 + CLOSED = 3 + + +class HTTP11Connection(ConnectionInterface): + READ_NUM_BYTES = 64 * 1024 + MAX_INCOMPLETE_EVENT_SIZE = 100 * 1024 + + def __init__( + self, + origin: Origin, + stream: NetworkStream, + keepalive_expiry: float | None = None, + ) -> None: + self._origin = origin + self._network_stream = stream + self._keepalive_expiry: float | None = keepalive_expiry + self._expire_at: float | None = None + self._state = HTTPConnectionState.NEW + self._state_lock = Lock() + self._request_count = 0 + self._h11_state = h11.Connection( + our_role=h11.CLIENT, + max_incomplete_event_size=self.MAX_INCOMPLETE_EVENT_SIZE, + ) + + def handle_request(self, request: Request) -> Response: + if not self.can_handle_request(request.url.origin): + raise RuntimeError( + f"Attempted to send request to {request.url.origin} on connection " + f"to {self._origin}" + ) + + with self._state_lock: + if self._state in (HTTPConnectionState.NEW, HTTPConnectionState.IDLE): + self._request_count += 1 + self._state = HTTPConnectionState.ACTIVE + self._expire_at = None + else: + raise ConnectionNotAvailable() + + try: + kwargs = {"request": request} + try: + with Trace( + "send_request_headers", logger, request, kwargs + ) as trace: + self._send_request_headers(**kwargs) + with Trace("send_request_body", logger, request, kwargs) as trace: + self._send_request_body(**kwargs) + except WriteError: + # If we get a write error while we're writing the request, + # then we supress this error and move on to attempting to + # read the response. Servers can sometimes close the request + # pre-emptively and then respond with a well formed HTTP + # error response. + pass + + with Trace( + "receive_response_headers", logger, request, kwargs + ) as trace: + ( + http_version, + status, + reason_phrase, + headers, + trailing_data, + ) = self._receive_response_headers(**kwargs) + trace.return_value = ( + http_version, + status, + reason_phrase, + headers, + ) + + network_stream = self._network_stream + + # CONNECT or Upgrade request + if (status == 101) or ( + (request.method == b"CONNECT") and (200 <= status < 300) + ): + network_stream = HTTP11UpgradeStream(network_stream, trailing_data) + + return Response( + status=status, + headers=headers, + content=HTTP11ConnectionByteStream(self, request), + extensions={ + "http_version": http_version, + "reason_phrase": reason_phrase, + "network_stream": network_stream, + }, + ) + except BaseException as exc: + with ShieldCancellation(): + with Trace("response_closed", logger, request) as trace: + self._response_closed() + raise exc + + # Sending the request... + + def _send_request_headers(self, request: Request) -> None: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("write", None) + + with map_exceptions({h11.LocalProtocolError: LocalProtocolError}): + event = h11.Request( + method=request.method, + target=request.url.target, + headers=request.headers, + ) + self._send_event(event, timeout=timeout) + + def _send_request_body(self, request: Request) -> None: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("write", None) + + assert isinstance(request.stream, typing.Iterable) + for chunk in request.stream: + event = h11.Data(data=chunk) + self._send_event(event, timeout=timeout) + + self._send_event(h11.EndOfMessage(), timeout=timeout) + + def _send_event(self, event: h11.Event, timeout: float | None = None) -> None: + bytes_to_send = self._h11_state.send(event) + if bytes_to_send is not None: + self._network_stream.write(bytes_to_send, timeout=timeout) + + # Receiving the response... + + def _receive_response_headers( + self, request: Request + ) -> tuple[bytes, int, bytes, list[tuple[bytes, bytes]], bytes]: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("read", None) + + while True: + event = self._receive_event(timeout=timeout) + if isinstance(event, h11.Response): + break + if ( + isinstance(event, h11.InformationalResponse) + and event.status_code == 101 + ): + break + + http_version = b"HTTP/" + event.http_version + + # h11 version 0.11+ supports a `raw_items` interface to get the + # raw header casing, rather than the enforced lowercase headers. + headers = event.headers.raw_items() + + trailing_data, _ = self._h11_state.trailing_data + + return http_version, event.status_code, event.reason, headers, trailing_data + + def _receive_response_body( + self, request: Request + ) -> typing.Iterator[bytes]: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("read", None) + + while True: + event = self._receive_event(timeout=timeout) + if isinstance(event, h11.Data): + yield bytes(event.data) + elif isinstance(event, (h11.EndOfMessage, h11.PAUSED)): + break + + def _receive_event( + self, timeout: float | None = None + ) -> h11.Event | type[h11.PAUSED]: + while True: + with map_exceptions({h11.RemoteProtocolError: RemoteProtocolError}): + event = self._h11_state.next_event() + + if event is h11.NEED_DATA: + data = self._network_stream.read( + self.READ_NUM_BYTES, timeout=timeout + ) + + # If we feed this case through h11 we'll raise an exception like: + # + # httpcore.RemoteProtocolError: can't handle event type + # ConnectionClosed when role=SERVER and state=SEND_RESPONSE + # + # Which is accurate, but not very informative from an end-user + # perspective. Instead we handle this case distinctly and treat + # it as a ConnectError. + if data == b"" and self._h11_state.their_state == h11.SEND_RESPONSE: + msg = "Server disconnected without sending a response." + raise RemoteProtocolError(msg) + + self._h11_state.receive_data(data) + else: + # mypy fails to narrow the type in the above if statement above + return event # type: ignore[return-value] + + def _response_closed(self) -> None: + with self._state_lock: + if ( + self._h11_state.our_state is h11.DONE + and self._h11_state.their_state is h11.DONE + ): + self._state = HTTPConnectionState.IDLE + self._h11_state.start_next_cycle() + if self._keepalive_expiry is not None: + now = time.monotonic() + self._expire_at = now + self._keepalive_expiry + else: + self.close() + + # Once the connection is no longer required... + + def close(self) -> None: + # Note that this method unilaterally closes the connection, and does + # not have any kind of locking in place around it. + self._state = HTTPConnectionState.CLOSED + self._network_stream.close() + + # The ConnectionInterface methods provide information about the state of + # the connection, allowing for a connection pooling implementation to + # determine when to reuse and when to close the connection... + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._origin + + def is_available(self) -> bool: + # Note that HTTP/1.1 connections in the "NEW" state are not treated as + # being "available". The control flow which created the connection will + # be able to send an outgoing request, but the connection will not be + # acquired from the connection pool for any other request. + return self._state == HTTPConnectionState.IDLE + + def has_expired(self) -> bool: + now = time.monotonic() + keepalive_expired = self._expire_at is not None and now > self._expire_at + + # If the HTTP connection is idle but the socket is readable, then the + # only valid state is that the socket is about to return b"", indicating + # a server-initiated disconnect. + server_disconnected = ( + self._state == HTTPConnectionState.IDLE + and self._network_stream.get_extra_info("is_readable") + ) + + return keepalive_expired or server_disconnected + + def is_idle(self) -> bool: + return self._state == HTTPConnectionState.IDLE + + def is_closed(self) -> bool: + return self._state == HTTPConnectionState.CLOSED + + def info(self) -> str: + origin = str(self._origin) + return ( + f"{origin!r}, HTTP/1.1, {self._state.name}, " + f"Request Count: {self._request_count}" + ) + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + origin = str(self._origin) + return ( + f"<{class_name} [{origin!r}, {self._state.name}, " + f"Request Count: {self._request_count}]>" + ) + + # These context managers are not used in the standard flow, but are + # useful for testing or working with connection instances directly. + + def __enter__(self) -> HTTP11Connection: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + self.close() + + +class HTTP11ConnectionByteStream: + def __init__(self, connection: HTTP11Connection, request: Request) -> None: + self._connection = connection + self._request = request + self._closed = False + + def __iter__(self) -> typing.Iterator[bytes]: + kwargs = {"request": self._request} + try: + with Trace("receive_response_body", logger, self._request, kwargs): + for chunk in self._connection._receive_response_body(**kwargs): + yield chunk + except BaseException as exc: + # If we get an exception while streaming the response, + # we want to close the response (and possibly the connection) + # before raising that exception. + with ShieldCancellation(): + self.close() + raise exc + + def close(self) -> None: + if not self._closed: + self._closed = True + with Trace("response_closed", logger, self._request): + self._connection._response_closed() + + +class HTTP11UpgradeStream(NetworkStream): + def __init__(self, stream: NetworkStream, leading_data: bytes) -> None: + self._stream = stream + self._leading_data = leading_data + + def read(self, max_bytes: int, timeout: float | None = None) -> bytes: + if self._leading_data: + buffer = self._leading_data[:max_bytes] + self._leading_data = self._leading_data[max_bytes:] + return buffer + else: + return self._stream.read(max_bytes, timeout) + + def write(self, buffer: bytes, timeout: float | None = None) -> None: + self._stream.write(buffer, timeout) + + def close(self) -> None: + self._stream.close() + + def start_tls( + self, + ssl_context: ssl.SSLContext, + server_hostname: str | None = None, + timeout: float | None = None, + ) -> NetworkStream: + return self._stream.start_tls(ssl_context, server_hostname, timeout) + + def get_extra_info(self, info: str) -> typing.Any: + return self._stream.get_extra_info(info) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http2.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http2.py new file mode 100644 index 0000000000000000000000000000000000000000..ca4dd724325cc7be23e667c2a4d46c63c5e78a80 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http2.py @@ -0,0 +1,583 @@ +from __future__ import annotations + +import enum +import logging +import time +import types +import typing + +import h2.config +import h2.connection +import h2.events +import h2.exceptions +import h2.settings + +from .._backends.base import NetworkStream +from .._exceptions import ( + ConnectionNotAvailable, + LocalProtocolError, + RemoteProtocolError, +) +from .._models import Origin, Request, Response +from .._synchronization import Lock, Semaphore, ShieldCancellation +from .._trace import Trace +from .interfaces import ConnectionInterface + +logger = logging.getLogger("httpcore.http2") + + +def has_body_headers(request: Request) -> bool: + return any( + k.lower() == b"content-length" or k.lower() == b"transfer-encoding" + for k, v in request.headers + ) + + +class HTTPConnectionState(enum.IntEnum): + ACTIVE = 1 + IDLE = 2 + CLOSED = 3 + + +class HTTP2Connection(ConnectionInterface): + READ_NUM_BYTES = 64 * 1024 + CONFIG = h2.config.H2Configuration(validate_inbound_headers=False) + + def __init__( + self, + origin: Origin, + stream: NetworkStream, + keepalive_expiry: float | None = None, + ): + self._origin = origin + self._network_stream = stream + self._keepalive_expiry: float | None = keepalive_expiry + self._h2_state = h2.connection.H2Connection(config=self.CONFIG) + self._state = HTTPConnectionState.IDLE + self._expire_at: float | None = None + self._request_count = 0 + self._init_lock = Lock() + self._state_lock = Lock() + self._read_lock = Lock() + self._write_lock = Lock() + self._sent_connection_init = False + self._used_all_stream_ids = False + self._connection_error = False + + # Mapping from stream ID to response stream events. + self._events: dict[ + int, + h2.events.ResponseReceived + | h2.events.DataReceived + | h2.events.StreamEnded + | h2.events.StreamReset, + ] = {} + + # Connection terminated events are stored as state since + # we need to handle them for all streams. + self._connection_terminated: h2.events.ConnectionTerminated | None = None + + self._read_exception: Exception | None = None + self._write_exception: Exception | None = None + + def handle_request(self, request: Request) -> Response: + if not self.can_handle_request(request.url.origin): + # This cannot occur in normal operation, since the connection pool + # will only send requests on connections that handle them. + # It's in place simply for resilience as a guard against incorrect + # usage, for anyone working directly with httpcore connections. + raise RuntimeError( + f"Attempted to send request to {request.url.origin} on connection " + f"to {self._origin}" + ) + + with self._state_lock: + if self._state in (HTTPConnectionState.ACTIVE, HTTPConnectionState.IDLE): + self._request_count += 1 + self._expire_at = None + self._state = HTTPConnectionState.ACTIVE + else: + raise ConnectionNotAvailable() + + with self._init_lock: + if not self._sent_connection_init: + try: + kwargs = {"request": request} + with Trace("send_connection_init", logger, request, kwargs): + self._send_connection_init(**kwargs) + except BaseException as exc: + with ShieldCancellation(): + self.close() + raise exc + + self._sent_connection_init = True + + # Initially start with just 1 until the remote server provides + # its max_concurrent_streams value + self._max_streams = 1 + + local_settings_max_streams = ( + self._h2_state.local_settings.max_concurrent_streams + ) + self._max_streams_semaphore = Semaphore(local_settings_max_streams) + + for _ in range(local_settings_max_streams - self._max_streams): + self._max_streams_semaphore.acquire() + + self._max_streams_semaphore.acquire() + + try: + stream_id = self._h2_state.get_next_available_stream_id() + self._events[stream_id] = [] + except h2.exceptions.NoAvailableStreamIDError: # pragma: nocover + self._used_all_stream_ids = True + self._request_count -= 1 + raise ConnectionNotAvailable() + + try: + kwargs = {"request": request, "stream_id": stream_id} + with Trace("send_request_headers", logger, request, kwargs): + self._send_request_headers(request=request, stream_id=stream_id) + with Trace("send_request_body", logger, request, kwargs): + self._send_request_body(request=request, stream_id=stream_id) + with Trace( + "receive_response_headers", logger, request, kwargs + ) as trace: + status, headers = self._receive_response( + request=request, stream_id=stream_id + ) + trace.return_value = (status, headers) + + return Response( + status=status, + headers=headers, + content=HTTP2ConnectionByteStream(self, request, stream_id=stream_id), + extensions={ + "http_version": b"HTTP/2", + "network_stream": self._network_stream, + "stream_id": stream_id, + }, + ) + except BaseException as exc: # noqa: PIE786 + with ShieldCancellation(): + kwargs = {"stream_id": stream_id} + with Trace("response_closed", logger, request, kwargs): + self._response_closed(stream_id=stream_id) + + if isinstance(exc, h2.exceptions.ProtocolError): + # One case where h2 can raise a protocol error is when a + # closed frame has been seen by the state machine. + # + # This happens when one stream is reading, and encounters + # a GOAWAY event. Other flows of control may then raise + # a protocol error at any point they interact with the 'h2_state'. + # + # In this case we'll have stored the event, and should raise + # it as a RemoteProtocolError. + if self._connection_terminated: # pragma: nocover + raise RemoteProtocolError(self._connection_terminated) + # If h2 raises a protocol error in some other state then we + # must somehow have made a protocol violation. + raise LocalProtocolError(exc) # pragma: nocover + + raise exc + + def _send_connection_init(self, request: Request) -> None: + """ + The HTTP/2 connection requires some initial setup before we can start + using individual request/response streams on it. + """ + # Need to set these manually here instead of manipulating via + # __setitem__() otherwise the H2Connection will emit SettingsUpdate + # frames in addition to sending the undesired defaults. + self._h2_state.local_settings = h2.settings.Settings( + client=True, + initial_values={ + # Disable PUSH_PROMISE frames from the server since we don't do anything + # with them for now. Maybe when we support caching? + h2.settings.SettingCodes.ENABLE_PUSH: 0, + # These two are taken from h2 for safe defaults + h2.settings.SettingCodes.MAX_CONCURRENT_STREAMS: 100, + h2.settings.SettingCodes.MAX_HEADER_LIST_SIZE: 65536, + }, + ) + + # Some websites (*cough* Yahoo *cough*) balk at this setting being + # present in the initial handshake since it's not defined in the original + # RFC despite the RFC mandating ignoring settings you don't know about. + del self._h2_state.local_settings[ + h2.settings.SettingCodes.ENABLE_CONNECT_PROTOCOL + ] + + self._h2_state.initiate_connection() + self._h2_state.increment_flow_control_window(2**24) + self._write_outgoing_data(request) + + # Sending the request... + + def _send_request_headers(self, request: Request, stream_id: int) -> None: + """ + Send the request headers to a given stream ID. + """ + end_stream = not has_body_headers(request) + + # In HTTP/2 the ':authority' pseudo-header is used instead of 'Host'. + # In order to gracefully handle HTTP/1.1 and HTTP/2 we always require + # HTTP/1.1 style headers, and map them appropriately if we end up on + # an HTTP/2 connection. + authority = [v for k, v in request.headers if k.lower() == b"host"][0] + + headers = [ + (b":method", request.method), + (b":authority", authority), + (b":scheme", request.url.scheme), + (b":path", request.url.target), + ] + [ + (k.lower(), v) + for k, v in request.headers + if k.lower() + not in ( + b"host", + b"transfer-encoding", + ) + ] + + self._h2_state.send_headers(stream_id, headers, end_stream=end_stream) + self._h2_state.increment_flow_control_window(2**24, stream_id=stream_id) + self._write_outgoing_data(request) + + def _send_request_body(self, request: Request, stream_id: int) -> None: + """ + Iterate over the request body sending it to a given stream ID. + """ + if not has_body_headers(request): + return + + assert isinstance(request.stream, typing.Iterable) + for data in request.stream: + self._send_stream_data(request, stream_id, data) + self._send_end_stream(request, stream_id) + + def _send_stream_data( + self, request: Request, stream_id: int, data: bytes + ) -> None: + """ + Send a single chunk of data in one or more data frames. + """ + while data: + max_flow = self._wait_for_outgoing_flow(request, stream_id) + chunk_size = min(len(data), max_flow) + chunk, data = data[:chunk_size], data[chunk_size:] + self._h2_state.send_data(stream_id, chunk) + self._write_outgoing_data(request) + + def _send_end_stream(self, request: Request, stream_id: int) -> None: + """ + Send an empty data frame on on a given stream ID with the END_STREAM flag set. + """ + self._h2_state.end_stream(stream_id) + self._write_outgoing_data(request) + + # Receiving the response... + + def _receive_response( + self, request: Request, stream_id: int + ) -> tuple[int, list[tuple[bytes, bytes]]]: + """ + Return the response status code and headers for a given stream ID. + """ + while True: + event = self._receive_stream_event(request, stream_id) + if isinstance(event, h2.events.ResponseReceived): + break + + status_code = 200 + headers = [] + for k, v in event.headers: + if k == b":status": + status_code = int(v.decode("ascii", errors="ignore")) + elif not k.startswith(b":"): + headers.append((k, v)) + + return (status_code, headers) + + def _receive_response_body( + self, request: Request, stream_id: int + ) -> typing.Iterator[bytes]: + """ + Iterator that returns the bytes of the response body for a given stream ID. + """ + while True: + event = self._receive_stream_event(request, stream_id) + if isinstance(event, h2.events.DataReceived): + amount = event.flow_controlled_length + self._h2_state.acknowledge_received_data(amount, stream_id) + self._write_outgoing_data(request) + yield event.data + elif isinstance(event, h2.events.StreamEnded): + break + + def _receive_stream_event( + self, request: Request, stream_id: int + ) -> h2.events.ResponseReceived | h2.events.DataReceived | h2.events.StreamEnded: + """ + Return the next available event for a given stream ID. + + Will read more data from the network if required. + """ + while not self._events.get(stream_id): + self._receive_events(request, stream_id) + event = self._events[stream_id].pop(0) + if isinstance(event, h2.events.StreamReset): + raise RemoteProtocolError(event) + return event + + def _receive_events( + self, request: Request, stream_id: int | None = None + ) -> None: + """ + Read some data from the network until we see one or more events + for a given stream ID. + """ + with self._read_lock: + if self._connection_terminated is not None: + last_stream_id = self._connection_terminated.last_stream_id + if stream_id and last_stream_id and stream_id > last_stream_id: + self._request_count -= 1 + raise ConnectionNotAvailable() + raise RemoteProtocolError(self._connection_terminated) + + # This conditional is a bit icky. We don't want to block reading if we've + # actually got an event to return for a given stream. We need to do that + # check *within* the atomic read lock. Though it also need to be optional, + # because when we call it from `_wait_for_outgoing_flow` we *do* want to + # block until we've available flow control, event when we have events + # pending for the stream ID we're attempting to send on. + if stream_id is None or not self._events.get(stream_id): + events = self._read_incoming_data(request) + for event in events: + if isinstance(event, h2.events.RemoteSettingsChanged): + with Trace( + "receive_remote_settings", logger, request + ) as trace: + self._receive_remote_settings_change(event) + trace.return_value = event + + elif isinstance( + event, + ( + h2.events.ResponseReceived, + h2.events.DataReceived, + h2.events.StreamEnded, + h2.events.StreamReset, + ), + ): + if event.stream_id in self._events: + self._events[event.stream_id].append(event) + + elif isinstance(event, h2.events.ConnectionTerminated): + self._connection_terminated = event + + self._write_outgoing_data(request) + + def _receive_remote_settings_change(self, event: h2.events.Event) -> None: + max_concurrent_streams = event.changed_settings.get( + h2.settings.SettingCodes.MAX_CONCURRENT_STREAMS + ) + if max_concurrent_streams: + new_max_streams = min( + max_concurrent_streams.new_value, + self._h2_state.local_settings.max_concurrent_streams, + ) + if new_max_streams and new_max_streams != self._max_streams: + while new_max_streams > self._max_streams: + self._max_streams_semaphore.release() + self._max_streams += 1 + while new_max_streams < self._max_streams: + self._max_streams_semaphore.acquire() + self._max_streams -= 1 + + def _response_closed(self, stream_id: int) -> None: + self._max_streams_semaphore.release() + del self._events[stream_id] + with self._state_lock: + if self._connection_terminated and not self._events: + self.close() + + elif self._state == HTTPConnectionState.ACTIVE and not self._events: + self._state = HTTPConnectionState.IDLE + if self._keepalive_expiry is not None: + now = time.monotonic() + self._expire_at = now + self._keepalive_expiry + if self._used_all_stream_ids: # pragma: nocover + self.close() + + def close(self) -> None: + # Note that this method unilaterally closes the connection, and does + # not have any kind of locking in place around it. + self._h2_state.close_connection() + self._state = HTTPConnectionState.CLOSED + self._network_stream.close() + + # Wrappers around network read/write operations... + + def _read_incoming_data(self, request: Request) -> list[h2.events.Event]: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("read", None) + + if self._read_exception is not None: + raise self._read_exception # pragma: nocover + + try: + data = self._network_stream.read(self.READ_NUM_BYTES, timeout) + if data == b"": + raise RemoteProtocolError("Server disconnected") + except Exception as exc: + # If we get a network error we should: + # + # 1. Save the exception and just raise it immediately on any future reads. + # (For example, this means that a single read timeout or disconnect will + # immediately close all pending streams. Without requiring multiple + # sequential timeouts.) + # 2. Mark the connection as errored, so that we don't accept any other + # incoming requests. + self._read_exception = exc + self._connection_error = True + raise exc + + events: list[h2.events.Event] = self._h2_state.receive_data(data) + + return events + + def _write_outgoing_data(self, request: Request) -> None: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("write", None) + + with self._write_lock: + data_to_send = self._h2_state.data_to_send() + + if self._write_exception is not None: + raise self._write_exception # pragma: nocover + + try: + self._network_stream.write(data_to_send, timeout) + except Exception as exc: # pragma: nocover + # If we get a network error we should: + # + # 1. Save the exception and just raise it immediately on any future write. + # (For example, this means that a single write timeout or disconnect will + # immediately close all pending streams. Without requiring multiple + # sequential timeouts.) + # 2. Mark the connection as errored, so that we don't accept any other + # incoming requests. + self._write_exception = exc + self._connection_error = True + raise exc + + # Flow control... + + def _wait_for_outgoing_flow(self, request: Request, stream_id: int) -> int: + """ + Returns the maximum allowable outgoing flow for a given stream. + + If the allowable flow is zero, then waits on the network until + WindowUpdated frames have increased the flow rate. + https://tools.ietf.org/html/rfc7540#section-6.9 + """ + local_flow: int = self._h2_state.local_flow_control_window(stream_id) + max_frame_size: int = self._h2_state.max_outbound_frame_size + flow = min(local_flow, max_frame_size) + while flow == 0: + self._receive_events(request) + local_flow = self._h2_state.local_flow_control_window(stream_id) + max_frame_size = self._h2_state.max_outbound_frame_size + flow = min(local_flow, max_frame_size) + return flow + + # Interface for connection pooling... + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._origin + + def is_available(self) -> bool: + return ( + self._state != HTTPConnectionState.CLOSED + and not self._connection_error + and not self._used_all_stream_ids + and not ( + self._h2_state.state_machine.state + == h2.connection.ConnectionState.CLOSED + ) + ) + + def has_expired(self) -> bool: + now = time.monotonic() + return self._expire_at is not None and now > self._expire_at + + def is_idle(self) -> bool: + return self._state == HTTPConnectionState.IDLE + + def is_closed(self) -> bool: + return self._state == HTTPConnectionState.CLOSED + + def info(self) -> str: + origin = str(self._origin) + return ( + f"{origin!r}, HTTP/2, {self._state.name}, " + f"Request Count: {self._request_count}" + ) + + def __repr__(self) -> str: + class_name = self.__class__.__name__ + origin = str(self._origin) + return ( + f"<{class_name} [{origin!r}, {self._state.name}, " + f"Request Count: {self._request_count}]>" + ) + + # These context managers are not used in the standard flow, but are + # useful for testing or working with connection instances directly. + + def __enter__(self) -> HTTP2Connection: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + traceback: types.TracebackType | None = None, + ) -> None: + self.close() + + +class HTTP2ConnectionByteStream: + def __init__( + self, connection: HTTP2Connection, request: Request, stream_id: int + ) -> None: + self._connection = connection + self._request = request + self._stream_id = stream_id + self._closed = False + + def __iter__(self) -> typing.Iterator[bytes]: + kwargs = {"request": self._request, "stream_id": self._stream_id} + try: + with Trace("receive_response_body", logger, self._request, kwargs): + for chunk in self._connection._receive_response_body( + request=self._request, stream_id=self._stream_id + ): + yield chunk + except BaseException as exc: + # If we get an exception while streaming the response, + # we want to close the response (and possibly the connection) + # before raising that exception. + with ShieldCancellation(): + self.close() + raise exc + + def close(self) -> None: + if not self._closed: + self._closed = True + kwargs = {"stream_id": self._stream_id} + with Trace("response_closed", logger, self._request, kwargs): + self._connection._response_closed(stream_id=self._stream_id) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http_proxy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http_proxy.py new file mode 100644 index 0000000000000000000000000000000000000000..ecca88f7dc93b78f2aa26f16cf29d17a8a83ae27 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/http_proxy.py @@ -0,0 +1,367 @@ +from __future__ import annotations + +import base64 +import logging +import ssl +import typing + +from .._backends.base import SOCKET_OPTION, NetworkBackend +from .._exceptions import ProxyError +from .._models import ( + URL, + Origin, + Request, + Response, + enforce_bytes, + enforce_headers, + enforce_url, +) +from .._ssl import default_ssl_context +from .._synchronization import Lock +from .._trace import Trace +from .connection import HTTPConnection +from .connection_pool import ConnectionPool +from .http11 import HTTP11Connection +from .interfaces import ConnectionInterface + +ByteOrStr = typing.Union[bytes, str] +HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]] +HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr] + + +logger = logging.getLogger("httpcore.proxy") + + +def merge_headers( + default_headers: typing.Sequence[tuple[bytes, bytes]] | None = None, + override_headers: typing.Sequence[tuple[bytes, bytes]] | None = None, +) -> list[tuple[bytes, bytes]]: + """ + Append default_headers and override_headers, de-duplicating if a key exists + in both cases. + """ + default_headers = [] if default_headers is None else list(default_headers) + override_headers = [] if override_headers is None else list(override_headers) + has_override = set(key.lower() for key, value in override_headers) + default_headers = [ + (key, value) + for key, value in default_headers + if key.lower() not in has_override + ] + return default_headers + override_headers + + +class HTTPProxy(ConnectionPool): # pragma: nocover + """ + A connection pool that sends requests via an HTTP proxy. + """ + + def __init__( + self, + proxy_url: URL | bytes | str, + proxy_auth: tuple[bytes | str, bytes | str] | None = None, + proxy_headers: HeadersAsMapping | HeadersAsSequence | None = None, + ssl_context: ssl.SSLContext | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + max_connections: int | None = 10, + max_keepalive_connections: int | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + local_address: str | None = None, + uds: str | None = None, + network_backend: NetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + """ + A connection pool for making HTTP requests. + + Parameters: + proxy_url: The URL to use when connecting to the proxy server. + For example `"http://127.0.0.1:8080/"`. + proxy_auth: Any proxy authentication as a two-tuple of + (username, password). May be either bytes or ascii-only str. + proxy_headers: Any HTTP headers to use for the proxy requests. + For example `{"Proxy-Authorization": "Basic :"}`. + ssl_context: An SSL context to use for verifying connections. + If not specified, the default `httpcore.default_ssl_context()` + will be used. + proxy_ssl_context: The same as `ssl_context`, but for a proxy server rather than a remote origin. + max_connections: The maximum number of concurrent HTTP connections that + the pool should allow. Any attempt to send a request on a pool that + would exceed this amount will block until a connection is available. + max_keepalive_connections: The maximum number of idle HTTP connections + that will be maintained in the pool. + keepalive_expiry: The duration in seconds that an idle HTTP connection + may be maintained for before being expired from the pool. + http1: A boolean indicating if HTTP/1.1 requests should be supported + by the connection pool. Defaults to True. + http2: A boolean indicating if HTTP/2 requests should be supported by + the connection pool. Defaults to False. + retries: The maximum number of retries when trying to establish + a connection. + local_address: Local address to connect from. Can also be used to + connect using a particular address family. Using + `local_address="0.0.0.0"` will connect using an `AF_INET` address + (IPv4), while using `local_address="::"` will connect using an + `AF_INET6` address (IPv6). + uds: Path to a Unix Domain Socket to use instead of TCP sockets. + network_backend: A backend instance to use for handling network I/O. + """ + super().__init__( + ssl_context=ssl_context, + max_connections=max_connections, + max_keepalive_connections=max_keepalive_connections, + keepalive_expiry=keepalive_expiry, + http1=http1, + http2=http2, + network_backend=network_backend, + retries=retries, + local_address=local_address, + uds=uds, + socket_options=socket_options, + ) + + self._proxy_url = enforce_url(proxy_url, name="proxy_url") + if ( + self._proxy_url.scheme == b"http" and proxy_ssl_context is not None + ): # pragma: no cover + raise RuntimeError( + "The `proxy_ssl_context` argument is not allowed for the http scheme" + ) + + self._ssl_context = ssl_context + self._proxy_ssl_context = proxy_ssl_context + self._proxy_headers = enforce_headers(proxy_headers, name="proxy_headers") + if proxy_auth is not None: + username = enforce_bytes(proxy_auth[0], name="proxy_auth") + password = enforce_bytes(proxy_auth[1], name="proxy_auth") + userpass = username + b":" + password + authorization = b"Basic " + base64.b64encode(userpass) + self._proxy_headers = [ + (b"Proxy-Authorization", authorization) + ] + self._proxy_headers + + def create_connection(self, origin: Origin) -> ConnectionInterface: + if origin.scheme == b"http": + return ForwardHTTPConnection( + proxy_origin=self._proxy_url.origin, + proxy_headers=self._proxy_headers, + remote_origin=origin, + keepalive_expiry=self._keepalive_expiry, + network_backend=self._network_backend, + proxy_ssl_context=self._proxy_ssl_context, + ) + return TunnelHTTPConnection( + proxy_origin=self._proxy_url.origin, + proxy_headers=self._proxy_headers, + remote_origin=origin, + ssl_context=self._ssl_context, + proxy_ssl_context=self._proxy_ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + + +class ForwardHTTPConnection(ConnectionInterface): + def __init__( + self, + proxy_origin: Origin, + remote_origin: Origin, + proxy_headers: HeadersAsMapping | HeadersAsSequence | None = None, + keepalive_expiry: float | None = None, + network_backend: NetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + ) -> None: + self._connection = HTTPConnection( + origin=proxy_origin, + keepalive_expiry=keepalive_expiry, + network_backend=network_backend, + socket_options=socket_options, + ssl_context=proxy_ssl_context, + ) + self._proxy_origin = proxy_origin + self._proxy_headers = enforce_headers(proxy_headers, name="proxy_headers") + self._remote_origin = remote_origin + + def handle_request(self, request: Request) -> Response: + headers = merge_headers(self._proxy_headers, request.headers) + url = URL( + scheme=self._proxy_origin.scheme, + host=self._proxy_origin.host, + port=self._proxy_origin.port, + target=bytes(request.url), + ) + proxy_request = Request( + method=request.method, + url=url, + headers=headers, + content=request.stream, + extensions=request.extensions, + ) + return self._connection.handle_request(proxy_request) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._remote_origin + + def close(self) -> None: + self._connection.close() + + def info(self) -> str: + return self._connection.info() + + def is_available(self) -> bool: + return self._connection.is_available() + + def has_expired(self) -> bool: + return self._connection.has_expired() + + def is_idle(self) -> bool: + return self._connection.is_idle() + + def is_closed(self) -> bool: + return self._connection.is_closed() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" + + +class TunnelHTTPConnection(ConnectionInterface): + def __init__( + self, + proxy_origin: Origin, + remote_origin: Origin, + ssl_context: ssl.SSLContext | None = None, + proxy_ssl_context: ssl.SSLContext | None = None, + proxy_headers: typing.Sequence[tuple[bytes, bytes]] | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + network_backend: NetworkBackend | None = None, + socket_options: typing.Iterable[SOCKET_OPTION] | None = None, + ) -> None: + self._connection: ConnectionInterface = HTTPConnection( + origin=proxy_origin, + keepalive_expiry=keepalive_expiry, + network_backend=network_backend, + socket_options=socket_options, + ssl_context=proxy_ssl_context, + ) + self._proxy_origin = proxy_origin + self._remote_origin = remote_origin + self._ssl_context = ssl_context + self._proxy_ssl_context = proxy_ssl_context + self._proxy_headers = enforce_headers(proxy_headers, name="proxy_headers") + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + self._connect_lock = Lock() + self._connected = False + + def handle_request(self, request: Request) -> Response: + timeouts = request.extensions.get("timeout", {}) + timeout = timeouts.get("connect", None) + + with self._connect_lock: + if not self._connected: + target = b"%b:%d" % (self._remote_origin.host, self._remote_origin.port) + + connect_url = URL( + scheme=self._proxy_origin.scheme, + host=self._proxy_origin.host, + port=self._proxy_origin.port, + target=target, + ) + connect_headers = merge_headers( + [(b"Host", target), (b"Accept", b"*/*")], self._proxy_headers + ) + connect_request = Request( + method=b"CONNECT", + url=connect_url, + headers=connect_headers, + extensions=request.extensions, + ) + connect_response = self._connection.handle_request( + connect_request + ) + + if connect_response.status < 200 or connect_response.status > 299: + reason_bytes = connect_response.extensions.get("reason_phrase", b"") + reason_str = reason_bytes.decode("ascii", errors="ignore") + msg = "%d %s" % (connect_response.status, reason_str) + self._connection.close() + raise ProxyError(msg) + + stream = connect_response.extensions["network_stream"] + + # Upgrade the stream to SSL + ssl_context = ( + default_ssl_context() + if self._ssl_context is None + else self._ssl_context + ) + alpn_protocols = ["http/1.1", "h2"] if self._http2 else ["http/1.1"] + ssl_context.set_alpn_protocols(alpn_protocols) + + kwargs = { + "ssl_context": ssl_context, + "server_hostname": self._remote_origin.host.decode("ascii"), + "timeout": timeout, + } + with Trace("start_tls", logger, request, kwargs) as trace: + stream = stream.start_tls(**kwargs) + trace.return_value = stream + + # Determine if we should be using HTTP/1.1 or HTTP/2 + ssl_object = stream.get_extra_info("ssl_object") + http2_negotiated = ( + ssl_object is not None + and ssl_object.selected_alpn_protocol() == "h2" + ) + + # Create the HTTP/1.1 or HTTP/2 connection + if http2_negotiated or (self._http2 and not self._http1): + from .http2 import HTTP2Connection + + self._connection = HTTP2Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + else: + self._connection = HTTP11Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + + self._connected = True + return self._connection.handle_request(request) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._remote_origin + + def close(self) -> None: + self._connection.close() + + def info(self) -> str: + return self._connection.info() + + def is_available(self) -> bool: + return self._connection.is_available() + + def has_expired(self) -> bool: + return self._connection.has_expired() + + def is_idle(self) -> bool: + return self._connection.is_idle() + + def is_closed(self) -> bool: + return self._connection.is_closed() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/interfaces.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/interfaces.py new file mode 100644 index 0000000000000000000000000000000000000000..e673d4cc1b1dd7e7ecdbde91fd6ada386c3de03f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/interfaces.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import contextlib +import typing + +from .._models import ( + URL, + Extensions, + HeaderTypes, + Origin, + Request, + Response, + enforce_bytes, + enforce_headers, + enforce_url, + include_request_headers, +) + + +class RequestInterface: + def request( + self, + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes | typing.Iterator[bytes] | None = None, + extensions: Extensions | None = None, + ) -> Response: + # Strict type checking on our parameters. + method = enforce_bytes(method, name="method") + url = enforce_url(url, name="url") + headers = enforce_headers(headers, name="headers") + + # Include Host header, and optionally Content-Length or Transfer-Encoding. + headers = include_request_headers(headers, url=url, content=content) + + request = Request( + method=method, + url=url, + headers=headers, + content=content, + extensions=extensions, + ) + response = self.handle_request(request) + try: + response.read() + finally: + response.close() + return response + + @contextlib.contextmanager + def stream( + self, + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes | typing.Iterator[bytes] | None = None, + extensions: Extensions | None = None, + ) -> typing.Iterator[Response]: + # Strict type checking on our parameters. + method = enforce_bytes(method, name="method") + url = enforce_url(url, name="url") + headers = enforce_headers(headers, name="headers") + + # Include Host header, and optionally Content-Length or Transfer-Encoding. + headers = include_request_headers(headers, url=url, content=content) + + request = Request( + method=method, + url=url, + headers=headers, + content=content, + extensions=extensions, + ) + response = self.handle_request(request) + try: + yield response + finally: + response.close() + + def handle_request(self, request: Request) -> Response: + raise NotImplementedError() # pragma: nocover + + +class ConnectionInterface(RequestInterface): + def close(self) -> None: + raise NotImplementedError() # pragma: nocover + + def info(self) -> str: + raise NotImplementedError() # pragma: nocover + + def can_handle_request(self, origin: Origin) -> bool: + raise NotImplementedError() # pragma: nocover + + def is_available(self) -> bool: + """ + Return `True` if the connection is currently able to accept an + outgoing request. + + An HTTP/1.1 connection will only be available if it is currently idle. + + An HTTP/2 connection will be available so long as the stream ID space is + not yet exhausted, and the connection is not in an error state. + + While the connection is being established we may not yet know if it is going + to result in an HTTP/1.1 or HTTP/2 connection. The connection should be + treated as being available, but might ultimately raise `NewConnectionRequired` + required exceptions if multiple requests are attempted over a connection + that ends up being established as HTTP/1.1. + """ + raise NotImplementedError() # pragma: nocover + + def has_expired(self) -> bool: + """ + Return `True` if the connection is in a state where it should be closed. + + This either means that the connection is idle and it has passed the + expiry time on its keep-alive, or that server has sent an EOF. + """ + raise NotImplementedError() # pragma: nocover + + def is_idle(self) -> bool: + """ + Return `True` if the connection is currently idle. + """ + raise NotImplementedError() # pragma: nocover + + def is_closed(self) -> bool: + """ + Return `True` if the connection has been closed. + + Used when a response is closed to determine if the connection may be + returned to the connection pool or not. + """ + raise NotImplementedError() # pragma: nocover diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/socks_proxy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/socks_proxy.py new file mode 100644 index 0000000000000000000000000000000000000000..0ca96ddfb580b19413797f41e79f7abcecdd9d79 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpcore/_sync/socks_proxy.py @@ -0,0 +1,341 @@ +from __future__ import annotations + +import logging +import ssl + +import socksio + +from .._backends.sync import SyncBackend +from .._backends.base import NetworkBackend, NetworkStream +from .._exceptions import ConnectionNotAvailable, ProxyError +from .._models import URL, Origin, Request, Response, enforce_bytes, enforce_url +from .._ssl import default_ssl_context +from .._synchronization import Lock +from .._trace import Trace +from .connection_pool import ConnectionPool +from .http11 import HTTP11Connection +from .interfaces import ConnectionInterface + +logger = logging.getLogger("httpcore.socks") + + +AUTH_METHODS = { + b"\x00": "NO AUTHENTICATION REQUIRED", + b"\x01": "GSSAPI", + b"\x02": "USERNAME/PASSWORD", + b"\xff": "NO ACCEPTABLE METHODS", +} + +REPLY_CODES = { + b"\x00": "Succeeded", + b"\x01": "General SOCKS server failure", + b"\x02": "Connection not allowed by ruleset", + b"\x03": "Network unreachable", + b"\x04": "Host unreachable", + b"\x05": "Connection refused", + b"\x06": "TTL expired", + b"\x07": "Command not supported", + b"\x08": "Address type not supported", +} + + +def _init_socks5_connection( + stream: NetworkStream, + *, + host: bytes, + port: int, + auth: tuple[bytes, bytes] | None = None, +) -> None: + conn = socksio.socks5.SOCKS5Connection() + + # Auth method request + auth_method = ( + socksio.socks5.SOCKS5AuthMethod.NO_AUTH_REQUIRED + if auth is None + else socksio.socks5.SOCKS5AuthMethod.USERNAME_PASSWORD + ) + conn.send(socksio.socks5.SOCKS5AuthMethodsRequest([auth_method])) + outgoing_bytes = conn.data_to_send() + stream.write(outgoing_bytes) + + # Auth method response + incoming_bytes = stream.read(max_bytes=4096) + response = conn.receive_data(incoming_bytes) + assert isinstance(response, socksio.socks5.SOCKS5AuthReply) + if response.method != auth_method: + requested = AUTH_METHODS.get(auth_method, "UNKNOWN") + responded = AUTH_METHODS.get(response.method, "UNKNOWN") + raise ProxyError( + f"Requested {requested} from proxy server, but got {responded}." + ) + + if response.method == socksio.socks5.SOCKS5AuthMethod.USERNAME_PASSWORD: + # Username/password request + assert auth is not None + username, password = auth + conn.send(socksio.socks5.SOCKS5UsernamePasswordRequest(username, password)) + outgoing_bytes = conn.data_to_send() + stream.write(outgoing_bytes) + + # Username/password response + incoming_bytes = stream.read(max_bytes=4096) + response = conn.receive_data(incoming_bytes) + assert isinstance(response, socksio.socks5.SOCKS5UsernamePasswordReply) + if not response.success: + raise ProxyError("Invalid username/password") + + # Connect request + conn.send( + socksio.socks5.SOCKS5CommandRequest.from_address( + socksio.socks5.SOCKS5Command.CONNECT, (host, port) + ) + ) + outgoing_bytes = conn.data_to_send() + stream.write(outgoing_bytes) + + # Connect response + incoming_bytes = stream.read(max_bytes=4096) + response = conn.receive_data(incoming_bytes) + assert isinstance(response, socksio.socks5.SOCKS5Reply) + if response.reply_code != socksio.socks5.SOCKS5ReplyCode.SUCCEEDED: + reply_code = REPLY_CODES.get(response.reply_code, "UNKOWN") + raise ProxyError(f"Proxy Server could not connect: {reply_code}.") + + +class SOCKSProxy(ConnectionPool): # pragma: nocover + """ + A connection pool that sends requests via an HTTP proxy. + """ + + def __init__( + self, + proxy_url: URL | bytes | str, + proxy_auth: tuple[bytes | str, bytes | str] | None = None, + ssl_context: ssl.SSLContext | None = None, + max_connections: int | None = 10, + max_keepalive_connections: int | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + retries: int = 0, + network_backend: NetworkBackend | None = None, + ) -> None: + """ + A connection pool for making HTTP requests. + + Parameters: + proxy_url: The URL to use when connecting to the proxy server. + For example `"http://127.0.0.1:8080/"`. + ssl_context: An SSL context to use for verifying connections. + If not specified, the default `httpcore.default_ssl_context()` + will be used. + max_connections: The maximum number of concurrent HTTP connections that + the pool should allow. Any attempt to send a request on a pool that + would exceed this amount will block until a connection is available. + max_keepalive_connections: The maximum number of idle HTTP connections + that will be maintained in the pool. + keepalive_expiry: The duration in seconds that an idle HTTP connection + may be maintained for before being expired from the pool. + http1: A boolean indicating if HTTP/1.1 requests should be supported + by the connection pool. Defaults to True. + http2: A boolean indicating if HTTP/2 requests should be supported by + the connection pool. Defaults to False. + retries: The maximum number of retries when trying to establish + a connection. + local_address: Local address to connect from. Can also be used to + connect using a particular address family. Using + `local_address="0.0.0.0"` will connect using an `AF_INET` address + (IPv4), while using `local_address="::"` will connect using an + `AF_INET6` address (IPv6). + uds: Path to a Unix Domain Socket to use instead of TCP sockets. + network_backend: A backend instance to use for handling network I/O. + """ + super().__init__( + ssl_context=ssl_context, + max_connections=max_connections, + max_keepalive_connections=max_keepalive_connections, + keepalive_expiry=keepalive_expiry, + http1=http1, + http2=http2, + network_backend=network_backend, + retries=retries, + ) + self._ssl_context = ssl_context + self._proxy_url = enforce_url(proxy_url, name="proxy_url") + if proxy_auth is not None: + username, password = proxy_auth + username_bytes = enforce_bytes(username, name="proxy_auth") + password_bytes = enforce_bytes(password, name="proxy_auth") + self._proxy_auth: tuple[bytes, bytes] | None = ( + username_bytes, + password_bytes, + ) + else: + self._proxy_auth = None + + def create_connection(self, origin: Origin) -> ConnectionInterface: + return Socks5Connection( + proxy_origin=self._proxy_url.origin, + remote_origin=origin, + proxy_auth=self._proxy_auth, + ssl_context=self._ssl_context, + keepalive_expiry=self._keepalive_expiry, + http1=self._http1, + http2=self._http2, + network_backend=self._network_backend, + ) + + +class Socks5Connection(ConnectionInterface): + def __init__( + self, + proxy_origin: Origin, + remote_origin: Origin, + proxy_auth: tuple[bytes, bytes] | None = None, + ssl_context: ssl.SSLContext | None = None, + keepalive_expiry: float | None = None, + http1: bool = True, + http2: bool = False, + network_backend: NetworkBackend | None = None, + ) -> None: + self._proxy_origin = proxy_origin + self._remote_origin = remote_origin + self._proxy_auth = proxy_auth + self._ssl_context = ssl_context + self._keepalive_expiry = keepalive_expiry + self._http1 = http1 + self._http2 = http2 + + self._network_backend: NetworkBackend = ( + SyncBackend() if network_backend is None else network_backend + ) + self._connect_lock = Lock() + self._connection: ConnectionInterface | None = None + self._connect_failed = False + + def handle_request(self, request: Request) -> Response: + timeouts = request.extensions.get("timeout", {}) + sni_hostname = request.extensions.get("sni_hostname", None) + timeout = timeouts.get("connect", None) + + with self._connect_lock: + if self._connection is None: + try: + # Connect to the proxy + kwargs = { + "host": self._proxy_origin.host.decode("ascii"), + "port": self._proxy_origin.port, + "timeout": timeout, + } + with Trace("connect_tcp", logger, request, kwargs) as trace: + stream = self._network_backend.connect_tcp(**kwargs) + trace.return_value = stream + + # Connect to the remote host using socks5 + kwargs = { + "stream": stream, + "host": self._remote_origin.host.decode("ascii"), + "port": self._remote_origin.port, + "auth": self._proxy_auth, + } + with Trace( + "setup_socks5_connection", logger, request, kwargs + ) as trace: + _init_socks5_connection(**kwargs) + trace.return_value = stream + + # Upgrade the stream to SSL + if self._remote_origin.scheme == b"https": + ssl_context = ( + default_ssl_context() + if self._ssl_context is None + else self._ssl_context + ) + alpn_protocols = ( + ["http/1.1", "h2"] if self._http2 else ["http/1.1"] + ) + ssl_context.set_alpn_protocols(alpn_protocols) + + kwargs = { + "ssl_context": ssl_context, + "server_hostname": sni_hostname + or self._remote_origin.host.decode("ascii"), + "timeout": timeout, + } + with Trace("start_tls", logger, request, kwargs) as trace: + stream = stream.start_tls(**kwargs) + trace.return_value = stream + + # Determine if we should be using HTTP/1.1 or HTTP/2 + ssl_object = stream.get_extra_info("ssl_object") + http2_negotiated = ( + ssl_object is not None + and ssl_object.selected_alpn_protocol() == "h2" + ) + + # Create the HTTP/1.1 or HTTP/2 connection + if http2_negotiated or ( + self._http2 and not self._http1 + ): # pragma: nocover + from .http2 import HTTP2Connection + + self._connection = HTTP2Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + else: + self._connection = HTTP11Connection( + origin=self._remote_origin, + stream=stream, + keepalive_expiry=self._keepalive_expiry, + ) + except Exception as exc: + self._connect_failed = True + raise exc + elif not self._connection.is_available(): # pragma: nocover + raise ConnectionNotAvailable() + + return self._connection.handle_request(request) + + def can_handle_request(self, origin: Origin) -> bool: + return origin == self._remote_origin + + def close(self) -> None: + if self._connection is not None: + self._connection.close() + + def is_available(self) -> bool: + if self._connection is None: # pragma: nocover + # If HTTP/2 support is enabled, and the resulting connection could + # end up as HTTP/2 then we should indicate the connection as being + # available to service multiple requests. + return ( + self._http2 + and (self._remote_origin.scheme == b"https" or not self._http1) + and not self._connect_failed + ) + return self._connection.is_available() + + def has_expired(self) -> bool: + if self._connection is None: # pragma: nocover + return self._connect_failed + return self._connection.has_expired() + + def is_idle(self) -> bool: + if self._connection is None: # pragma: nocover + return self._connect_failed + return self._connection.is_idle() + + def is_closed(self) -> bool: + if self._connection is None: # pragma: nocover + return self._connect_failed + return self._connection.is_closed() + + def info(self) -> str: + if self._connection is None: # pragma: nocover + return "CONNECTION FAILED" if self._connect_failed else "CONNECTING" + return self._connection.info() + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.info()}]>" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/nvidia_cufile-1.15.1.6.dist-info/licenses/License.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/nvidia_cufile-1.15.1.6.dist-info/licenses/License.txt new file mode 100644 index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/nvidia_cufile-1.15.1.6.dist-info/licenses/License.txt @@ -0,0 +1,1568 @@ +End User License Agreement +-------------------------- + + +Preface +------- + +The Software License Agreement in Chapter 1 and the Supplement +in Chapter 2 contain license terms and conditions that govern +the use of NVIDIA software. By accepting this agreement, you +agree to comply with all the terms and conditions applicable +to the product(s) included herein. + + +NVIDIA Driver + + +Description + +This package contains the operating system driver and +fundamental system software components for NVIDIA GPUs. + + +NVIDIA CUDA Toolkit + + +Description + +The NVIDIA CUDA Toolkit provides command-line and graphical +tools for building, debugging and optimizing the performance +of applications accelerated by NVIDIA GPUs, runtime and math +libraries, and documentation including programming guides, +user manuals, and API references. + + +Default Install Location of CUDA Toolkit + +Windows platform: + +%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.# + +Linux platform: + +/usr/local/cuda-#.# + +Mac platform: + +/Developer/NVIDIA/CUDA-#.# + + +NVIDIA CUDA Samples + + +Description + +This package includes over 100+ CUDA examples that demonstrate +various CUDA programming principles, and efficient CUDA +implementation of algorithms in specific application domains. + + +Default Install Location of CUDA Samples + +Windows platform: + +%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.# + +Linux platform: + +/usr/local/cuda-#.#/samples + +and + +$HOME/NVIDIA_CUDA-#.#_Samples + +Mac platform: + +/Developer/NVIDIA/CUDA-#.#/samples + + +NVIDIA Nsight Visual Studio Edition (Windows only) + + +Description + +NVIDIA Nsight Development Platform, Visual Studio Edition is a +development environment integrated into Microsoft Visual +Studio that provides tools for debugging, profiling, analyzing +and optimizing your GPU computing and graphics applications. + + +Default Install Location of Nsight Visual Studio Edition + +Windows platform: + +%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.# + + +1. License Agreement for NVIDIA Software Development Kits +--------------------------------------------------------- + + +Release Date: July 26, 2018 +--------------------------- + + +Important NoticeRead before downloading, installing, +copying or using the licensed software: +------------------------------------------------------- + +This license agreement, including exhibits attached +("Agreement”) is a legal agreement between you and NVIDIA +Corporation ("NVIDIA") and governs your use of a NVIDIA +software development kit (“SDK”). + +Each SDK has its own set of software and materials, but here +is a description of the types of items that may be included in +a SDK: source code, header files, APIs, data sets and assets +(examples include images, textures, models, scenes, videos, +native API input/output files), binary software, sample code, +libraries, utility programs, programming code and +documentation. + +This Agreement can be accepted only by an adult of legal age +of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company +or other legal entity, you represent that you have the legal +authority to bind the entity to this Agreement, in which case +“you” will mean the entity you represent. + +If you don’t have the required age or authority to accept +this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use +the SDK. + +You agree to use the SDK only for purposes that are permitted +by (a) this Agreement, and (b) any applicable law, regulation +or generally accepted practices or guidelines in the relevant +jurisdictions. + + +1.1. License + + +1.1.1. License Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants +you a non-exclusive, non-transferable license, without the +right to sublicense (except as expressly provided in this +Agreement) to: + + 1. Install and use the SDK, + + 2. Modify and create derivative works of sample source code + delivered in the SDK, and + + 3. Distribute those portions of the SDK that are identified + in this Agreement as distributable, as incorporated in + object code format into a software application that meets + the distribution requirements indicated in this Agreement. + + +1.1.2. Distribution Requirements + +These are the distribution requirements for you to exercise +the distribution grant: + + 1. Your application must have material additional + functionality, beyond the included portions of the SDK. + + 2. The distributable portions of the SDK shall only be + accessed by your application. + + 3. The following notice shall be included in modifications + and derivative works of sample source code distributed: + “This software contains source code provided by NVIDIA + Corporation.” + + 4. Unless a developer tool is identified in this Agreement + as distributable, it is delivered for your internal use + only. + + 5. The terms under which you distribute your application + must be consistent with the terms of this Agreement, + including (without limitation) terms relating to the + license grant and license restrictions and protection of + NVIDIA’s intellectual property rights. Additionally, you + agree that you will protect the privacy, security and + legal rights of your application users. + + 6. You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + +1.1.3. Authorized Users + +You may allow employees and contractors of your entity or of +your subsidiary(ies) to access and use the SDK from your +secure network to perform work on your behalf. + +If you are an academic institution you may allow users +enrolled or employed by the academic institution to access and +use the SDK from your secure network. + +You are responsible for the compliance with the terms of this +Agreement by your authorized users. If you become aware that +your authorized users didn’t follow the terms of this +Agreement, you agree to take reasonable steps to resolve the +non-compliance and prevent new occurrences. + + +1.1.4. Pre-Release SDK + +The SDK versions identified as alpha, beta, preview or +otherwise as pre-release, may not be fully functional, may +contain errors or design flaws, and may have reduced or +different security, privacy, accessibility, availability, and +reliability standards relative to commercial versions of +NVIDIA software and materials. Use of a pre-release SDK may +result in unexpected results, loss of data, project delays or +other unpredictable damage or loss. + +You may use a pre-release SDK at your own risk, understanding +that pre-release SDKs are not intended for use in production +or business-critical systems. + +NVIDIA may choose not to make available a commercial version +of any pre-release SDK. NVIDIA may also choose to abandon +development and terminate the availability of a pre-release +SDK at any time without liability. + + +1.1.5. Updates + +NVIDIA may, at its option, make available patches, workarounds +or other updates to this SDK. Unless the updates are provided +with their separate governing terms, they are deemed part of +the SDK licensed to you as provided in this Agreement. You +agree that the form and content of the SDK that NVIDIA +provides may change without prior notice to you. While NVIDIA +generally maintains compatibility between versions, NVIDIA may +in some cases make changes that introduce incompatibilities in +future versions of the SDK. + + +1.1.6. Third Party Licenses + +The SDK may come bundled with, or otherwise include or be +distributed with, third party software licensed by a NVIDIA +supplier and/or open source software provided under an open +source license. Use of third party software is subject to the +third-party license terms, or in the absence of third party +terms, the terms of this Agreement. Copyright to third party +software is held by the copyright holders indicated in the +third-party software or license. + + +1.1.7. Reservation of Rights + +NVIDIA reserves all rights, title, and interest in and to the +SDK, not expressly granted to you under this Agreement. + + +1.2. Limitations + +The following license limitations apply to your use of the +SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. Unless you have an agreement with NVIDIA for this + purpose, you may not use the SDK with any system or + application where the use or failure of the system or + application can reasonably be expected to threaten or + result in personal injury, death, or catastrophic loss. + Examples include use in avionics, navigation, military, + medical, life support or other life critical applications. + NVIDIA does not design, test or manufacture the SDK for + these critical uses and NVIDIA shall not be liable to you + or any third party, in whole or in part, for any claims or + damages arising from such uses. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to your use of + the SDK outside of the scope of this Agreement, or not in + compliance with its terms. + + +1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights described in this + section. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights described in this section. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + +1.4. No Warranties + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL +FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND +ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND +OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE +ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO +WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF +DEALING OR COURSE OF TRADE. + + +1.5. Limitation of Liability + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS +AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, +PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS +OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF +PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION +WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, +WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH +OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF +LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES +TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS +AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE +NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS +LIMIT. + +These exclusions and limitations of liability shall apply +regardless if NVIDIA or its affiliates have been advised of +the possibility of such damages, and regardless of whether a +remedy fails its essential purpose. These exclusions and +limitations of liability form an essential basis of the +bargain between the parties, and, absent any of these +exclusions or limitations of liability, the provisions of this +Agreement, including, without limitation, the economic terms, +would be substantially different. + + +1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. (iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + +1.7. General + +If you wish to assign this Agreement or your rights and +obligations, including by merger, consolidation, dissolution +or operation of law, contact NVIDIA to ask for permission. Any +attempted assignment not approved by NVIDIA in writing shall +be void and of no effect. NVIDIA may assign, delegate or +transfer this Agreement and its rights and obligations, and if +to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this +Agreement. + +This Agreement will be governed in all respects by the laws of +the United States and of the State of Delaware as those laws +are applied to contracts entered into and performed entirely +within Delaware by Delaware residents, without regard to the +conflicts of laws principles. The United Nations Convention on +Contracts for the International Sale of Goods is specifically +disclaimed. You agree to all terms of this Agreement in the +English language. + +The state or federal courts residing in Santa Clara County, +California shall have exclusive jurisdiction over any dispute +or claim arising out of this Agreement. Notwithstanding this, +you agree that NVIDIA shall still be allowed to apply for +injunctive remedies or an equivalent type of urgent legal +relief in any jurisdiction. + +If any court of competent jurisdiction determines that any +provision of this Agreement is illegal, invalid or +unenforceable, such provision will be construed as limited to +the extent necessary to be consistent with and fully +enforceable under the law and the remaining provisions will +remain in full force and effect. Unless otherwise specified, +remedies are cumulative. + +Each party acknowledges and agrees that the other is an +independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is +“commercial items” consisting of “commercial computer +software” and “commercial computer software +documentation” provided with RESTRICTED RIGHTS. Use, +duplication or disclosure by the U.S. Government or a U.S. +Government subcontractor is subject to the restrictions in +this Agreement pursuant to DFARS 227.7202-3(a) or as set forth +in subparagraphs (c)(1) and (2) of the Commercial Computer +Software - Restricted Rights clause at FAR 52.227-19, as +applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas +Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and +regulations. You agree that you will not ship, transfer or +export the SDK into any country, or use the SDK in any manner, +prohibited by the United States Bureau of Industry and +Security or economic sanctions regulations administered by the +U.S. Department of Treasury’s Office of Foreign Assets +Control (OFAC), or any applicable export laws, restrictions or +regulations. These laws include restrictions on destinations, +end users and end use. By accepting this Agreement, you +confirm that you are not a resident or citizen of any country +currently embargoed by the U.S. and that you are not otherwise +prohibited from receiving the SDK. + +Any notice delivered by NVIDIA to you under this Agreement +will be delivered via mail, email or fax. You agree that any +notices that NVIDIA sends you electronically will satisfy any +legal communication requirements. Please direct your legal +notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United +States of America, Attention: Legal Department. + +This Agreement and any exhibits incorporated into this +Agreement constitute the entire agreement of the parties with +respect to the subject matter of this Agreement and supersede +all prior negotiations or documentation exchanged between the +parties relating to this SDK license. Any additional and/or +conflicting terms on documents issued by you are null, void, +and invalid. Any amendment or waiver under this Agreement +shall be in writing and signed by representatives of both +parties. + + +2. CUDA Toolkit Supplement to Software License Agreement for +NVIDIA Software Development Kits +------------------------------------------------------------ + + +Release date: August 16, 2018 +----------------------------- + +The terms in this supplement govern your use of the NVIDIA +CUDA Toolkit SDK under the terms of your license agreement +(“Agreement”) as modified by this supplement. Capitalized +terms used but not defined below have the meaning assigned to +them in the Agreement. + +This supplement is an exhibit to the Agreement and is +incorporated as an integral part of the Agreement. In the +event of conflict between the terms in this supplement and the +terms in the Agreement, the terms in this supplement govern. + + +2.1. License Scope + +The SDK is licensed for you to develop applications only for +use in systems with NVIDIA GPUs. + + +2.2. Distribution + +The portions of the SDK that are distributable under the +Agreement are listed in Attachment A. + + +2.3. Operating Systems + +Those portions of the SDK designed exclusively for use on the +Linux or FreeBSD operating systems, or other operating systems +derived from the source code to these operating systems, may +be copied and redistributed for use in accordance with this +Agreement, provided that the object code files are not +modified in any way (except for unzipping of compressed +files). + + +2.4. Audio and Video Encoders and Decoders + +You acknowledge and agree that it is your sole responsibility +to obtain any additional third-party licenses required to +make, have made, use, have used, sell, import, and offer for +sale your products or services that include or incorporate any +third-party software and content relating to audio and/or +video encoders and decoders from, including but not limited +to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., +MPEG-LA, and Coding Technologies. NVIDIA does not grant to you +under this Agreement any necessary patent or other rights with +respect to any audio and/or video encoders and decoders. + + +2.5. Licensing + +If the distribution terms in this Agreement are not suitable +for your organization, or for any questions regarding this +Agreement, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + + +2.6. Attachment A + +The following portions of the SDK are distributable under the +Agreement: + +Component + +CUDA Runtime + +Windows + +cudart.dll, cudart_static.lib, cudadevrt.lib + +Mac OSX + +libcudart.dylib, libcudart_static.a, libcudadevrt.a + +Linux + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Android + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Component + +CUDA FFT Library + +Windows + +cufft.dll, cufftw.dll, cufft.lib, cufftw.lib + +Mac OSX + +libcufft.dylib, libcufft_static.a, libcufftw.dylib, +libcufftw_static.a + +Linux + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Android + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Component + +CUDA BLAS Library + +Windows + +cublas.dll, cublasLt.dll + +Mac OSX + +libcublas.dylib, libcublasLt.dylib, libcublas_static.a, +libcublasLt_static.a + +Linux + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Android + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Component + +NVIDIA "Drop-in" BLAS Library + +Windows + +nvblas.dll + +Mac OSX + +libnvblas.dylib + +Linux + +libnvblas.so + +Component + +CUDA Sparse Matrix Library + +Windows + +cusparse.dll, cusparse.lib + +Mac OSX + +libcusparse.dylib, libcusparse_static.a + +Linux + +libcusparse.so, libcusparse_static.a + +Android + +libcusparse.so, libcusparse_static.a + +Component + +CUDA Linear Solver Library + +Windows + +cusolver.dll, cusolver.lib + +Mac OSX + +libcusolver.dylib, libcusolver_static.a + +Linux + +libcusolver.so, libcusolver_static.a + +Android + +libcusolver.so, libcusolver_static.a + +Component + +CUDA Random Number Generation Library + +Windows + +curand.dll, curand.lib + +Mac OSX + +libcurand.dylib, libcurand_static.a + +Linux + +libcurand.so, libcurand_static.a + +Android + +libcurand.so, libcurand_static.a + +Component + +CUDA Accelerated Graph Library + +Component + +NVIDIA Performance Primitives Library + +Windows + +nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll, +nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll, +nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib, +nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll, +nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib + +Mac OSX + +libnppc.dylib, libnppc_static.a, libnppial.dylib, +libnppial_static.a, libnppicc.dylib, libnppicc_static.a, +libnppicom.dylib, libnppicom_static.a, libnppidei.dylib, +libnppidei_static.a, libnppif.dylib, libnppif_static.a, +libnppig.dylib, libnppig_static.a, libnppim.dylib, +libnppisu_static.a, libnppitc.dylib, libnppitc_static.a, +libnpps.dylib, libnpps_static.a + +Linux + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Android + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Component + +NVIDIA JPEG Library + +Linux + +libnvjpeg.so, libnvjpeg_static.a + +Component + +Internal common library required for statically linking to +cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP + +Mac OSX + +libculibos.a + +Linux + +libculibos.a + +Component + +NVIDIA Runtime Compilation Library and Header + +All + +nvrtc.h + +Windows + +nvrtc.dll, nvrtc-builtins.dll + +Mac OSX + +libnvrtc.dylib, libnvrtc-builtins.dylib + +Linux + +libnvrtc.so, libnvrtc-builtins.so + +Component + +NVIDIA Optimizing Compiler Library + +Windows + +nvvm.dll + +Mac OSX + +libnvvm.dylib + +Linux + +libnvvm.so + +Component + +NVIDIA Common Device Math Functions Library + +Windows + +libdevice.10.bc + +Mac OSX + +libdevice.10.bc + +Linux + +libdevice.10.bc + +Component + +CUDA Occupancy Calculation Header Library + +All + +cuda_occupancy.h + +Component + +CUDA Half Precision Headers + +All + +cuda_fp16.h, cuda_fp16.hpp + +Component + +CUDA Profiling Tools Interface (CUPTI) Library + +Windows + +cupti.dll + +Mac OSX + +libcupti.dylib + +Linux + +libcupti.so + +Component + +NVIDIA Tools Extension Library + +Windows + +nvToolsExt.dll, nvToolsExt.lib + +Mac OSX + +libnvToolsExt.dylib + +Linux + +libnvToolsExt.so + +Component + +NVIDIA CUDA Driver Libraries + +Linux + +libcuda.so, libnvidia-fatbinaryloader.so, +libnvidia-ptxjitcompiler.so + +The NVIDIA CUDA Driver Libraries are only distributable in +applications that meet this criteria: + + 1. The application was developed starting from a NVIDIA CUDA + container obtained from Docker Hub or the NVIDIA GPU + Cloud, and + + 2. The resulting application is packaged as a Docker + container and distributed to users on Docker Hub or the + NVIDIA GPU Cloud only. + + +2.7. Attachment B + + +Additional Licensing Obligations + +The following third party components included in the SOFTWARE +are licensed to Licensee pursuant to the following terms and +conditions: + + 1. Licensee's use of the GDB third party component is + subject to the terms and conditions of GNU GPL v3: + + This product includes copyrighted third-party software licensed + under the terms of the GNU General Public License v3 ("GPL v3"). + All third-party software packages are copyright by their respective + authors. GPL v3 terms and conditions are hereby incorporated into + the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt + + Consistent with these licensing requirements, the software + listed below is provided under the terms of the specified + open source software licenses. To obtain source code for + software provided under licenses that require + redistribution of source code, including the GNU General + Public License (GPL) and GNU Lesser General Public License + (LGPL), contact oss-requests@nvidia.com. This offer is + valid for a period of three (3) years from the date of the + distribution of this product by NVIDIA CORPORATION. + + Component License + CUDA-GDB GPL v3 + + 2. Licensee represents and warrants that any and all third + party licensing and/or royalty payment obligations in + connection with Licensee's use of the H.264 video codecs + are solely the responsibility of Licensee. + + 3. Licensee's use of the Thrust library is subject to the + terms and conditions of the Apache License Version 2.0. + All third-party software packages are copyright by their + respective authors. Apache License Version 2.0 terms and + conditions are hereby incorporated into the Agreement by + this reference. + http://www.apache.org/licenses/LICENSE-2.0.html + + In addition, Licensee acknowledges the following notice: + Thrust includes source code from the Boost Iterator, + Tuple, System, and Random Number libraries. + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 4. Licensee's use of the LLVM third party component is + subject to the following terms and conditions: + + ====================================================== + LLVM Release License + ====================================================== + University of Illinois/NCSA + Open Source License + + Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. + All rights reserved. + + Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at Urbana- + Champaign, nor the names of its contributors may be used to endorse or + promote products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + + 5. Licensee's use (e.g. nvprof) of the PCRE third party + component is subject to the following terms and + conditions: + + ------------ + PCRE LICENCE + ------------ + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + Release 8 of PCRE is distributed under the terms of the "BSD" licence, as + specified below. The documentation for PCRE, supplied in the "doc" + directory, is distributed under the same terms as the software itself. The + basic library functions are written in C and are freestanding. Also + included in the distribution is a set of C++ wrapper functions, and a just- + in-time compiler that can be used to optimize pattern matching. These are + both optional features that can be omitted when the library is built. + + THE BASIC LIBRARY FUNCTIONS + --------------------------- + Written by: Philip Hazel + Email local part: ph10 + Email domain: cam.ac.uk + University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2012 University of Cambridge + All rights reserved. + + PCRE JUST-IN-TIME COMPILATION SUPPORT + ------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2010-2012 Zoltan Herczeg + All rights reserved. + + STACK-LESS JUST-IN-TIME COMPILER + -------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2009-2012 Zoltan Herczeg + All rights reserved. + + THE C++ WRAPPER FUNCTIONS + ------------------------- + Contributed by: Google Inc. + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + + THE "BSD" LICENCE + ----------------- + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 6. Some of the cuBLAS library routines were written by or + derived from code written by Vasily Volkov and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2007-2009, Regents of the University of California + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the University of California, Berkeley nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 7. Some of the cuBLAS library routines were written by or + derived from code written by Davide Barbieri and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 8. Some of the cuBLAS library routines were derived from + code developed by the University of Tennessee and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2010 The University of Tennessee. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer listed in this license in the documentation and/or + other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 9. Some of the cuBLAS library routines were written by or + derived from code written by Jonathan Hogg and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2012, The Science and Technology Facilities Council (STFC). + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the STFC nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 10. Some of the cuBLAS library routines were written by or + derived from code written by Ahmad M. Abdelfattah, David + Keyes, and Hatem Ltaief, and are subject to the Apache + License, Version 2.0, as follows: + + -- (C) Copyright 2013 King Abdullah University of Science and Technology + Authors: + Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) + David Keyes (david.keyes@kaust.edu.sa) + Hatem Ltaief (hatem.ltaief@kaust.edu.sa) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the King Abdullah University of Science and + Technology nor the names of its contributors may be used to endorse + or promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE + + 11. Some of the cuSPARSE library routines were written by or + derived from code written by Li-Wen Chang and are subject + to the NCSA Open Source License as follows: + + Copyright (c) 2012, University of Illinois. + + All rights reserved. + + Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal with the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimers in the documentation and/or other materials provided + with the distribution. + * Neither the names of IMPACT Group, University of Illinois, nor + the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + 12. Some of the cuRAND library routines were written by or + derived from code written by Mutsuo Saito and Makoto + Matsumoto and are subject to the following license: + + Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima + University. All rights reserved. + + Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima + University and University of Tokyo. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 13. Some of the cuRAND library routines were derived from + code developed by D. E. Shaw Research and are subject to + the following license: + + Copyright 2010-2011, D. E. Shaw Research. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 14. Some of the Math library routines were written by or + derived from code developed by Norbert Juffa and are + subject to the following license: + + Copyright (c) 2015-2017, Norbert Juffa + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 15. Licensee's use of the lz4 third party component is + subject to the following terms and conditions: + + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 16. The NPP library uses code from the Boost Math Toolkit, + and is subject to the following license: + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 17. Portions of the Nsight Eclipse Edition is subject to the + following license: + + The Eclipse Foundation makes available all content in this plug-in + ("Content"). Unless otherwise indicated below, the Content is provided + to you under the terms and conditions of the Eclipse Public License + Version 1.0 ("EPL"). A copy of the EPL is available at http:// + www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program" + will mean the Content. + + If you did not receive this Content directly from the Eclipse + Foundation, the Content is being redistributed by another party + ("Redistributor") and different terms and conditions may apply to your + use of any object code in the Content. Check the Redistributor's + license that was provided with the Content. If no such license exists, + contact the Redistributor. Unless otherwise indicated below, the terms + and conditions of the EPL still apply to any source code in the + Content and such source code may be obtained at http://www.eclipse.org. + + 18. Some of the cuBLAS library routines uses code from + OpenAI, which is subject to the following license: + + License URL + https://github.com/openai/openai-gemm/blob/master/LICENSE + + License Text + The MIT License + + Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + 19. Licensee's use of the Visual Studio Setup Configuration + Samples is subject to the following license: + + The MIT License (MIT) + Copyright (C) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + 20. Licensee's use of linmath.h header for CPU functions for + GL vector/matrix operations from lunarG is subject to the + Apache License Version 2.0. + + 21. The DX12-CUDA sample uses the d3dx12.h header, which is + subject to the MIT license . + +----------------- diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/regex/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/regex/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4088f26aa9bf28a1574429d87e8f1814f928da0d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/regex/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/regex/__pycache__/regex.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/regex/__pycache__/regex.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..443300c81f2cf4d573d50eba8fb68c844b34c93b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/regex/__pycache__/regex.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f54a66250d72235ef3e700f68d7219e83d6872d0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_imp.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_imp.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf3e22bcf050209e2604f53032addb3a4d0bb869 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_imp.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_itertools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_itertools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3569d201d337e3d6233fd4df9b29f7bc9a3f650 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_itertools.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_normalization.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_normalization.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7e07d4f9a3eb5e626d394953c9f6e1d9349bd1c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_normalization.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_path.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_path.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d76fa7daeff1ae885bebfeef38f2f4e916c2a5f4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/_path.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/archive_util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/archive_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a09ab72f7264109720c25492c670ff6a094647d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/archive_util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/depends.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/depends.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5890f021ee32fe52368e1392b2ad45912a945a5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/depends.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/dist.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/dist.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..61d00806385c4ae89939c36840090f78fb01442b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/dist.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/errors.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/errors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b780c5a8984edb0b9dec9d3b50f3a2fc5190e52 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/errors.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/extension.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/extension.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff8ef99aee552808cd3ce008c0b4f868209410ff Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/extension.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/glob.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/glob.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bea797e9f64eb60add3c43b9bd413b1b3487b81e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/glob.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/launch.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/launch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b70be397bc039b34f2cb0e49ef5dc3c70a7b225 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/launch.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/modified.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/modified.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78308e8890f02e6623725b4470e338609a485cf4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/modified.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/monkey.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/monkey.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..289ee006fc3ebad31eb2fac13cdeff13f64e1adf Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/monkey.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/msvc.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/msvc.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30be25fea83e3170f5e766ca6ab5c46a5c016bb3 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/msvc.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/namespaces.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/namespaces.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88d8a80171db16e91bb5afbd3836a4ec5a863441 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/namespaces.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/package_index.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/package_index.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a94481467128b7cd2f569950babdbc4140a30fa Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/package_index.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/sandbox.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/sandbox.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ef8a1fbd16994ebb5fbe27bcf395020dd66af4d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/sandbox.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/unicode_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/unicode_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d36831b9cfbaca132faa2c0ddc9848319fa6d5c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/unicode_utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/version.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/version.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5359715940cb1c79735dbd133abe3c50e9261ffb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/version.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/warnings.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/warnings.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80eb1d90ec8e1b90e3feea7eba451d8d2d19fdde Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/__pycache__/warnings.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e374d5c5604d0eeed555e68b7523bfac72fd9101 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__init__.py @@ -0,0 +1,14 @@ +import importlib +import sys + +__version__, _, _ = sys.version.partition(' ') + + +try: + # Allow Debian and pkgsrc (only) to customize system + # behavior. Ref pypa/distutils#2 and pypa/distutils#16. + # This hook is deprecated and no other environments + # should use it. + importlib.import_module('_distutils_system_mod') +except ImportError: + pass diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d98098873cba09d1b5336c13fabf0553cee7a773 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_collections.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_collections.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a5abca1314e2a799a64d9140c392e51a2148dab Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_collections.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_functools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_functools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae90207bfbca003d3a304a96a53c7b4490db1c99 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_functools.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_itertools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_itertools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78e9fd308cd4c6da695af818ae23c929f6305105 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_itertools.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_log.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_log.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c58dc1e744e423c206b9d24288dd4b19b14770f3 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_log.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_macos_compat.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_macos_compat.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2abd8d3185ea31bdb2ec2cc9e11090efab2175d1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_macos_compat.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_modified.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_modified.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..185d18853bf233cdbd778cc23c302269ab698ede Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_modified.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_msvccompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_msvccompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0eaef5a70bce5eab185b61b9c67b6709a2457bf Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/_msvccompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/archive_util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/archive_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67fe6f887dd66b2812cdac575eba1c777ca75da6 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/archive_util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/bcppcompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/bcppcompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7997fe8ea83108220f7a74f94c82d2a455895f4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/bcppcompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/ccompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/ccompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49e647f799548f4ac7d95047b8ad7dd3f79d63d7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/ccompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/cmd.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/cmd.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9bf75409d8d9f1577dc852c2a6181473871fa21a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/cmd.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/config.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a883ab5301897e13c2efc90fd5dcc1feafb0a44e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/config.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/core.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/core.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6ffdc4bda413e559db48e9748aae5cd379b3b56 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/core.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/cygwinccompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/cygwinccompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..186560e6da7d0a38a406b6cbfe3dd202abe4a62a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/cygwinccompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/debug.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/debug.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ea391c4a12c4d530b4ddeda1aa0e92b384e5092 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/debug.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dep_util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dep_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6d227102844a62a08ad3665d71df30793e14b71 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dep_util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dir_util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dir_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95ce3ef60fdd01033dd24c8ae74337f03d144be4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dir_util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dist.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dist.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79e64b0bd06405551680c0abca22500c811a3967 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/dist.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/errors.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/errors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2fda44ea3f4985d41da0d2d587e940233d330dc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/errors.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/extension.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/extension.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36992511d410387109ef75bc31f55a197ffde5c4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/extension.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/fancy_getopt.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/fancy_getopt.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f9dae555ef07ddc3e1111efa058b5741e3843c1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/fancy_getopt.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/file_util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/file_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08355b66d05d148348f8e78dcd4d5985fdaa0d93 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/file_util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/filelist.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/filelist.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..936bb8f831046b6a1437cdd5f26617cc3e6be226 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/filelist.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/log.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/log.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55d12d676277e6001758e423f5913f2c10bb666d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/log.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/msvc9compiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/msvc9compiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..118eb3c4b4f21f9782c61ec37cedb201e9040782 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/msvc9compiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/msvccompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/msvccompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db6cbb88d207f7a4a6138d91b6e36976721a1074 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/msvccompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/spawn.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/spawn.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..141d638cdf67ef5ff56435e634b6874afccb970a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/spawn.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/sysconfig.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/sysconfig.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0795bb797eee359148f611560ad425d1982d8bf Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/sysconfig.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/text_file.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/text_file.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f9ddb4b81ed7654d8ea94718078e624404eeac0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/text_file.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/unixccompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/unixccompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..499b163b54c7a30e8d12bf66c490fb63f6eae512 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/unixccompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e733f966b0da01d4262e9baca9a9179687bafe5 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/version.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/version.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bea62b0d8f0cdc30cb54c2b73bcabb01d006c61 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/version.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/versionpredicate.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/versionpredicate.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4066e967baee56e0d14b31b132f64e1a2cb9eed6 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/versionpredicate.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/zosccompiler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/zosccompiler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..95cd4fab5c19c9aac700971df90a2639fdc04b0d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/__pycache__/zosccompiler.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_collections.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_collections.py new file mode 100644 index 0000000000000000000000000000000000000000..d11a83467c23d34e14ef25c80b6363b80f5e5d49 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_collections.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import collections +import functools +import itertools +import operator +from collections.abc import Mapping +from typing import Any + + +# from jaraco.collections 3.5.1 +class DictStack(list, collections.abc.Mapping): + """ + A stack of dictionaries that behaves as a view on those dictionaries, + giving preference to the last. + + >>> stack = DictStack([dict(a=1, c=2), dict(b=2, a=2)]) + >>> stack['a'] + 2 + >>> stack['b'] + 2 + >>> stack['c'] + 2 + >>> len(stack) + 3 + >>> stack.push(dict(a=3)) + >>> stack['a'] + 3 + >>> set(stack.keys()) == set(['a', 'b', 'c']) + True + >>> set(stack.items()) == set([('a', 3), ('b', 2), ('c', 2)]) + True + >>> dict(**stack) == dict(stack) == dict(a=3, c=2, b=2) + True + >>> d = stack.pop() + >>> stack['a'] + 2 + >>> d = stack.pop() + >>> stack['a'] + 1 + >>> stack.get('b', None) + >>> 'c' in stack + True + """ + + def __iter__(self): + dicts = list.__iter__(self) + return iter(set(itertools.chain.from_iterable(c.keys() for c in dicts))) + + def __getitem__(self, key): + for scope in reversed(tuple(list.__iter__(self))): + if key in scope: + return scope[key] + raise KeyError(key) + + push = list.append + + def __contains__(self, other): + return collections.abc.Mapping.__contains__(self, other) + + def __len__(self): + return len(list(iter(self))) + + +# from jaraco.collections 5.0.1 +class RangeMap(dict): + """ + A dictionary-like object that uses the keys as bounds for a range. + Inclusion of the value for that range is determined by the + key_match_comparator, which defaults to less-than-or-equal. + A value is returned for a key if it is the first key that matches in + the sorted list of keys. + + One may supply keyword parameters to be passed to the sort function used + to sort keys (i.e. key, reverse) as sort_params. + + Create a map that maps 1-3 -> 'a', 4-6 -> 'b' + + >>> r = RangeMap({3: 'a', 6: 'b'}) # boy, that was easy + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + Even float values should work so long as the comparison operator + supports it. + + >>> r[4.5] + 'b' + + Notice that the way rangemap is defined, it must be open-ended + on one side. + + >>> r[0] + 'a' + >>> r[-1] + 'a' + + One can close the open-end of the RangeMap by using undefined_value + + >>> r = RangeMap({0: RangeMap.undefined_value, 3: 'a', 6: 'b'}) + >>> r[0] + Traceback (most recent call last): + ... + KeyError: 0 + + One can get the first or last elements in the range by using RangeMap.Item + + >>> last_item = RangeMap.Item(-1) + >>> r[last_item] + 'b' + + .last_item is a shortcut for Item(-1) + + >>> r[RangeMap.last_item] + 'b' + + Sometimes it's useful to find the bounds for a RangeMap + + >>> r.bounds() + (0, 6) + + RangeMap supports .get(key, default) + + >>> r.get(0, 'not found') + 'not found' + + >>> r.get(7, 'not found') + 'not found' + + One often wishes to define the ranges by their left-most values, + which requires use of sort params and a key_match_comparator. + + >>> r = RangeMap({1: 'a', 4: 'b'}, + ... sort_params=dict(reverse=True), + ... key_match_comparator=operator.ge) + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + That wasn't nearly as easy as before, so an alternate constructor + is provided: + + >>> r = RangeMap.left({1: 'a', 4: 'b', 7: RangeMap.undefined_value}) + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + """ + + def __init__( + self, + source, + sort_params: Mapping[str, Any] = {}, + key_match_comparator=operator.le, + ): + dict.__init__(self, source) + self.sort_params = sort_params + self.match = key_match_comparator + + @classmethod + def left(cls, source): + return cls( + source, sort_params=dict(reverse=True), key_match_comparator=operator.ge + ) + + def __getitem__(self, item): + sorted_keys = sorted(self.keys(), **self.sort_params) + if isinstance(item, RangeMap.Item): + result = self.__getitem__(sorted_keys[item]) + else: + key = self._find_first_match_(sorted_keys, item) + result = dict.__getitem__(self, key) + if result is RangeMap.undefined_value: + raise KeyError(key) + return result + + def get(self, key, default=None): + """ + Return the value for key if key is in the dictionary, else default. + If default is not given, it defaults to None, so that this method + never raises a KeyError. + """ + try: + return self[key] + except KeyError: + return default + + def _find_first_match_(self, keys, item): + is_match = functools.partial(self.match, item) + matches = list(filter(is_match, keys)) + if matches: + return matches[0] + raise KeyError(item) + + def bounds(self): + sorted_keys = sorted(self.keys(), **self.sort_params) + return (sorted_keys[RangeMap.first_item], sorted_keys[RangeMap.last_item]) + + # some special values for the RangeMap + undefined_value = type('RangeValueUndefined', (), {})() + + class Item(int): + "RangeMap Item" + + first_item = Item(0) + last_item = Item(-1) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_functools.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_functools.py new file mode 100644 index 0000000000000000000000000000000000000000..e03365eafae275b173012be2deeba2e9d566e366 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_functools.py @@ -0,0 +1,73 @@ +import collections.abc +import functools + + +# from jaraco.functools 3.5 +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper + + +# from jaraco.functools 4.0 +@functools.singledispatch +def _splat_inner(args, func): + """Splat args to func.""" + return func(*args) + + +@_splat_inner.register +def _(args: collections.abc.Mapping, func): + """Splat kargs to func as kwargs.""" + return func(**args) + + +def splat(func): + """ + Wrap func to expect its parameters to be passed positionally in a tuple. + + Has a similar effect to that of ``itertools.starmap`` over + simple ``map``. + + >>> import itertools, operator + >>> pairs = [(-1, 1), (0, 2)] + >>> _ = tuple(itertools.starmap(print, pairs)) + -1 1 + 0 2 + >>> _ = tuple(map(splat(print), pairs)) + -1 1 + 0 2 + + The approach generalizes to other iterators that don't have a "star" + equivalent, such as a "starfilter". + + >>> list(filter(splat(operator.add), pairs)) + [(0, 2)] + + Splat also accepts a mapping argument. + + >>> def is_nice(msg, code): + ... return "smile" in msg or code == 0 + >>> msgs = [ + ... dict(msg='smile!', code=20), + ... dict(msg='error :(', code=1), + ... dict(msg='unknown', code=0), + ... ] + >>> for msg in filter(splat(is_nice), msgs): + ... print(msg) + {'msg': 'smile!', 'code': 20} + {'msg': 'unknown', 'code': 0} + """ + return functools.wraps(func)(functools.partial(_splat_inner, func=func)) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_itertools.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_itertools.py new file mode 100644 index 0000000000000000000000000000000000000000..85b29511861453f815e0942da24cd77538fe77b0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_itertools.py @@ -0,0 +1,52 @@ +# from more_itertools 10.2 +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_log.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_log.py new file mode 100644 index 0000000000000000000000000000000000000000..0148f157ff3bf8bd728c82f49cd3d1cd9cb5a43e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_log.py @@ -0,0 +1,3 @@ +import logging + +log = logging.getLogger() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_macos_compat.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_macos_compat.py new file mode 100644 index 0000000000000000000000000000000000000000..76ecb96abe478313ce7d09342b2643bf4a765331 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_macos_compat.py @@ -0,0 +1,12 @@ +import importlib +import sys + + +def bypass_compiler_fixup(cmd, args): + return cmd + + +if sys.platform == 'darwin': + compiler_fixup = importlib.import_module('_osx_support').compiler_fixup +else: + compiler_fixup = bypass_compiler_fixup diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_modified.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_modified.py new file mode 100644 index 0000000000000000000000000000000000000000..6532aa10731a01d58863131aed4a99a83d6e2cb0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_modified.py @@ -0,0 +1,72 @@ +"""Timestamp comparison of files and groups of files.""" + +import functools +import os.path + +from ._functools import splat +from .compat.py39 import zip_strict +from .errors import DistutilsFileError + + +def _newer(source, target): + return not os.path.exists(target) or ( + os.path.getmtime(source) > os.path.getmtime(target) + ) + + +def newer(source, target): + """ + Is source modified more recently than target. + + Returns True if 'source' is modified more recently than + 'target' or if 'target' does not exist. + + Raises DistutilsFileError if 'source' does not exist. + """ + if not os.path.exists(source): + raise DistutilsFileError(f"file '{os.path.abspath(source)}' does not exist") + + return _newer(source, target) + + +def newer_pairwise(sources, targets, newer=newer): + """ + Filter filenames where sources are newer than targets. + + Walk two filename iterables in parallel, testing if each source is newer + than its corresponding target. Returns a pair of lists (sources, + targets) where source is newer than target, according to the semantics + of 'newer()'. + """ + newer_pairs = filter(splat(newer), zip_strict(sources, targets)) + return tuple(map(list, zip(*newer_pairs))) or ([], []) + + +def newer_group(sources, target, missing='error'): + """ + Is target out-of-date with respect to any file in sources. + + Return True if 'target' is out-of-date with respect to any file + listed in 'sources'. In other words, if 'target' exists and is newer + than every file in 'sources', return False; otherwise return True. + ``missing`` controls how to handle a missing source file: + + - error (default): allow the ``stat()`` call to fail. + - ignore: silently disregard any missing source files. + - newer: treat missing source files as "target out of date". This + mode is handy in "dry-run" mode: it will pretend to carry out + commands that wouldn't work because inputs are missing, but + that doesn't matter because dry-run won't run the commands. + """ + + def missing_as_newer(source): + return missing == 'newer' and not os.path.exists(source) + + ignored = os.path.exists if missing == 'ignore' else None + return any( + missing_as_newer(source) or _newer(source, target) + for source in filter(ignored, sources) + ) + + +newer_pairwise_group = functools.partial(newer_pairwise, newer=newer_group) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_msvccompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_msvccompiler.py new file mode 100644 index 0000000000000000000000000000000000000000..b0322410c59936a113e1f5ef2e8c1a3d3abd49b5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_msvccompiler.py @@ -0,0 +1,568 @@ +"""distutils._msvccompiler + +Contains MSVCCompiler, an implementation of the abstract CCompiler class +for Microsoft Visual Studio 2015. + +The module is compatible with VS 2015 and later. You can find legacy support +for older versions in distutils.msvc9compiler and distutils.msvccompiler. +""" + +# Written by Perry Stoll +# hacked by Robin Becker and Thomas Heller to do a better job of +# finding DevStudio (through the registry) +# ported to VS 2005 and VS 2008 by Christian Heimes +# ported to VS 2015 by Steve Dower + +import contextlib +import os +import subprocess +import unittest.mock as mock +import warnings + +with contextlib.suppress(ImportError): + import winreg + +from itertools import count + +from ._log import log +from .ccompiler import CCompiler, gen_lib_options +from .errors import ( + CompileError, + DistutilsExecError, + DistutilsPlatformError, + LibError, + LinkError, +) +from .util import get_platform + + +def _find_vc2015(): + try: + key = winreg.OpenKeyEx( + winreg.HKEY_LOCAL_MACHINE, + r"Software\Microsoft\VisualStudio\SxS\VC7", + access=winreg.KEY_READ | winreg.KEY_WOW64_32KEY, + ) + except OSError: + log.debug("Visual C++ is not registered") + return None, None + + best_version = 0 + best_dir = None + with key: + for i in count(): + try: + v, vc_dir, vt = winreg.EnumValue(key, i) + except OSError: + break + if v and vt == winreg.REG_SZ and os.path.isdir(vc_dir): + try: + version = int(float(v)) + except (ValueError, TypeError): + continue + if version >= 14 and version > best_version: + best_version, best_dir = version, vc_dir + return best_version, best_dir + + +def _find_vc2017(): + """Returns "15, path" based on the result of invoking vswhere.exe + If no install is found, returns "None, None" + + The version is returned to avoid unnecessarily changing the function + result. It may be ignored when the path is not None. + + If vswhere.exe is not available, by definition, VS 2017 is not + installed. + """ + root = os.environ.get("ProgramFiles(x86)") or os.environ.get("ProgramFiles") + if not root: + return None, None + + try: + path = subprocess.check_output( + [ + os.path.join( + root, "Microsoft Visual Studio", "Installer", "vswhere.exe" + ), + "-latest", + "-prerelease", + "-requires", + "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", + "-property", + "installationPath", + "-products", + "*", + ], + encoding="mbcs", + errors="strict", + ).strip() + except (subprocess.CalledProcessError, OSError, UnicodeDecodeError): + return None, None + + path = os.path.join(path, "VC", "Auxiliary", "Build") + if os.path.isdir(path): + return 15, path + + return None, None + + +PLAT_SPEC_TO_RUNTIME = { + 'x86': 'x86', + 'x86_amd64': 'x64', + 'x86_arm': 'arm', + 'x86_arm64': 'arm64', +} + + +def _find_vcvarsall(plat_spec): + # bpo-38597: Removed vcruntime return value + _, best_dir = _find_vc2017() + + if not best_dir: + best_version, best_dir = _find_vc2015() + + if not best_dir: + log.debug("No suitable Visual C++ version found") + return None, None + + vcvarsall = os.path.join(best_dir, "vcvarsall.bat") + if not os.path.isfile(vcvarsall): + log.debug("%s cannot be found", vcvarsall) + return None, None + + return vcvarsall, None + + +def _get_vc_env(plat_spec): + if os.getenv("DISTUTILS_USE_SDK"): + return {key.lower(): value for key, value in os.environ.items()} + + vcvarsall, _ = _find_vcvarsall(plat_spec) + if not vcvarsall: + raise DistutilsPlatformError("Unable to find vcvarsall.bat") + + try: + out = subprocess.check_output( + f'cmd /u /c "{vcvarsall}" {plat_spec} && set', + stderr=subprocess.STDOUT, + ).decode('utf-16le', errors='replace') + except subprocess.CalledProcessError as exc: + log.error(exc.output) + raise DistutilsPlatformError(f"Error executing {exc.cmd}") + + env = { + key.lower(): value + for key, _, value in (line.partition('=') for line in out.splitlines()) + if key and value + } + + return env + + +def _find_exe(exe, paths=None): + """Return path to an MSVC executable program. + + Tries to find the program in several places: first, one of the + MSVC program search paths from the registry; next, the directories + in the PATH environment variable. If any of those work, return an + absolute path that is known to exist. If none of them work, just + return the original program name, 'exe'. + """ + if not paths: + paths = os.getenv('path').split(os.pathsep) + for p in paths: + fn = os.path.join(os.path.abspath(p), exe) + if os.path.isfile(fn): + return fn + return exe + + +# A map keyed by get_platform() return values to values accepted by +# 'vcvarsall.bat'. Always cross-compile from x86 to work with the +# lighter-weight MSVC installs that do not include native 64-bit tools. +PLAT_TO_VCVARS = { + 'win32': 'x86', + 'win-amd64': 'x86_amd64', + 'win-arm32': 'x86_arm', + 'win-arm64': 'x86_arm64', +} + + +class MSVCCompiler(CCompiler): + """Concrete class that implements an interface to Microsoft Visual C++, + as defined by the CCompiler abstract class.""" + + compiler_type = 'msvc' + + # Just set this so CCompiler's constructor doesn't barf. We currently + # don't use the 'set_executables()' bureaucracy provided by CCompiler, + # as it really isn't necessary for this sort of single-compiler class. + # Would be nice to have a consistent interface with UnixCCompiler, + # though, so it's worth thinking about. + executables = {} + + # Private class data (need to distinguish C from C++ source for compiler) + _c_extensions = ['.c'] + _cpp_extensions = ['.cc', '.cpp', '.cxx'] + _rc_extensions = ['.rc'] + _mc_extensions = ['.mc'] + + # Needed for the filename generation methods provided by the + # base class, CCompiler. + src_extensions = _c_extensions + _cpp_extensions + _rc_extensions + _mc_extensions + res_extension = '.res' + obj_extension = '.obj' + static_lib_extension = '.lib' + shared_lib_extension = '.dll' + static_lib_format = shared_lib_format = '%s%s' + exe_extension = '.exe' + + def __init__(self, verbose=False, dry_run=False, force=False): + super().__init__(verbose, dry_run, force) + # target platform (.plat_name is consistent with 'bdist') + self.plat_name = None + self.initialized = False + + @classmethod + def _configure(cls, vc_env): + """ + Set class-level include/lib dirs. + """ + cls.include_dirs = cls._parse_path(vc_env.get('include', '')) + cls.library_dirs = cls._parse_path(vc_env.get('lib', '')) + + @staticmethod + def _parse_path(val): + return [dir.rstrip(os.sep) for dir in val.split(os.pathsep) if dir] + + def initialize(self, plat_name=None): + # multi-init means we would need to check platform same each time... + assert not self.initialized, "don't init multiple times" + if plat_name is None: + plat_name = get_platform() + # sanity check for platforms to prevent obscure errors later. + if plat_name not in PLAT_TO_VCVARS: + raise DistutilsPlatformError( + f"--plat-name must be one of {tuple(PLAT_TO_VCVARS)}" + ) + + # Get the vcvarsall.bat spec for the requested platform. + plat_spec = PLAT_TO_VCVARS[plat_name] + + vc_env = _get_vc_env(plat_spec) + if not vc_env: + raise DistutilsPlatformError( + "Unable to find a compatible Visual Studio installation." + ) + self._configure(vc_env) + + self._paths = vc_env.get('path', '') + paths = self._paths.split(os.pathsep) + self.cc = _find_exe("cl.exe", paths) + self.linker = _find_exe("link.exe", paths) + self.lib = _find_exe("lib.exe", paths) + self.rc = _find_exe("rc.exe", paths) # resource compiler + self.mc = _find_exe("mc.exe", paths) # message compiler + self.mt = _find_exe("mt.exe", paths) # message compiler + + self.preprocess_options = None + # bpo-38597: Always compile with dynamic linking + # Future releases of Python 3.x will include all past + # versions of vcruntime*.dll for compatibility. + self.compile_options = ['/nologo', '/O2', '/W3', '/GL', '/DNDEBUG', '/MD'] + + self.compile_options_debug = [ + '/nologo', + '/Od', + '/MDd', + '/Zi', + '/W3', + '/D_DEBUG', + ] + + ldflags = ['/nologo', '/INCREMENTAL:NO', '/LTCG'] + + ldflags_debug = ['/nologo', '/INCREMENTAL:NO', '/LTCG', '/DEBUG:FULL'] + + self.ldflags_exe = [*ldflags, '/MANIFEST:EMBED,ID=1'] + self.ldflags_exe_debug = [*ldflags_debug, '/MANIFEST:EMBED,ID=1'] + self.ldflags_shared = [ + *ldflags, + '/DLL', + '/MANIFEST:EMBED,ID=2', + '/MANIFESTUAC:NO', + ] + self.ldflags_shared_debug = [ + *ldflags_debug, + '/DLL', + '/MANIFEST:EMBED,ID=2', + '/MANIFESTUAC:NO', + ] + self.ldflags_static = [*ldflags] + self.ldflags_static_debug = [*ldflags_debug] + + self._ldflags = { + (CCompiler.EXECUTABLE, None): self.ldflags_exe, + (CCompiler.EXECUTABLE, False): self.ldflags_exe, + (CCompiler.EXECUTABLE, True): self.ldflags_exe_debug, + (CCompiler.SHARED_OBJECT, None): self.ldflags_shared, + (CCompiler.SHARED_OBJECT, False): self.ldflags_shared, + (CCompiler.SHARED_OBJECT, True): self.ldflags_shared_debug, + (CCompiler.SHARED_LIBRARY, None): self.ldflags_static, + (CCompiler.SHARED_LIBRARY, False): self.ldflags_static, + (CCompiler.SHARED_LIBRARY, True): self.ldflags_static_debug, + } + + self.initialized = True + + # -- Worker methods ------------------------------------------------ + + @property + def out_extensions(self): + return { + **super().out_extensions, + **{ + ext: self.res_extension + for ext in self._rc_extensions + self._mc_extensions + }, + } + + def compile( # noqa: C901 + self, + sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=False, + extra_preargs=None, + extra_postargs=None, + depends=None, + ): + if not self.initialized: + self.initialize() + compile_info = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + macros, objects, extra_postargs, pp_opts, build = compile_info + + compile_opts = extra_preargs or [] + compile_opts.append('/c') + if debug: + compile_opts.extend(self.compile_options_debug) + else: + compile_opts.extend(self.compile_options) + + add_cpp_opts = False + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + if debug: + # pass the full pathname to MSVC in debug mode, + # this allows the debugger to find the source file + # without asking the user to browse for it + src = os.path.abspath(src) + + if ext in self._c_extensions: + input_opt = "/Tc" + src + elif ext in self._cpp_extensions: + input_opt = "/Tp" + src + add_cpp_opts = True + elif ext in self._rc_extensions: + # compile .RC to .RES file + input_opt = src + output_opt = "/fo" + obj + try: + self.spawn([self.rc] + pp_opts + [output_opt, input_opt]) + except DistutilsExecError as msg: + raise CompileError(msg) + continue + elif ext in self._mc_extensions: + # Compile .MC to .RC file to .RES file. + # * '-h dir' specifies the directory for the + # generated include file + # * '-r dir' specifies the target directory of the + # generated RC file and the binary message resource + # it includes + # + # For now (since there are no options to change this), + # we use the source-directory for the include file and + # the build directory for the RC file and message + # resources. This works at least for win32all. + h_dir = os.path.dirname(src) + rc_dir = os.path.dirname(obj) + try: + # first compile .MC to .RC and .H file + self.spawn([self.mc, '-h', h_dir, '-r', rc_dir, src]) + base, _ = os.path.splitext(os.path.basename(src)) + rc_file = os.path.join(rc_dir, base + '.rc') + # then compile .RC to .RES file + self.spawn([self.rc, "/fo" + obj, rc_file]) + + except DistutilsExecError as msg: + raise CompileError(msg) + continue + else: + # how to handle this file? + raise CompileError(f"Don't know how to compile {src} to {obj}") + + args = [self.cc] + compile_opts + pp_opts + if add_cpp_opts: + args.append('/EHsc') + args.extend((input_opt, "/Fo" + obj)) + args.extend(extra_postargs) + + try: + self.spawn(args) + except DistutilsExecError as msg: + raise CompileError(msg) + + return objects + + def create_static_lib( + self, objects, output_libname, output_dir=None, debug=False, target_lang=None + ): + if not self.initialized: + self.initialize() + objects, output_dir = self._fix_object_args(objects, output_dir) + output_filename = self.library_filename(output_libname, output_dir=output_dir) + + if self._need_link(objects, output_filename): + lib_args = objects + ['/OUT:' + output_filename] + if debug: + pass # XXX what goes here? + try: + log.debug('Executing "%s" %s', self.lib, ' '.join(lib_args)) + self.spawn([self.lib] + lib_args) + except DistutilsExecError as msg: + raise LibError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def link( + self, + target_desc, + objects, + output_filename, + output_dir=None, + libraries=None, + library_dirs=None, + runtime_library_dirs=None, + export_symbols=None, + debug=False, + extra_preargs=None, + extra_postargs=None, + build_temp=None, + target_lang=None, + ): + if not self.initialized: + self.initialize() + objects, output_dir = self._fix_object_args(objects, output_dir) + fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) + libraries, library_dirs, runtime_library_dirs = fixed_args + + if runtime_library_dirs: + self.warn( + "I don't know what to do with 'runtime_library_dirs': " + + str(runtime_library_dirs) + ) + + lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries) + if output_dir is not None: + output_filename = os.path.join(output_dir, output_filename) + + if self._need_link(objects, output_filename): + ldflags = self._ldflags[target_desc, debug] + + export_opts = ["/EXPORT:" + sym for sym in (export_symbols or [])] + + ld_args = ( + ldflags + lib_opts + export_opts + objects + ['/OUT:' + output_filename] + ) + + # The MSVC linker generates .lib and .exp files, which cannot be + # suppressed by any linker switches. The .lib files may even be + # needed! Make sure they are generated in the temporary build + # directory. Since they have different names for debug and release + # builds, they can go into the same directory. + build_temp = os.path.dirname(objects[0]) + if export_symbols is not None: + (dll_name, dll_ext) = os.path.splitext( + os.path.basename(output_filename) + ) + implib_file = os.path.join(build_temp, self.library_filename(dll_name)) + ld_args.append('/IMPLIB:' + implib_file) + + if extra_preargs: + ld_args[:0] = extra_preargs + if extra_postargs: + ld_args.extend(extra_postargs) + + output_dir = os.path.dirname(os.path.abspath(output_filename)) + self.mkpath(output_dir) + try: + log.debug('Executing "%s" %s', self.linker, ' '.join(ld_args)) + self.spawn([self.linker] + ld_args) + except DistutilsExecError as msg: + raise LinkError(msg) + else: + log.debug("skipping %s (up-to-date)", output_filename) + + def spawn(self, cmd): + env = dict(os.environ, PATH=self._paths) + with self._fallback_spawn(cmd, env) as fallback: + return super().spawn(cmd, env=env) + return fallback.value + + @contextlib.contextmanager + def _fallback_spawn(self, cmd, env): + """ + Discovered in pypa/distutils#15, some tools monkeypatch the compiler, + so the 'env' kwarg causes a TypeError. Detect this condition and + restore the legacy, unsafe behavior. + """ + bag = type('Bag', (), {})() + try: + yield bag + except TypeError as exc: + if "unexpected keyword argument 'env'" not in str(exc): + raise + else: + return + warnings.warn("Fallback spawn triggered. Please update distutils monkeypatch.") + with mock.patch.dict('os.environ', env): + bag.value = super().spawn(cmd) + + # -- Miscellaneous methods ----------------------------------------- + # These are all used by the 'gen_lib_options() function, in + # ccompiler.py. + + def library_dir_option(self, dir): + return "/LIBPATH:" + dir + + def runtime_library_dir_option(self, dir): + raise DistutilsPlatformError( + "don't know how to set runtime library search path for MSVC" + ) + + def library_option(self, lib): + return self.library_filename(lib) + + def find_library_file(self, dirs, lib, debug=False): + # Prefer a debugging library if found (and requested), but deal + # with it if we don't have one. + if debug: + try_names = [lib + "_d", lib] + else: + try_names = [lib] + for dir in dirs: + for name in try_names: + libfile = os.path.join(dir, self.library_filename(name)) + if os.path.isfile(libfile): + return libfile + else: + # Oops, didn't find it in *any* of 'dirs' + return None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..612873db9029bdf24cdda8a45b7af56bee76c91b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7c0aa12ca950f230c8092436a985b2305702642 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__init__.py @@ -0,0 +1,15 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "24.0" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b538cc2638b63b07dee37e5fa2a0dcbdce9ee29 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_elffile.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_elffile.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06598b8a2739c5b158154f085a1ec888ce4a3ee7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_elffile.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_manylinux.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_manylinux.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e51e19805599694db81a3eb445414f57aa130a6d Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_manylinux.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_musllinux.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_musllinux.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3de846f25e81c831af881bd83984906837c4ba9 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_musllinux.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_parser.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_parser.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4dd4ae710cea38ee5741606467a38307264cf64 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_parser.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_structures.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_structures.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d340543c1614229466112d3ccf723b4d30edd694 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_structures.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_tokenizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_tokenizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01e5aabe535fdd3ad5962fb8a53f8bb35800f554 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/_tokenizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/markers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/markers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5304a9ee93bbd1441cecec4c939599658f41c79c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/markers.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/metadata.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/metadata.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a7792fc4b9f32b5c3e04558bfcd0ee3ea46fb60 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/metadata.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/requirements.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/requirements.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c4b6078c81e92ddf7384e977636c2d175bfdb27 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/requirements.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/specifiers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/specifiers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5290c262ae60da773273b710857f9f8a6abf7883 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/specifiers.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/tags.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/tags.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fc12c96c6a001340b1def2741ac03013d16dbc4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/tags.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6da7e0ca2be5e96b33774396373655c3249d06c7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/version.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/version.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1072a2a96bc5bf43ada69c534dc8e950a4043293 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/__pycache__/version.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_elffile.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_elffile.py new file mode 100644 index 0000000000000000000000000000000000000000..6fb19b30bb53c18f38a9ef02dd7c4478670fb962 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_manylinux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_manylinux.py new file mode 100644 index 0000000000000000000000000000000000000000..ad62505f3ff66c3d4da07ce1f2a50d9f10bc1bdd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_manylinux.py @@ -0,0 +1,260 @@ +import collections +import contextlib +import functools +import os +import re +import sys +import warnings +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: + try: + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None + + +def _is_linux_armhf(executable: str) -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) + + +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = { + "x86_64", + "aarch64", + "ppc64", + "ppc64le", + "s390x", + "loongarch64", + "riscv64", + } + return any(arch in allowed_archs for arch in archs) + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # Should be a string like "glibc 2.17". + version_string: Optional[str] = os.confstr("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.rsplit() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate manylinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be manylinux-compatible. + + :returns: An iterator of compatible manylinux tags. + """ + if not _have_compatible_abi(sys.executable, archs): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if set(archs) & {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for arch in archs: + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(arch, glibc_version): + yield f"{tag}_{arch}" + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_musllinux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_musllinux.py new file mode 100644 index 0000000000000000000000000000000000000000..86419df9d7087f3f8b6d0096f32a52c24b05e7c1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_musllinux.py @@ -0,0 +1,83 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +import functools +import re +import subprocess +import sys +from typing import Iterator, NamedTuple, Optional, Sequence + +from ._elffile import ELFFile + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_parser.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..684df75457cb82d3683dc99ff52c5bf911f3341b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_parser.py @@ -0,0 +1,356 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if env_var in ("platform_python_implementation", "python_implementation"): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_structures.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_structures.py new file mode 100644 index 0000000000000000000000000000000000000000..90a6465f9682c886363eea5327dac64bf623a6ff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_structures.py @@ -0,0 +1,61 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + + +class InfinityType: + def __repr__(self) -> str: + return "Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return False + + def __le__(self, other: object) -> bool: + return False + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return True + + def __ge__(self, other: object) -> bool: + return True + + def __neg__(self: object) -> "NegativeInfinityType": + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType: + def __repr__(self) -> str: + return "-Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return True + + def __le__(self, other: object) -> bool: + return True + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return False + + def __ge__(self, other: object) -> bool: + return False + + def __neg__(self: object) -> InfinityType: + return Infinity + + +NegativeInfinity = NegativeInfinityType() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_tokenizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..dd0d648d49a7c1a62d25ce5c9107aa448a8a22d1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/markers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/markers.py new file mode 100644 index 0000000000000000000000000000000000000000..8b98fca7233be6dd9324cd2b6d71b6a8ac91a6cb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/markers.py @@ -0,0 +1,252 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import operator +import os +import platform +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, +) +from ._tokenizer import ParserSyntaxError +from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name + +__all__ = [ + "InvalidMarker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "Marker", + "default_environment", +] + +Operator = Callable[[str, str], bool] + + +class InvalidMarker(ValueError): + """ + An invalid marker was found, users should refer to PEP 508. + """ + + +class UndefinedComparison(ValueError): + """ + An invalid operation was attempted on a value that doesn't support it. + """ + + +class UndefinedEnvironmentName(ValueError): + """ + A name was attempted to be used that does not exist inside of the + environment. + """ + + +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results + + +def _format_marker( + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True +) -> str: + + assert isinstance(marker, (list, tuple, str)) + + # Sometimes we have a structure like [[...]] which is a single item list + # where the single item is itself it's own list. In that case we want skip + # the rest of this function so that we don't get extraneous () on the + # outside. + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): + return _format_marker(marker[0]) + + if isinstance(marker, list): + inner = (_format_marker(m, first=False) for m in marker) + if first: + return " ".join(inner) + else: + return "(" + " ".join(inner) + ")" + elif isinstance(marker, tuple): + return " ".join([m.serialize() for m in marker]) + else: + return marker + + +_operators: Dict[str, Operator] = { + "in": lambda lhs, rhs: lhs in rhs, + "not in": lambda lhs, rhs: lhs not in rhs, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} + + +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: + try: + spec = Specifier("".join([op.serialize(), rhs])) + except InvalidSpecifier: + pass + else: + return spec.contains(lhs, prereleases=True) + + oper: Optional[Operator] = _operators.get(op.serialize()) + if oper is None: + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") + + return oper(lhs, rhs) + + +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) + + # other environment markers don't have such standards + return values + + +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] + + for marker in markers: + assert isinstance(marker, (list, tuple, str)) + + if isinstance(marker, list): + groups[-1].append(_evaluate_markers(marker, environment)) + elif isinstance(marker, tuple): + lhs, op, rhs = marker + + if isinstance(lhs, Variable): + environment_key = lhs.value + lhs_value = environment[environment_key] + rhs_value = rhs.value + else: + lhs_value = lhs.value + environment_key = rhs.value + rhs_value = environment[environment_key] + + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) + groups[-1].append(_eval_op(lhs_value, op, rhs_value)) + else: + assert marker in ["and", "or"] + if marker == "or": + groups.append([]) + + return any(all(item) for item in groups) + + +def format_full_version(info: "sys._version_info") -> str: + version = "{0.major}.{0.minor}.{0.micro}".format(info) + kind = info.releaselevel + if kind != "final": + version += kind[0] + str(info.serial) + return version + + +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name + return { + "implementation_name": implementation_name, + "implementation_version": iver, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "python_full_version": platform.python_version(), + "platform_python_implementation": platform.python_implementation(), + "python_version": ".".join(platform.python_version_tuple()[:2]), + "sys_platform": sys.platform, + } + + +class Marker: + def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. + try: + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e + + def __str__(self) -> str: + return _format_marker(self._markers) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: + """Evaluate a marker. + + Return the boolean from evaluating the given marker against the + environment. environment is an optional argument to override all or + part of the determined environment. + + The environment is determined from the current Python process. + """ + current_environment = default_environment() + current_environment["extra"] = "" + if environment is not None: + current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" + + return _evaluate_markers(self._markers, current_environment) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/metadata.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..fb274930799da0f8ee17566b5b587b4047282c7b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/metadata.py @@ -0,0 +1,825 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from typing_extensions import Literal, TypedDict + else: + try: + from typing_extensions import Literal, TypedDict + except ImportError: + + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +try: + ExceptionGroup +except NameError: # pragma: no cover + + class ExceptionGroup(Exception): # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + +else: # pragma: no cover + ExceptionGroup = ExceptionGroup + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separate keyboards into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + value = instance._raw.get(self.name) + + # To make the _process_* methods easier, we'll check if the value is None + # and if this field is NOT a required attribute, and if both of those + # things are true, we'll skip the the converter. This will mean that the + # converters never have to deal with the None union. + if self.name in _REQUIRED_ATTRS or value is not None: + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. + if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[Exception] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + raw, unparsed = parse_email(data) + + if validate: + exceptions: list[Exception] = [] + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + raise ExceptionGroup( + "invalid or unparsed metadata", exc_group.exceptions + ) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[Optional[List[str]]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[Optional[str]] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[Optional[str]] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[Optional[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/requirements.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/requirements.py new file mode 100644 index 0000000000000000000000000000000000000000..bdc43a7e98d87dba0c2069bfb4554f71d228cad4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/requirements.py @@ -0,0 +1,90 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from typing import Any, Iterator, Optional, Set + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name + + +class InvalidRequirement(ValueError): + """ + An invalid requirement was found, users should refer to PEP 508. + """ + + +class Requirement: + """Parse a requirement. + + Parse a given requirement string into its parts, such as name, specifier, + URL, and extras. Raises InvalidRequirement on a badly-formed requirement + string. + """ + + # TODO: Can we test whether something is contained within a requirement? + # If so how do we do that? Do we need to test against the _name_ of + # the thing as well as the version? What about the markers? + # TODO: Can we normalize the name and extra name? + + def __init__(self, requirement_string: str) -> None: + try: + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: Optional[str] = parsed.url or None + self.extras: Set[str] = set(parsed.extras or []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name + + if self.extras: + formatted_extras = ",".join(sorted(self.extras)) + yield f"[{formatted_extras}]" + + if self.specifier: + yield str(self.specifier) + + if self.url: + yield f"@ {self.url}" + if self.marker: + yield " " + + if self.marker: + yield f"; {self.marker}" + + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/specifiers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/specifiers.py new file mode 100644 index 0000000000000000000000000000000000000000..2d015bab5958fd9767cf5c9e449f2fa33292c962 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/specifiers.py @@ -0,0 +1,1017 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" + +import abc +import itertools +import re +from typing import Callable, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union + +from .utils import canonicalize_version +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] + + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version + + +class InvalidSpecifier(ValueError): + """ + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' + """ + + +class BaseSpecifier(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __str__(self) -> str: + """ + Returns the str representation of this Specifier-like object. This + should be representative of the Specifier itself. + """ + + @abc.abstractmethod + def __hash__(self) -> int: + """ + Returns a hash value for this Specifier-like object. + """ + + @abc.abstractmethod + def __eq__(self, other: object) -> bool: + """ + Returns a boolean representing whether or not the two Specifier-like + objects are equal. + + :param other: The other object to check against. + """ + + @property + @abc.abstractmethod + def prereleases(self) -> Optional[bool]: + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. + """ + + @prereleases.setter + def prereleases(self, value: bool) -> None: + """Setter for :attr:`prereleases`. + + :param value: The value to set. + """ + + @abc.abstractmethod + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: + """ + Determines if the given item is contained within this specifier. + """ + + @abc.abstractmethod + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """ + Takes an iterable of items and filters them so that only items which + are contained within this specifier are allowed in it. + """ + + +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. + + .. tip:: + + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ + + _operator_regex_str = r""" + (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" + (?P + (?: + # The identity operators allow for an escape hatch that will + # do an exact string match of the version you wish to install. + # This will not be parsed by PEP 440 and we cannot determine + # any semantic meaning from it. This operator is discouraged + # but included entirely as an escape hatch. + (?<====) # Only match for the identity operator + \s* + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. + ) + | + (?: + # The (non)equality operators allow for wild card and local + # versions to be specified so we have to define these two + # operators separately to enable that. + (?<===|!=) # Only match for equals and not equals + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)* # release + + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. + (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local + )? + ) + | + (?: + # The compatible operator requires at least two digits in the + # release segment. + (?<=~=) # Only match for the compatible operator + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + ) + | + (?: + # All other operators only allow a sub set of what the + # (non)equality operators do. Specifically they do not allow + # local versions to be specified nor do they allow the prefix + # matching wild cards. + (?=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + "===": "arbitrary", + } + + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: + + # Compatible releases have an equivalent combination of >= and ==. That + # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to + # implement this in terms of the other specifiers instead of + # implementing it ourselves. The only thing we need to do is construct + # the other specifiers. + + # We want everything but the last item in the version, but we want to + # ignore suffix segments. + prefix = _version_join( + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] + ) + + # Add the prefix notation to the end of our string + prefix += ".*" + + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) + + def _compare_equal(self, prospective: Version, spec: str) -> bool: + + # We need special logic to handle prefix matching + if spec.endswith(".*"): + # In the case of prefix matching we want to ignore local segment. + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) + # Split the spec out by bangs and dots, and pretend that there is + # an implicit dot in between a release segment and a pre-release segment. + split_spec = _version_split(normalized_spec) + + # Split the prospective version out by bangs and dots, and pretend + # that there is an implicit dot in between a release segment and + # a pre-release segment. + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) + + # Shorten the prospective version to be the same length as the spec + # so that we can determine if the specifier is a prefix of the + # prospective version or not. + shortened_prospective = padded_prospective[: len(split_spec)] + + return shortened_prospective == split_spec + else: + # Convert our spec string into a Version + spec_version = Version(spec) + + # If the specifier does not have a local segment, then we want to + # act as if the prospective version also does not have a local + # segment. + if not spec_version.local: + prospective = Version(prospective.public) + + return prospective == spec_version + + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: + return not self._compare_equal(prospective, spec) + + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) + + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) + + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is less than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective < spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a pre-release version, that we do not accept pre-release + # versions for the version mentioned in the specifier (e.g. <3.1 should + # not match 3.1.dev0, but should match 3.0.dev0). + if not spec.is_prerelease and prospective.is_prerelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # less than the spec version *and* it's not a pre-release of the same + # version in the spec. + return True + + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is greater than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective > spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a post-release version, that we do not accept + # post-release versions for the version mentioned in the specifier + # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0). + if not spec.is_postrelease and prospective.is_postrelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # Ensure that we do not allow a local version of the version mentioned + # in the specifier, which is technically greater than, to match. + if prospective.local is not None: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # greater than the spec version *and* it's not a pre-release of the + # same version in the spec. + return True + + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: + return str(prospective).lower() == str(spec).lower() + + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version + + +_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") + + +def _version_split(version: str) -> List[str]: + """Split version into components. + + The split components are intended for version comparison. The logic does + not attempt to retain the original version string, so joining the + components back with :func:`_version_join` may not produce the original + version string. + """ + result: List[str] = [] + + epoch, _, rest = version.rpartition("!") + result.append(epoch or "0") + + for item in rest.split("."): + match = _prefix_regex.search(item) + if match: + result.extend(match.groups()) + else: + result.append(item) + return result + + +def _version_join(components: List[str]) -> str: + """Join split version components into a version string. + + This function assumes the input came from :func:`_version_split`, where the + first component must be the epoch (either empty or numeric), and all other + components numeric. + """ + epoch, *rest = components + return f"{epoch}!{'.'.join(rest)}" + + +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: + left_split, right_split = [], [] + + # Get the release segment of our versions + left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left))) + right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) + + # Get the rest of our versions + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) + + # Insert our padding + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) + + return ( + list(itertools.chain.from_iterable(left_split)), + list(itertools.chain.from_iterable(right_split)), + ) + + +class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ + + # Split on `,` to break each individual specifier into it's own item, and + # strip each item to remove leading/trailing whitespace. + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + + # Make each individual specifier a Specifier and save in a frozen set for later. + self._specs = frozenset(map(Specifier, split_specifiers)) + + # Store our prereleases value so we can use it later to determine if + # we accept prereleases or not. + self._prereleases = prereleases + + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"" + + def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ + return ",".join(sorted(str(s) for s in self._specs)) + + def __hash__(self) -> int: + return hash(self._specs) + + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ + if isinstance(other, str): + other = SpecifierSet(other) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + specifier = SpecifierSet() + specifier._specs = frozenset(self._specs | other._specs) + + if self._prereleases is None and other._prereleases is not None: + specifier._prereleases = other._prereleases + elif self._prereleases is not None and other._prereleases is None: + specifier._prereleases = self._prereleases + elif self._prereleases == other._prereleases: + specifier._prereleases = self._prereleases + else: + raise ValueError( + "Cannot combine SpecifierSets with True and False prerelease " + "overrides." + ) + + return specifier + + def __eq__(self, other: object) -> bool: + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs == other._specs + + def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" + return len(self._specs) + + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. + + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) + + def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, + ) -> bool: + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # We can determine if we're going to allow pre-releases by looking to + # see if any of the underlying items supports them. If none of them do + # and this item is a pre-release then we do not allow it and we can + # short circuit that here. + # Note: This means that 1.0.dev1 would not be contained in something + # like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0 + if not prereleases and item.is_prerelease: + return False + + if installed and item.is_prerelease: + item = Version(item.base_version) + + # We simply dispatch to the underlying specs here to make sure that the + # given version is contained within all of them. + # Note: This use of all() here means that an empty set of specifiers + # will always return True, this is an explicit design decision. + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # If we have any specifiers, then we want to wrap our iterable in the + # filter method for each one, this will act as a logical AND amongst + # each specifier. + if self._specs: + for spec in self._specs: + iterable = spec.filter(iterable, prereleases=bool(prereleases)) + return iter(iterable) + # If we do not have any specifiers, then we need to have a rough filter + # which will filter out any pre-releases, unless there are no final + # releases. + else: + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] + + for item in iterable: + parsed_version = _coerce_version(item) + + # Store any item which is a pre-release for later unless we've + # already found a final version or we are accepting prereleases + if parsed_version.is_prerelease and not prereleases: + if not filtered: + found_prereleases.append(item) + else: + filtered.append(item) + + # If we've found no items except for pre-releases, then we'll go + # ahead and use the pre-releases + if not filtered and found_prereleases and prereleases is None: + return iter(found_prereleases) + + return iter(filtered) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/tags.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/tags.py new file mode 100644 index 0000000000000000000000000000000000000000..89f1926137dd2d2a6bd63616bf5b9f722fc8d584 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/tags.py @@ -0,0 +1,571 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import logging +import platform +import re +import struct +import subprocess +import sys +import sysconfig +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from . import _manylinux, _musllinux + +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} + + +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 + + +class Tag: + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self) -> str: + return self._interpreter + + @property + def abi(self) -> str: + return self._abi + + @property + def platform(self) -> str: + return self._platform + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) + ) + + def __hash__(self) -> int: + return self._hash + + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" + + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" + + +def parse_tag(tag: str) -> FrozenSet[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value: Union[int, str, None] = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_").replace(" ", "_") + + +def _is_threaded_cpython(abis: List[str]) -> bool: + """ + Determine if the ABI corresponds to a threaded (`--disable-gil`) build. + + The threaded builds are indicated by a "t" in the abiflags. + """ + if len(abis) == 0: + return False + # expect e.g., cp313 + m = re.match(r"cp\d+(.*)", abis[0]) + if not m: + return False + abiflags = m.group(1) + return "t" in abiflags + + +def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`) + builds do not support abi3. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading + + +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + threading = debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn): + threading = "t" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append(f"cp{version}{threading}") + abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}") + return abis + + +def cpython_tags( + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = f"cp{_version_nodot(python_version[:2])}" + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + threading = _is_threaded_cpython(abis) + use_abi3 = _abi3_applies(python_version, threading) + if use_abi3: + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if use_abi3: + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] + + +def generic_tags( + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + else: + abis = list(abis) + platforms = list(platforms or platform_tags()) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield f"py{_version_nodot((py_version[0], minor))}" + + +def compatible_tags( + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() + if version is None: + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + + +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv8l" + _, arch = linux.split("_", 1) + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" + + +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) + + +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. + """ + name = sys.implementation.name + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) + else: + yield from generic_tags() + + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c2c2f75aa806282d322c76c2117c0f0fdfb09d25 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/utils.py @@ -0,0 +1,172 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import re +from typing import FrozenSet, NewType, Tuple, Union, cast + +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version + +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + + +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) +_canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") + + +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") + # This is taken from PEP 503. + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: + """ + This is very similar to Version.__str__, but has one subtle difference + with the way it handles the release segment. + """ + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version + + parts = [] + + # Epoch + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") + + # Release segment + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) + + # Pre-release + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) + + # Post-release + if parsed.post is not None: + parts.append(f".post{parsed.post}") + + # Development release + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") + + # Local version segment + if parsed.local is not None: + parts.append(f"+{parsed.local}") + + return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name. + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename}" + ) from e + + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename}" + ) from e + + return (name, version) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/version.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/version.py new file mode 100644 index 0000000000000000000000000000000000000000..5faab9bd0dcf28847960162b2b4f13a8a556ef20 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/_vendor/packaging/version.py @@ -0,0 +1,563 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.version import parse, Version +""" + +import itertools +import re +from typing import Any, Callable, NamedTuple, Optional, SupportsInt, Tuple, Union + +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType + +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] + +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ + NegativeInfinityType, + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], +] +CmpKey = Tuple[ + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, +] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] + + +class _Version(NamedTuple): + epoch: int + release: Tuple[int, ...] + dev: Optional[Tuple[str, int]] + pre: Optional[Tuple[str, int]] + post: Optional[Tuple[str, int]] + local: Optional[LocalType] + + +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. + """ + return Version(version) + + +class InvalidVersion(ValueError): + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' + """ + + +class _BaseVersion: + _key: Tuple[Any, ...] + + def __hash__(self) -> int: + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +_VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?Palpha|a|beta|b|preview|pre|c|rc)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
+
+class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    
+    >>> v2
+    
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
+
+    def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: '{version}'")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        
+        """
+        return f""
+
+    def __str__(self) -> str:
+        """A string representation of the version that can be rounded-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self) -> int:
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
+
+    @property
+    def release(self) -> Tuple[int, ...]:
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
+
+    @property
+    def pre(self) -> Optional[Tuple[str, int]]:
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
+
+    @property
+    def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self) -> Optional[str]:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
+        return self.post is not None
+
+    @property
+    def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
+        return self.dev is not None
+
+    @property
+    def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+def _parse_letter_version(
+    letter: Optional[str], number: Union[str, bytes, SupportsInt, None]
+) -> Optional[Tuple[str, int]]:
+
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local: Optional[str]) -> Optional[LocalType]:
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(
+    epoch: int,
+    release: Tuple[int, ...],
+    pre: Optional[Tuple[str, int]],
+    post: Optional[Tuple[str, int]],
+    dev: Optional[Tuple[str, int]],
+    local: Optional[LocalType],
+) -> CmpKey:
+
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll use a reverse the list, drop all the now
+    # leading zeros until we come to something non zero, then take the rest
+    # re-reverse it back into the correct order and make it a tuple and use
+    # that for our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre: CmpPrePostDevType = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post: CmpPrePostDevType = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev: CmpPrePostDevType = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local: CmpLocalType = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/archive_util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/archive_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..07cd97f4d05925f5909e5ce5722cda9c7b5439f4
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/archive_util.py
@@ -0,0 +1,283 @@
+"""distutils.archive_util
+
+Utility functions for creating archive files (tarballs, zip files,
+that sort of thing)."""
+
+import os
+import sys
+from warnings import warn
+
+try:
+    import zipfile
+except ImportError:
+    zipfile = None
+
+
+from ._log import log
+from .dir_util import mkpath
+from .errors import DistutilsExecError
+from .spawn import spawn
+
+try:
+    from pwd import getpwnam
+except ImportError:
+    getpwnam = None
+
+try:
+    from grp import getgrnam
+except ImportError:
+    getgrnam = None
+
+
+def _get_gid(name):
+    """Returns a gid, given a group name."""
+    if getgrnam is None or name is None:
+        return None
+    try:
+        result = getgrnam(name)
+    except KeyError:
+        result = None
+    if result is not None:
+        return result[2]
+    return None
+
+
+def _get_uid(name):
+    """Returns an uid, given a user name."""
+    if getpwnam is None or name is None:
+        return None
+    try:
+        result = getpwnam(name)
+    except KeyError:
+        result = None
+    if result is not None:
+        return result[2]
+    return None
+
+
+def make_tarball(
+    base_name,
+    base_dir,
+    compress="gzip",
+    verbose=False,
+    dry_run=False,
+    owner=None,
+    group=None,
+):
+    """Create a (possibly compressed) tar file from all the files under
+    'base_dir'.
+
+    'compress' must be "gzip" (the default), "bzip2", "xz", "compress", or
+    None.  ("compress" will be deprecated in Python 3.2)
+
+    'owner' and 'group' can be used to define an owner and a group for the
+    archive that is being built. If not provided, the current owner and group
+    will be used.
+
+    The output tar file will be named 'base_dir' +  ".tar", possibly plus
+    the appropriate compression extension (".gz", ".bz2", ".xz" or ".Z").
+
+    Returns the output filename.
+    """
+    tar_compression = {
+        'gzip': 'gz',
+        'bzip2': 'bz2',
+        'xz': 'xz',
+        None: '',
+        'compress': '',
+    }
+    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'xz': '.xz', 'compress': '.Z'}
+
+    # flags for compression program, each element of list will be an argument
+    if compress is not None and compress not in compress_ext.keys():
+        raise ValueError(
+            "bad value for 'compress': must be None, 'gzip', 'bzip2', "
+            "'xz' or 'compress'"
+        )
+
+    archive_name = base_name + '.tar'
+    if compress != 'compress':
+        archive_name += compress_ext.get(compress, '')
+
+    mkpath(os.path.dirname(archive_name), dry_run=dry_run)
+
+    # creating the tarball
+    import tarfile  # late import so Python build itself doesn't break
+
+    log.info('Creating tar archive')
+
+    uid = _get_uid(owner)
+    gid = _get_gid(group)
+
+    def _set_uid_gid(tarinfo):
+        if gid is not None:
+            tarinfo.gid = gid
+            tarinfo.gname = group
+        if uid is not None:
+            tarinfo.uid = uid
+            tarinfo.uname = owner
+        return tarinfo
+
+    if not dry_run:
+        tar = tarfile.open(archive_name, f'w|{tar_compression[compress]}')
+        try:
+            tar.add(base_dir, filter=_set_uid_gid)
+        finally:
+            tar.close()
+
+    # compression using `compress`
+    if compress == 'compress':
+        warn("'compress' is deprecated.", DeprecationWarning)
+        # the option varies depending on the platform
+        compressed_name = archive_name + compress_ext[compress]
+        if sys.platform == 'win32':
+            cmd = [compress, archive_name, compressed_name]
+        else:
+            cmd = [compress, '-f', archive_name]
+        spawn(cmd, dry_run=dry_run)
+        return compressed_name
+
+    return archive_name
+
+
+def make_zipfile(base_name, base_dir, verbose=False, dry_run=False):  # noqa: C901
+    """Create a zip file from all the files under 'base_dir'.
+
+    The output zip file will be named 'base_name' + ".zip".  Uses either the
+    "zipfile" Python module (if available) or the InfoZIP "zip" utility
+    (if installed and found on the default search path).  If neither tool is
+    available, raises DistutilsExecError.  Returns the name of the output zip
+    file.
+    """
+    zip_filename = base_name + ".zip"
+    mkpath(os.path.dirname(zip_filename), dry_run=dry_run)
+
+    # If zipfile module is not available, try spawning an external
+    # 'zip' command.
+    if zipfile is None:
+        if verbose:
+            zipoptions = "-r"
+        else:
+            zipoptions = "-rq"
+
+        try:
+            spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
+        except DistutilsExecError:
+            # XXX really should distinguish between "couldn't find
+            # external 'zip' command" and "zip failed".
+            raise DistutilsExecError(
+                f"unable to create zip file '{zip_filename}': "
+                "could neither import the 'zipfile' module nor "
+                "find a standalone zip utility"
+            )
+
+    else:
+        log.info("creating '%s' and adding '%s' to it", zip_filename, base_dir)
+
+        if not dry_run:
+            try:
+                zip = zipfile.ZipFile(
+                    zip_filename, "w", compression=zipfile.ZIP_DEFLATED
+                )
+            except RuntimeError:
+                zip = zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_STORED)
+
+            with zip:
+                if base_dir != os.curdir:
+                    path = os.path.normpath(os.path.join(base_dir, ''))
+                    zip.write(path, path)
+                    log.info("adding '%s'", path)
+                for dirpath, dirnames, filenames in os.walk(base_dir):
+                    for name in dirnames:
+                        path = os.path.normpath(os.path.join(dirpath, name, ''))
+                        zip.write(path, path)
+                        log.info("adding '%s'", path)
+                    for name in filenames:
+                        path = os.path.normpath(os.path.join(dirpath, name))
+                        if os.path.isfile(path):
+                            zip.write(path, path)
+                            log.info("adding '%s'", path)
+
+    return zip_filename
+
+
+ARCHIVE_FORMATS = {
+    'gztar': (make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
+    'bztar': (make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
+    'xztar': (make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
+    'ztar': (make_tarball, [('compress', 'compress')], "compressed tar file"),
+    'tar': (make_tarball, [('compress', None)], "uncompressed tar file"),
+    'zip': (make_zipfile, [], "ZIP file"),
+}
+
+
+def check_archive_formats(formats):
+    """Returns the first format from the 'format' list that is unknown.
+
+    If all formats are known, returns None
+    """
+    for format in formats:
+        if format not in ARCHIVE_FORMATS:
+            return format
+    return None
+
+
+def make_archive(
+    base_name,
+    format,
+    root_dir=None,
+    base_dir=None,
+    verbose=False,
+    dry_run=False,
+    owner=None,
+    group=None,
+):
+    """Create an archive file (eg. zip or tar).
+
+    'base_name' is the name of the file to create, minus any format-specific
+    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
+    "bztar", "xztar", or "ztar".
+
+    'root_dir' is a directory that will be the root directory of the
+    archive; ie. we typically chdir into 'root_dir' before creating the
+    archive.  'base_dir' is the directory where we start archiving from;
+    ie. 'base_dir' will be the common prefix of all files and
+    directories in the archive.  'root_dir' and 'base_dir' both default
+    to the current directory.  Returns the name of the archive file.
+
+    'owner' and 'group' are used when creating a tar archive. By default,
+    uses the current owner and group.
+    """
+    save_cwd = os.getcwd()
+    if root_dir is not None:
+        log.debug("changing into '%s'", root_dir)
+        base_name = os.path.abspath(base_name)
+        if not dry_run:
+            os.chdir(root_dir)
+
+    if base_dir is None:
+        base_dir = os.curdir
+
+    kwargs = {'dry_run': dry_run}
+
+    try:
+        format_info = ARCHIVE_FORMATS[format]
+    except KeyError:
+        raise ValueError(f"unknown archive format '{format}'")
+
+    func = format_info[0]
+    for arg, val in format_info[1]:
+        kwargs[arg] = val
+
+    if format != 'zip':
+        kwargs['owner'] = owner
+        kwargs['group'] = group
+
+    try:
+        filename = func(base_name, base_dir, **kwargs)
+    finally:
+        if root_dir is not None:
+            log.debug("changing back to '%s'", save_cwd)
+            os.chdir(save_cwd)
+
+    return filename
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/bcppcompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/bcppcompiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..e47dca5d099d86df6faed008c588d9dce0b9be25
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/bcppcompiler.py
@@ -0,0 +1,397 @@
+"""distutils.bcppcompiler
+
+Contains BorlandCCompiler, an implementation of the abstract CCompiler class
+for the Borland C++ compiler.
+"""
+
+# This implementation by Lyle Johnson, based on the original msvccompiler.py
+# module and using the directions originally published by Gordon Williams.
+
+# XXX looks like there's a LOT of overlap between these two classes:
+# someone should sit down and factor out the common code as
+# WindowsCCompiler!  --GPW
+
+import os
+import warnings
+
+from ._log import log
+from ._modified import newer
+from .ccompiler import CCompiler, gen_preprocess_options
+from .errors import (
+    CompileError,
+    DistutilsExecError,
+    LibError,
+    LinkError,
+    UnknownFileError,
+)
+from .file_util import write_file
+
+warnings.warn(
+    "bcppcompiler is deprecated and slated to be removed "
+    "in the future. Please discontinue use or file an issue "
+    "with pypa/distutils describing your use case.",
+    DeprecationWarning,
+)
+
+
+class BCPPCompiler(CCompiler):
+    """Concrete class that implements an interface to the Borland C/C++
+    compiler, as defined by the CCompiler abstract class.
+    """
+
+    compiler_type = 'bcpp'
+
+    # Just set this so CCompiler's constructor doesn't barf.  We currently
+    # don't use the 'set_executables()' bureaucracy provided by CCompiler,
+    # as it really isn't necessary for this sort of single-compiler class.
+    # Would be nice to have a consistent interface with UnixCCompiler,
+    # though, so it's worth thinking about.
+    executables = {}
+
+    # Private class data (need to distinguish C from C++ source for compiler)
+    _c_extensions = ['.c']
+    _cpp_extensions = ['.cc', '.cpp', '.cxx']
+
+    # Needed for the filename generation methods provided by the
+    # base class, CCompiler.
+    src_extensions = _c_extensions + _cpp_extensions
+    obj_extension = '.obj'
+    static_lib_extension = '.lib'
+    shared_lib_extension = '.dll'
+    static_lib_format = shared_lib_format = '%s%s'
+    exe_extension = '.exe'
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        super().__init__(verbose, dry_run, force)
+
+        # These executables are assumed to all be in the path.
+        # Borland doesn't seem to use any special registry settings to
+        # indicate their installation locations.
+
+        self.cc = "bcc32.exe"
+        self.linker = "ilink32.exe"
+        self.lib = "tlib.exe"
+
+        self.preprocess_options = None
+        self.compile_options = ['/tWM', '/O2', '/q', '/g0']
+        self.compile_options_debug = ['/tWM', '/Od', '/q', '/g0']
+
+        self.ldflags_shared = ['/Tpd', '/Gn', '/q', '/x']
+        self.ldflags_shared_debug = ['/Tpd', '/Gn', '/q', '/x']
+        self.ldflags_static = []
+        self.ldflags_exe = ['/Gn', '/q', '/x']
+        self.ldflags_exe_debug = ['/Gn', '/q', '/x', '/r']
+
+    # -- Worker methods ------------------------------------------------
+
+    def compile(
+        self,
+        sources,
+        output_dir=None,
+        macros=None,
+        include_dirs=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        depends=None,
+    ):
+        macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
+            output_dir, macros, include_dirs, sources, depends, extra_postargs
+        )
+        compile_opts = extra_preargs or []
+        compile_opts.append('-c')
+        if debug:
+            compile_opts.extend(self.compile_options_debug)
+        else:
+            compile_opts.extend(self.compile_options)
+
+        for obj in objects:
+            try:
+                src, ext = build[obj]
+            except KeyError:
+                continue
+            # XXX why do the normpath here?
+            src = os.path.normpath(src)
+            obj = os.path.normpath(obj)
+            # XXX _setup_compile() did a mkpath() too but before the normpath.
+            # Is it possible to skip the normpath?
+            self.mkpath(os.path.dirname(obj))
+
+            if ext == '.res':
+                # This is already a binary file -- skip it.
+                continue  # the 'for' loop
+            if ext == '.rc':
+                # This needs to be compiled to a .res file -- do it now.
+                try:
+                    self.spawn(["brcc32", "-fo", obj, src])
+                except DistutilsExecError as msg:
+                    raise CompileError(msg)
+                continue  # the 'for' loop
+
+            # The next two are both for the real compiler.
+            if ext in self._c_extensions:
+                input_opt = ""
+            elif ext in self._cpp_extensions:
+                input_opt = "-P"
+            else:
+                # Unknown file type -- no extra options.  The compiler
+                # will probably fail, but let it just in case this is a
+                # file the compiler recognizes even if we don't.
+                input_opt = ""
+
+            output_opt = "-o" + obj
+
+            # Compiler command line syntax is: "bcc32 [options] file(s)".
+            # Note that the source file names must appear at the end of
+            # the command line.
+            try:
+                self.spawn(
+                    [self.cc]
+                    + compile_opts
+                    + pp_opts
+                    + [input_opt, output_opt]
+                    + extra_postargs
+                    + [src]
+                )
+            except DistutilsExecError as msg:
+                raise CompileError(msg)
+
+        return objects
+
+    # compile ()
+
+    def create_static_lib(
+        self, objects, output_libname, output_dir=None, debug=False, target_lang=None
+    ):
+        (objects, output_dir) = self._fix_object_args(objects, output_dir)
+        output_filename = self.library_filename(output_libname, output_dir=output_dir)
+
+        if self._need_link(objects, output_filename):
+            lib_args = [output_filename, '/u'] + objects
+            if debug:
+                pass  # XXX what goes here?
+            try:
+                self.spawn([self.lib] + lib_args)
+            except DistutilsExecError as msg:
+                raise LibError(msg)
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    # create_static_lib ()
+
+    def link(  # noqa: C901
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        # XXX this ignores 'build_temp'!  should follow the lead of
+        # msvccompiler.py
+
+        (objects, output_dir) = self._fix_object_args(objects, output_dir)
+        (libraries, library_dirs, runtime_library_dirs) = self._fix_lib_args(
+            libraries, library_dirs, runtime_library_dirs
+        )
+
+        if runtime_library_dirs:
+            log.warning(
+                "I don't know what to do with 'runtime_library_dirs': %s",
+                str(runtime_library_dirs),
+            )
+
+        if output_dir is not None:
+            output_filename = os.path.join(output_dir, output_filename)
+
+        if self._need_link(objects, output_filename):
+            # Figure out linker args based on type of target.
+            if target_desc == CCompiler.EXECUTABLE:
+                startup_obj = 'c0w32'
+                if debug:
+                    ld_args = self.ldflags_exe_debug[:]
+                else:
+                    ld_args = self.ldflags_exe[:]
+            else:
+                startup_obj = 'c0d32'
+                if debug:
+                    ld_args = self.ldflags_shared_debug[:]
+                else:
+                    ld_args = self.ldflags_shared[:]
+
+            # Create a temporary exports file for use by the linker
+            if export_symbols is None:
+                def_file = ''
+            else:
+                head, tail = os.path.split(output_filename)
+                modname, ext = os.path.splitext(tail)
+                temp_dir = os.path.dirname(objects[0])  # preserve tree structure
+                def_file = os.path.join(temp_dir, f'{modname}.def')
+                contents = ['EXPORTS']
+                for sym in export_symbols or []:
+                    contents.append(f'  {sym}=_{sym}')
+                self.execute(write_file, (def_file, contents), f"writing {def_file}")
+
+            # Borland C++ has problems with '/' in paths
+            objects2 = map(os.path.normpath, objects)
+            # split objects in .obj and .res files
+            # Borland C++ needs them at different positions in the command line
+            objects = [startup_obj]
+            resources = []
+            for file in objects2:
+                (base, ext) = os.path.splitext(os.path.normcase(file))
+                if ext == '.res':
+                    resources.append(file)
+                else:
+                    objects.append(file)
+
+            for ell in library_dirs:
+                ld_args.append(f"/L{os.path.normpath(ell)}")
+            ld_args.append("/L.")  # we sometimes use relative paths
+
+            # list of object files
+            ld_args.extend(objects)
+
+            # XXX the command-line syntax for Borland C++ is a bit wonky;
+            # certain filenames are jammed together in one big string, but
+            # comma-delimited.  This doesn't mesh too well with the
+            # Unix-centric attitude (with a DOS/Windows quoting hack) of
+            # 'spawn()', so constructing the argument list is a bit
+            # awkward.  Note that doing the obvious thing and jamming all
+            # the filenames and commas into one argument would be wrong,
+            # because 'spawn()' would quote any filenames with spaces in
+            # them.  Arghghh!.  Apparently it works fine as coded...
+
+            # name of dll/exe file
+            ld_args.extend([',', output_filename])
+            # no map file and start libraries
+            ld_args.append(',,')
+
+            for lib in libraries:
+                # see if we find it and if there is a bcpp specific lib
+                # (xxx_bcpp.lib)
+                libfile = self.find_library_file(library_dirs, lib, debug)
+                if libfile is None:
+                    ld_args.append(lib)
+                    # probably a BCPP internal library -- don't warn
+                else:
+                    # full name which prefers bcpp_xxx.lib over xxx.lib
+                    ld_args.append(libfile)
+
+            # some default libraries
+            ld_args.extend(('import32', 'cw32mt'))
+
+            # def file for export symbols
+            ld_args.extend([',', def_file])
+            # add resource files
+            ld_args.append(',')
+            ld_args.extend(resources)
+
+            if extra_preargs:
+                ld_args[:0] = extra_preargs
+            if extra_postargs:
+                ld_args.extend(extra_postargs)
+
+            self.mkpath(os.path.dirname(output_filename))
+            try:
+                self.spawn([self.linker] + ld_args)
+            except DistutilsExecError as msg:
+                raise LinkError(msg)
+
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    # link ()
+
+    # -- Miscellaneous methods -----------------------------------------
+
+    def find_library_file(self, dirs, lib, debug=False):
+        # List of effective library names to try, in order of preference:
+        # xxx_bcpp.lib is better than xxx.lib
+        # and xxx_d.lib is better than xxx.lib if debug is set
+        #
+        # The "_bcpp" suffix is to handle a Python installation for people
+        # with multiple compilers (primarily Distutils hackers, I suspect
+        # ;-).  The idea is they'd have one static library for each
+        # compiler they care about, since (almost?) every Windows compiler
+        # seems to have a different format for static libraries.
+        if debug:
+            dlib = lib + "_d"
+            try_names = (dlib + "_bcpp", lib + "_bcpp", dlib, lib)
+        else:
+            try_names = (lib + "_bcpp", lib)
+
+        for dir in dirs:
+            for name in try_names:
+                libfile = os.path.join(dir, self.library_filename(name))
+                if os.path.exists(libfile):
+                    return libfile
+        else:
+            # Oops, didn't find it in *any* of 'dirs'
+            return None
+
+    # overwrite the one from CCompiler to support rc and res-files
+    def object_filenames(self, source_filenames, strip_dir=False, output_dir=''):
+        if output_dir is None:
+            output_dir = ''
+        obj_names = []
+        for src_name in source_filenames:
+            # use normcase to make sure '.rc' is really '.rc' and not '.RC'
+            (base, ext) = os.path.splitext(os.path.normcase(src_name))
+            if ext not in (self.src_extensions + ['.rc', '.res']):
+                raise UnknownFileError(f"unknown file type '{ext}' (from '{src_name}')")
+            if strip_dir:
+                base = os.path.basename(base)
+            if ext == '.res':
+                # these can go unchanged
+                obj_names.append(os.path.join(output_dir, base + ext))
+            elif ext == '.rc':
+                # these need to be compiled to .res-files
+                obj_names.append(os.path.join(output_dir, base + '.res'))
+            else:
+                obj_names.append(os.path.join(output_dir, base + self.obj_extension))
+        return obj_names
+
+    # object_filenames ()
+
+    def preprocess(
+        self,
+        source,
+        output_file=None,
+        macros=None,
+        include_dirs=None,
+        extra_preargs=None,
+        extra_postargs=None,
+    ):
+        (_, macros, include_dirs) = self._fix_compile_args(None, macros, include_dirs)
+        pp_opts = gen_preprocess_options(macros, include_dirs)
+        pp_args = ['cpp32.exe'] + pp_opts
+        if output_file is not None:
+            pp_args.append('-o' + output_file)
+        if extra_preargs:
+            pp_args[:0] = extra_preargs
+        if extra_postargs:
+            pp_args.extend(extra_postargs)
+        pp_args.append(source)
+
+        # We need to preprocess: either we're being forced to, or the
+        # source file is newer than the target (or the target doesn't
+        # exist).
+        if self.force or output_file is None or newer(source, output_file):
+            if output_file:
+                self.mkpath(os.path.dirname(output_file))
+            try:
+                self.spawn(pp_args)
+            except DistutilsExecError as msg:
+                print(msg)
+                raise CompileError(msg)
+
+    # preprocess()
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/ccompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/ccompiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d5297b9444ad8557ac5f669d78c7f5d44083e01
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/ccompiler.py
@@ -0,0 +1,1259 @@
+"""distutils.ccompiler
+
+Contains CCompiler, an abstract base class that defines the interface
+for the Distutils compiler abstraction model."""
+
+import os
+import re
+import sys
+import types
+import warnings
+
+from ._itertools import always_iterable
+from ._log import log
+from ._modified import newer_group
+from .dir_util import mkpath
+from .errors import (
+    CompileError,
+    DistutilsModuleError,
+    DistutilsPlatformError,
+    LinkError,
+    UnknownFileError,
+)
+from .file_util import move_file
+from .spawn import spawn
+from .util import execute, split_quoted, is_mingw
+
+
+class CCompiler:
+    """Abstract base class to define the interface that must be implemented
+    by real compiler classes.  Also has some utility methods used by
+    several compiler classes.
+
+    The basic idea behind a compiler abstraction class is that each
+    instance can be used for all the compile/link steps in building a
+    single project.  Thus, attributes common to all of those compile and
+    link steps -- include directories, macros to define, libraries to link
+    against, etc. -- are attributes of the compiler instance.  To allow for
+    variability in how individual files are treated, most of those
+    attributes may be varied on a per-compilation or per-link basis.
+    """
+
+    # 'compiler_type' is a class attribute that identifies this class.  It
+    # keeps code that wants to know what kind of compiler it's dealing with
+    # from having to import all possible compiler classes just to do an
+    # 'isinstance'.  In concrete CCompiler subclasses, 'compiler_type'
+    # should really, really be one of the keys of the 'compiler_class'
+    # dictionary (see below -- used by the 'new_compiler()' factory
+    # function) -- authors of new compiler interface classes are
+    # responsible for updating 'compiler_class'!
+    compiler_type = None
+
+    # XXX things not handled by this compiler abstraction model:
+    #   * client can't provide additional options for a compiler,
+    #     e.g. warning, optimization, debugging flags.  Perhaps this
+    #     should be the domain of concrete compiler abstraction classes
+    #     (UnixCCompiler, MSVCCompiler, etc.) -- or perhaps the base
+    #     class should have methods for the common ones.
+    #   * can't completely override the include or library searchg
+    #     path, ie. no "cc -I -Idir1 -Idir2" or "cc -L -Ldir1 -Ldir2".
+    #     I'm not sure how widely supported this is even by Unix
+    #     compilers, much less on other platforms.  And I'm even less
+    #     sure how useful it is; maybe for cross-compiling, but
+    #     support for that is a ways off.  (And anyways, cross
+    #     compilers probably have a dedicated binary with the
+    #     right paths compiled in.  I hope.)
+    #   * can't do really freaky things with the library list/library
+    #     dirs, e.g. "-Ldir1 -lfoo -Ldir2 -lfoo" to link against
+    #     different versions of libfoo.a in different locations.  I
+    #     think this is useless without the ability to null out the
+    #     library search path anyways.
+
+    # Subclasses that rely on the standard filename generation methods
+    # implemented below should override these; see the comment near
+    # those methods ('object_filenames()' et. al.) for details:
+    src_extensions = None  # list of strings
+    obj_extension = None  # string
+    static_lib_extension = None
+    shared_lib_extension = None  # string
+    static_lib_format = None  # format string
+    shared_lib_format = None  # prob. same as static_lib_format
+    exe_extension = None  # string
+
+    # Default language settings. language_map is used to detect a source
+    # file or Extension target language, checking source filenames.
+    # language_order is used to detect the language precedence, when deciding
+    # what language to use when mixing source types. For example, if some
+    # extension has two files with ".c" extension, and one with ".cpp", it
+    # is still linked as c++.
+    language_map = {
+        ".c": "c",
+        ".cc": "c++",
+        ".cpp": "c++",
+        ".cxx": "c++",
+        ".m": "objc",
+    }
+    language_order = ["c++", "objc", "c"]
+
+    include_dirs = []
+    """
+    include dirs specific to this compiler class
+    """
+
+    library_dirs = []
+    """
+    library dirs specific to this compiler class
+    """
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        self.dry_run = dry_run
+        self.force = force
+        self.verbose = verbose
+
+        # 'output_dir': a common output directory for object, library,
+        # shared object, and shared library files
+        self.output_dir = None
+
+        # 'macros': a list of macro definitions (or undefinitions).  A
+        # macro definition is a 2-tuple (name, value), where the value is
+        # either a string or None (no explicit value).  A macro
+        # undefinition is a 1-tuple (name,).
+        self.macros = []
+
+        # 'include_dirs': a list of directories to search for include files
+        self.include_dirs = []
+
+        # 'libraries': a list of libraries to include in any link
+        # (library names, not filenames: eg. "foo" not "libfoo.a")
+        self.libraries = []
+
+        # 'library_dirs': a list of directories to search for libraries
+        self.library_dirs = []
+
+        # 'runtime_library_dirs': a list of directories to search for
+        # shared libraries/objects at runtime
+        self.runtime_library_dirs = []
+
+        # 'objects': a list of object files (or similar, such as explicitly
+        # named library files) to include on any link
+        self.objects = []
+
+        for key in self.executables.keys():
+            self.set_executable(key, self.executables[key])
+
+    def set_executables(self, **kwargs):
+        """Define the executables (and options for them) that will be run
+        to perform the various stages of compilation.  The exact set of
+        executables that may be specified here depends on the compiler
+        class (via the 'executables' class attribute), but most will have:
+          compiler      the C/C++ compiler
+          linker_so     linker used to create shared objects and libraries
+          linker_exe    linker used to create binary executables
+          archiver      static library creator
+
+        On platforms with a command-line (Unix, DOS/Windows), each of these
+        is a string that will be split into executable name and (optional)
+        list of arguments.  (Splitting the string is done similarly to how
+        Unix shells operate: words are delimited by spaces, but quotes and
+        backslashes can override this.  See
+        'distutils.util.split_quoted()'.)
+        """
+
+        # Note that some CCompiler implementation classes will define class
+        # attributes 'cpp', 'cc', etc. with hard-coded executable names;
+        # this is appropriate when a compiler class is for exactly one
+        # compiler/OS combination (eg. MSVCCompiler).  Other compiler
+        # classes (UnixCCompiler, in particular) are driven by information
+        # discovered at run-time, since there are many different ways to do
+        # basically the same things with Unix C compilers.
+
+        for key in kwargs:
+            if key not in self.executables:
+                raise ValueError(
+                    f"unknown executable '{key}' for class {self.__class__.__name__}"
+                )
+            self.set_executable(key, kwargs[key])
+
+    def set_executable(self, key, value):
+        if isinstance(value, str):
+            setattr(self, key, split_quoted(value))
+        else:
+            setattr(self, key, value)
+
+    def _find_macro(self, name):
+        i = 0
+        for defn in self.macros:
+            if defn[0] == name:
+                return i
+            i += 1
+        return None
+
+    def _check_macro_definitions(self, definitions):
+        """Ensure that every element of 'definitions' is valid."""
+        for defn in definitions:
+            self._check_macro_definition(*defn)
+
+    def _check_macro_definition(self, defn):
+        """
+        Raise a TypeError if defn is not valid.
+
+        A valid definition is either a (name, value) 2-tuple or a (name,) tuple.
+        """
+        if not isinstance(defn, tuple) or not self._is_valid_macro(*defn):
+            raise TypeError(
+                f"invalid macro definition '{defn}': "
+                "must be tuple (string,), (string, string), or (string, None)"
+            )
+
+    @staticmethod
+    def _is_valid_macro(name, value=None):
+        """
+        A valid macro is a ``name : str`` and a ``value : str | None``.
+        """
+        return isinstance(name, str) and isinstance(value, (str, types.NoneType))
+
+    # -- Bookkeeping methods -------------------------------------------
+
+    def define_macro(self, name, value=None):
+        """Define a preprocessor macro for all compilations driven by this
+        compiler object.  The optional parameter 'value' should be a
+        string; if it is not supplied, then the macro will be defined
+        without an explicit value and the exact outcome depends on the
+        compiler used (XXX true? does ANSI say anything about this?)
+        """
+        # Delete from the list of macro definitions/undefinitions if
+        # already there (so that this one will take precedence).
+        i = self._find_macro(name)
+        if i is not None:
+            del self.macros[i]
+
+        self.macros.append((name, value))
+
+    def undefine_macro(self, name):
+        """Undefine a preprocessor macro for all compilations driven by
+        this compiler object.  If the same macro is defined by
+        'define_macro()' and undefined by 'undefine_macro()' the last call
+        takes precedence (including multiple redefinitions or
+        undefinitions).  If the macro is redefined/undefined on a
+        per-compilation basis (ie. in the call to 'compile()'), then that
+        takes precedence.
+        """
+        # Delete from the list of macro definitions/undefinitions if
+        # already there (so that this one will take precedence).
+        i = self._find_macro(name)
+        if i is not None:
+            del self.macros[i]
+
+        undefn = (name,)
+        self.macros.append(undefn)
+
+    def add_include_dir(self, dir):
+        """Add 'dir' to the list of directories that will be searched for
+        header files.  The compiler is instructed to search directories in
+        the order in which they are supplied by successive calls to
+        'add_include_dir()'.
+        """
+        self.include_dirs.append(dir)
+
+    def set_include_dirs(self, dirs):
+        """Set the list of directories that will be searched to 'dirs' (a
+        list of strings).  Overrides any preceding calls to
+        'add_include_dir()'; subsequence calls to 'add_include_dir()' add
+        to the list passed to 'set_include_dirs()'.  This does not affect
+        any list of standard include directories that the compiler may
+        search by default.
+        """
+        self.include_dirs = dirs[:]
+
+    def add_library(self, libname):
+        """Add 'libname' to the list of libraries that will be included in
+        all links driven by this compiler object.  Note that 'libname'
+        should *not* be the name of a file containing a library, but the
+        name of the library itself: the actual filename will be inferred by
+        the linker, the compiler, or the compiler class (depending on the
+        platform).
+
+        The linker will be instructed to link against libraries in the
+        order they were supplied to 'add_library()' and/or
+        'set_libraries()'.  It is perfectly valid to duplicate library
+        names; the linker will be instructed to link against libraries as
+        many times as they are mentioned.
+        """
+        self.libraries.append(libname)
+
+    def set_libraries(self, libnames):
+        """Set the list of libraries to be included in all links driven by
+        this compiler object to 'libnames' (a list of strings).  This does
+        not affect any standard system libraries that the linker may
+        include by default.
+        """
+        self.libraries = libnames[:]
+
+    def add_library_dir(self, dir):
+        """Add 'dir' to the list of directories that will be searched for
+        libraries specified to 'add_library()' and 'set_libraries()'.  The
+        linker will be instructed to search for libraries in the order they
+        are supplied to 'add_library_dir()' and/or 'set_library_dirs()'.
+        """
+        self.library_dirs.append(dir)
+
+    def set_library_dirs(self, dirs):
+        """Set the list of library search directories to 'dirs' (a list of
+        strings).  This does not affect any standard library search path
+        that the linker may search by default.
+        """
+        self.library_dirs = dirs[:]
+
+    def add_runtime_library_dir(self, dir):
+        """Add 'dir' to the list of directories that will be searched for
+        shared libraries at runtime.
+        """
+        self.runtime_library_dirs.append(dir)
+
+    def set_runtime_library_dirs(self, dirs):
+        """Set the list of directories to search for shared libraries at
+        runtime to 'dirs' (a list of strings).  This does not affect any
+        standard search path that the runtime linker may search by
+        default.
+        """
+        self.runtime_library_dirs = dirs[:]
+
+    def add_link_object(self, object):
+        """Add 'object' to the list of object files (or analogues, such as
+        explicitly named library files or the output of "resource
+        compilers") to be included in every link driven by this compiler
+        object.
+        """
+        self.objects.append(object)
+
+    def set_link_objects(self, objects):
+        """Set the list of object files (or analogues) to be included in
+        every link to 'objects'.  This does not affect any standard object
+        files that the linker may include by default (such as system
+        libraries).
+        """
+        self.objects = objects[:]
+
+    # -- Private utility methods --------------------------------------
+    # (here for the convenience of subclasses)
+
+    # Helper method to prep compiler in subclass compile() methods
+
+    def _setup_compile(self, outdir, macros, incdirs, sources, depends, extra):
+        """Process arguments and decide which source files to compile."""
+        outdir, macros, incdirs = self._fix_compile_args(outdir, macros, incdirs)
+
+        if extra is None:
+            extra = []
+
+        # Get the list of expected output (object) files
+        objects = self.object_filenames(sources, strip_dir=False, output_dir=outdir)
+        assert len(objects) == len(sources)
+
+        pp_opts = gen_preprocess_options(macros, incdirs)
+
+        build = {}
+        for i in range(len(sources)):
+            src = sources[i]
+            obj = objects[i]
+            ext = os.path.splitext(src)[1]
+            self.mkpath(os.path.dirname(obj))
+            build[obj] = (src, ext)
+
+        return macros, objects, extra, pp_opts, build
+
+    def _get_cc_args(self, pp_opts, debug, before):
+        # works for unixccompiler, cygwinccompiler
+        cc_args = pp_opts + ['-c']
+        if debug:
+            cc_args[:0] = ['-g']
+        if before:
+            cc_args[:0] = before
+        return cc_args
+
+    def _fix_compile_args(self, output_dir, macros, include_dirs):
+        """Typecheck and fix-up some of the arguments to the 'compile()'
+        method, and return fixed-up values.  Specifically: if 'output_dir'
+        is None, replaces it with 'self.output_dir'; ensures that 'macros'
+        is a list, and augments it with 'self.macros'; ensures that
+        'include_dirs' is a list, and augments it with 'self.include_dirs'.
+        Guarantees that the returned values are of the correct type,
+        i.e. for 'output_dir' either string or None, and for 'macros' and
+        'include_dirs' either list or None.
+        """
+        if output_dir is None:
+            output_dir = self.output_dir
+        elif not isinstance(output_dir, str):
+            raise TypeError("'output_dir' must be a string or None")
+
+        if macros is None:
+            macros = list(self.macros)
+        elif isinstance(macros, list):
+            macros = macros + (self.macros or [])
+        else:
+            raise TypeError("'macros' (if supplied) must be a list of tuples")
+
+        if include_dirs is None:
+            include_dirs = list(self.include_dirs)
+        elif isinstance(include_dirs, (list, tuple)):
+            include_dirs = list(include_dirs) + (self.include_dirs or [])
+        else:
+            raise TypeError("'include_dirs' (if supplied) must be a list of strings")
+
+        # add include dirs for class
+        include_dirs += self.__class__.include_dirs
+
+        return output_dir, macros, include_dirs
+
+    def _prep_compile(self, sources, output_dir, depends=None):
+        """Decide which source files must be recompiled.
+
+        Determine the list of object files corresponding to 'sources',
+        and figure out which ones really need to be recompiled.
+        Return a list of all object files and a dictionary telling
+        which source files can be skipped.
+        """
+        # Get the list of expected output (object) files
+        objects = self.object_filenames(sources, output_dir=output_dir)
+        assert len(objects) == len(sources)
+
+        # Return an empty dict for the "which source files can be skipped"
+        # return value to preserve API compatibility.
+        return objects, {}
+
+    def _fix_object_args(self, objects, output_dir):
+        """Typecheck and fix up some arguments supplied to various methods.
+        Specifically: ensure that 'objects' is a list; if output_dir is
+        None, replace with self.output_dir.  Return fixed versions of
+        'objects' and 'output_dir'.
+        """
+        if not isinstance(objects, (list, tuple)):
+            raise TypeError("'objects' must be a list or tuple of strings")
+        objects = list(objects)
+
+        if output_dir is None:
+            output_dir = self.output_dir
+        elif not isinstance(output_dir, str):
+            raise TypeError("'output_dir' must be a string or None")
+
+        return (objects, output_dir)
+
+    def _fix_lib_args(self, libraries, library_dirs, runtime_library_dirs):
+        """Typecheck and fix up some of the arguments supplied to the
+        'link_*' methods.  Specifically: ensure that all arguments are
+        lists, and augment them with their permanent versions
+        (eg. 'self.libraries' augments 'libraries').  Return a tuple with
+        fixed versions of all arguments.
+        """
+        if libraries is None:
+            libraries = list(self.libraries)
+        elif isinstance(libraries, (list, tuple)):
+            libraries = list(libraries) + (self.libraries or [])
+        else:
+            raise TypeError("'libraries' (if supplied) must be a list of strings")
+
+        if library_dirs is None:
+            library_dirs = list(self.library_dirs)
+        elif isinstance(library_dirs, (list, tuple)):
+            library_dirs = list(library_dirs) + (self.library_dirs or [])
+        else:
+            raise TypeError("'library_dirs' (if supplied) must be a list of strings")
+
+        # add library dirs for class
+        library_dirs += self.__class__.library_dirs
+
+        if runtime_library_dirs is None:
+            runtime_library_dirs = list(self.runtime_library_dirs)
+        elif isinstance(runtime_library_dirs, (list, tuple)):
+            runtime_library_dirs = list(runtime_library_dirs) + (
+                self.runtime_library_dirs or []
+            )
+        else:
+            raise TypeError(
+                "'runtime_library_dirs' (if supplied) must be a list of strings"
+            )
+
+        return (libraries, library_dirs, runtime_library_dirs)
+
+    def _need_link(self, objects, output_file):
+        """Return true if we need to relink the files listed in 'objects'
+        to recreate 'output_file'.
+        """
+        if self.force:
+            return True
+        else:
+            if self.dry_run:
+                newer = newer_group(objects, output_file, missing='newer')
+            else:
+                newer = newer_group(objects, output_file)
+            return newer
+
+    def detect_language(self, sources):
+        """Detect the language of a given file, or list of files. Uses
+        language_map, and language_order to do the job.
+        """
+        if not isinstance(sources, list):
+            sources = [sources]
+        lang = None
+        index = len(self.language_order)
+        for source in sources:
+            base, ext = os.path.splitext(source)
+            extlang = self.language_map.get(ext)
+            try:
+                extindex = self.language_order.index(extlang)
+                if extindex < index:
+                    lang = extlang
+                    index = extindex
+            except ValueError:
+                pass
+        return lang
+
+    # -- Worker methods ------------------------------------------------
+    # (must be implemented by subclasses)
+
+    def preprocess(
+        self,
+        source,
+        output_file=None,
+        macros=None,
+        include_dirs=None,
+        extra_preargs=None,
+        extra_postargs=None,
+    ):
+        """Preprocess a single C/C++ source file, named in 'source'.
+        Output will be written to file named 'output_file', or stdout if
+        'output_file' not supplied.  'macros' is a list of macro
+        definitions as for 'compile()', which will augment the macros set
+        with 'define_macro()' and 'undefine_macro()'.  'include_dirs' is a
+        list of directory names that will be added to the default list.
+
+        Raises PreprocessError on failure.
+        """
+        pass
+
+    def compile(
+        self,
+        sources,
+        output_dir=None,
+        macros=None,
+        include_dirs=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        depends=None,
+    ):
+        """Compile one or more source files.
+
+        'sources' must be a list of filenames, most likely C/C++
+        files, but in reality anything that can be handled by a
+        particular compiler and compiler class (eg. MSVCCompiler can
+        handle resource files in 'sources').  Return a list of object
+        filenames, one per source filename in 'sources'.  Depending on
+        the implementation, not all source files will necessarily be
+        compiled, but all corresponding object filenames will be
+        returned.
+
+        If 'output_dir' is given, object files will be put under it, while
+        retaining their original path component.  That is, "foo/bar.c"
+        normally compiles to "foo/bar.o" (for a Unix implementation); if
+        'output_dir' is "build", then it would compile to
+        "build/foo/bar.o".
+
+        'macros', if given, must be a list of macro definitions.  A macro
+        definition is either a (name, value) 2-tuple or a (name,) 1-tuple.
+        The former defines a macro; if the value is None, the macro is
+        defined without an explicit value.  The 1-tuple case undefines a
+        macro.  Later definitions/redefinitions/ undefinitions take
+        precedence.
+
+        'include_dirs', if given, must be a list of strings, the
+        directories to add to the default include file search path for this
+        compilation only.
+
+        'debug' is a boolean; if true, the compiler will be instructed to
+        output debug symbols in (or alongside) the object file(s).
+
+        'extra_preargs' and 'extra_postargs' are implementation- dependent.
+        On platforms that have the notion of a command-line (e.g. Unix,
+        DOS/Windows), they are most likely lists of strings: extra
+        command-line arguments to prepend/append to the compiler command
+        line.  On other platforms, consult the implementation class
+        documentation.  In any event, they are intended as an escape hatch
+        for those occasions when the abstract compiler framework doesn't
+        cut the mustard.
+
+        'depends', if given, is a list of filenames that all targets
+        depend on.  If a source file is older than any file in
+        depends, then the source file will be recompiled.  This
+        supports dependency tracking, but only at a coarse
+        granularity.
+
+        Raises CompileError on failure.
+        """
+        # A concrete compiler class can either override this method
+        # entirely or implement _compile().
+        macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
+            output_dir, macros, include_dirs, sources, depends, extra_postargs
+        )
+        cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
+
+        for obj in objects:
+            try:
+                src, ext = build[obj]
+            except KeyError:
+                continue
+            self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+
+        # Return *all* object filenames, not just the ones we just built.
+        return objects
+
+    def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+        """Compile 'src' to product 'obj'."""
+        # A concrete compiler class that does not override compile()
+        # should implement _compile().
+        pass
+
+    def create_static_lib(
+        self, objects, output_libname, output_dir=None, debug=False, target_lang=None
+    ):
+        """Link a bunch of stuff together to create a static library file.
+        The "bunch of stuff" consists of the list of object files supplied
+        as 'objects', the extra object files supplied to
+        'add_link_object()' and/or 'set_link_objects()', the libraries
+        supplied to 'add_library()' and/or 'set_libraries()', and the
+        libraries supplied as 'libraries' (if any).
+
+        'output_libname' should be a library name, not a filename; the
+        filename will be inferred from the library name.  'output_dir' is
+        the directory where the library file will be put.
+
+        'debug' is a boolean; if true, debugging information will be
+        included in the library (note that on most platforms, it is the
+        compile step where this matters: the 'debug' flag is included here
+        just for consistency).
+
+        'target_lang' is the target language for which the given objects
+        are being compiled. This allows specific linkage time treatment of
+        certain languages.
+
+        Raises LibError on failure.
+        """
+        pass
+
+    # values for target_desc parameter in link()
+    SHARED_OBJECT = "shared_object"
+    SHARED_LIBRARY = "shared_library"
+    EXECUTABLE = "executable"
+
+    def link(
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        """Link a bunch of stuff together to create an executable or
+        shared library file.
+
+        The "bunch of stuff" consists of the list of object files supplied
+        as 'objects'.  'output_filename' should be a filename.  If
+        'output_dir' is supplied, 'output_filename' is relative to it
+        (i.e. 'output_filename' can provide directory components if
+        needed).
+
+        'libraries' is a list of libraries to link against.  These are
+        library names, not filenames, since they're translated into
+        filenames in a platform-specific way (eg. "foo" becomes "libfoo.a"
+        on Unix and "foo.lib" on DOS/Windows).  However, they can include a
+        directory component, which means the linker will look in that
+        specific directory rather than searching all the normal locations.
+
+        'library_dirs', if supplied, should be a list of directories to
+        search for libraries that were specified as bare library names
+        (ie. no directory component).  These are on top of the system
+        default and those supplied to 'add_library_dir()' and/or
+        'set_library_dirs()'.  'runtime_library_dirs' is a list of
+        directories that will be embedded into the shared library and used
+        to search for other shared libraries that *it* depends on at
+        run-time.  (This may only be relevant on Unix.)
+
+        'export_symbols' is a list of symbols that the shared library will
+        export.  (This appears to be relevant only on Windows.)
+
+        'debug' is as for 'compile()' and 'create_static_lib()', with the
+        slight distinction that it actually matters on most platforms (as
+        opposed to 'create_static_lib()', which includes a 'debug' flag
+        mostly for form's sake).
+
+        'extra_preargs' and 'extra_postargs' are as for 'compile()' (except
+        of course that they supply command-line arguments for the
+        particular linker being used).
+
+        'target_lang' is the target language for which the given objects
+        are being compiled. This allows specific linkage time treatment of
+        certain languages.
+
+        Raises LinkError on failure.
+        """
+        raise NotImplementedError
+
+    # Old 'link_*()' methods, rewritten to use the new 'link()' method.
+
+    def link_shared_lib(
+        self,
+        objects,
+        output_libname,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        self.link(
+            CCompiler.SHARED_LIBRARY,
+            objects,
+            self.library_filename(output_libname, lib_type='shared'),
+            output_dir,
+            libraries,
+            library_dirs,
+            runtime_library_dirs,
+            export_symbols,
+            debug,
+            extra_preargs,
+            extra_postargs,
+            build_temp,
+            target_lang,
+        )
+
+    def link_shared_object(
+        self,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        self.link(
+            CCompiler.SHARED_OBJECT,
+            objects,
+            output_filename,
+            output_dir,
+            libraries,
+            library_dirs,
+            runtime_library_dirs,
+            export_symbols,
+            debug,
+            extra_preargs,
+            extra_postargs,
+            build_temp,
+            target_lang,
+        )
+
+    def link_executable(
+        self,
+        objects,
+        output_progname,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        target_lang=None,
+    ):
+        self.link(
+            CCompiler.EXECUTABLE,
+            objects,
+            self.executable_filename(output_progname),
+            output_dir,
+            libraries,
+            library_dirs,
+            runtime_library_dirs,
+            None,
+            debug,
+            extra_preargs,
+            extra_postargs,
+            None,
+            target_lang,
+        )
+
+    # -- Miscellaneous methods -----------------------------------------
+    # These are all used by the 'gen_lib_options() function; there is
+    # no appropriate default implementation so subclasses should
+    # implement all of these.
+
+    def library_dir_option(self, dir):
+        """Return the compiler option to add 'dir' to the list of
+        directories searched for libraries.
+        """
+        raise NotImplementedError
+
+    def runtime_library_dir_option(self, dir):
+        """Return the compiler option to add 'dir' to the list of
+        directories searched for runtime libraries.
+        """
+        raise NotImplementedError
+
+    def library_option(self, lib):
+        """Return the compiler option to add 'lib' to the list of libraries
+        linked into the shared library or executable.
+        """
+        raise NotImplementedError
+
+    def has_function(  # noqa: C901
+        self,
+        funcname,
+        includes=None,
+        include_dirs=None,
+        libraries=None,
+        library_dirs=None,
+    ):
+        """Return a boolean indicating whether funcname is provided as
+        a symbol on the current platform.  The optional arguments can
+        be used to augment the compilation environment.
+
+        The libraries argument is a list of flags to be passed to the
+        linker to make additional symbol definitions available for
+        linking.
+
+        The includes and include_dirs arguments are deprecated.
+        Usually, supplying include files with function declarations
+        will cause function detection to fail even in cases where the
+        symbol is available for linking.
+
+        """
+        # this can't be included at module scope because it tries to
+        # import math which might not be available at that point - maybe
+        # the necessary logic should just be inlined?
+        import tempfile
+
+        if includes is None:
+            includes = []
+        else:
+            warnings.warn("includes is deprecated", DeprecationWarning)
+        if include_dirs is None:
+            include_dirs = []
+        else:
+            warnings.warn("include_dirs is deprecated", DeprecationWarning)
+        if libraries is None:
+            libraries = []
+        if library_dirs is None:
+            library_dirs = []
+        fd, fname = tempfile.mkstemp(".c", funcname, text=True)
+        with os.fdopen(fd, "w", encoding='utf-8') as f:
+            for incl in includes:
+                f.write(f"""#include "{incl}"\n""")
+            if not includes:
+                # Use "char func(void);" as the prototype to follow
+                # what autoconf does.  This prototype does not match
+                # any well-known function the compiler might recognize
+                # as a builtin, so this ends up as a true link test.
+                # Without a fake prototype, the test would need to
+                # know the exact argument types, and the has_function
+                # interface does not provide that level of information.
+                f.write(
+                    f"""\
+#ifdef __cplusplus
+extern "C"
+#endif
+char {funcname}(void);
+"""
+                )
+            f.write(
+                f"""\
+int main (int argc, char **argv) {{
+    {funcname}();
+    return 0;
+}}
+"""
+            )
+
+        try:
+            objects = self.compile([fname], include_dirs=include_dirs)
+        except CompileError:
+            return False
+        finally:
+            os.remove(fname)
+
+        try:
+            self.link_executable(
+                objects, "a.out", libraries=libraries, library_dirs=library_dirs
+            )
+        except (LinkError, TypeError):
+            return False
+        else:
+            os.remove(
+                self.executable_filename("a.out", output_dir=self.output_dir or '')
+            )
+        finally:
+            for fn in objects:
+                os.remove(fn)
+        return True
+
+    def find_library_file(self, dirs, lib, debug=False):
+        """Search the specified list of directories for a static or shared
+        library file 'lib' and return the full path to that file.  If
+        'debug' true, look for a debugging version (if that makes sense on
+        the current platform).  Return None if 'lib' wasn't found in any of
+        the specified directories.
+        """
+        raise NotImplementedError
+
+    # -- Filename generation methods -----------------------------------
+
+    # The default implementation of the filename generating methods are
+    # prejudiced towards the Unix/DOS/Windows view of the world:
+    #   * object files are named by replacing the source file extension
+    #     (eg. .c/.cpp -> .o/.obj)
+    #   * library files (shared or static) are named by plugging the
+    #     library name and extension into a format string, eg.
+    #     "lib%s.%s" % (lib_name, ".a") for Unix static libraries
+    #   * executables are named by appending an extension (possibly
+    #     empty) to the program name: eg. progname + ".exe" for
+    #     Windows
+    #
+    # To reduce redundant code, these methods expect to find
+    # several attributes in the current object (presumably defined
+    # as class attributes):
+    #   * src_extensions -
+    #     list of C/C++ source file extensions, eg. ['.c', '.cpp']
+    #   * obj_extension -
+    #     object file extension, eg. '.o' or '.obj'
+    #   * static_lib_extension -
+    #     extension for static library files, eg. '.a' or '.lib'
+    #   * shared_lib_extension -
+    #     extension for shared library/object files, eg. '.so', '.dll'
+    #   * static_lib_format -
+    #     format string for generating static library filenames,
+    #     eg. 'lib%s.%s' or '%s.%s'
+    #   * shared_lib_format
+    #     format string for generating shared library filenames
+    #     (probably same as static_lib_format, since the extension
+    #     is one of the intended parameters to the format string)
+    #   * exe_extension -
+    #     extension for executable files, eg. '' or '.exe'
+
+    def object_filenames(self, source_filenames, strip_dir=False, output_dir=''):
+        if output_dir is None:
+            output_dir = ''
+        return list(
+            self._make_out_path(output_dir, strip_dir, src_name)
+            for src_name in source_filenames
+        )
+
+    @property
+    def out_extensions(self):
+        return dict.fromkeys(self.src_extensions, self.obj_extension)
+
+    def _make_out_path(self, output_dir, strip_dir, src_name):
+        base, ext = os.path.splitext(src_name)
+        base = self._make_relative(base)
+        try:
+            new_ext = self.out_extensions[ext]
+        except LookupError:
+            raise UnknownFileError(f"unknown file type '{ext}' (from '{src_name}')")
+        if strip_dir:
+            base = os.path.basename(base)
+        return os.path.join(output_dir, base + new_ext)
+
+    @staticmethod
+    def _make_relative(base):
+        """
+        In order to ensure that a filename always honors the
+        indicated output_dir, make sure it's relative.
+        Ref python/cpython#37775.
+        """
+        # Chop off the drive
+        no_drive = os.path.splitdrive(base)[1]
+        # If abs, chop off leading /
+        return no_drive[os.path.isabs(no_drive) :]
+
+    def shared_object_filename(self, basename, strip_dir=False, output_dir=''):
+        assert output_dir is not None
+        if strip_dir:
+            basename = os.path.basename(basename)
+        return os.path.join(output_dir, basename + self.shared_lib_extension)
+
+    def executable_filename(self, basename, strip_dir=False, output_dir=''):
+        assert output_dir is not None
+        if strip_dir:
+            basename = os.path.basename(basename)
+        return os.path.join(output_dir, basename + (self.exe_extension or ''))
+
+    def library_filename(
+        self,
+        libname,
+        lib_type='static',
+        strip_dir=False,
+        output_dir='',  # or 'shared'
+    ):
+        assert output_dir is not None
+        expected = '"static", "shared", "dylib", "xcode_stub"'
+        if lib_type not in eval(expected):
+            raise ValueError(f"'lib_type' must be {expected}")
+        fmt = getattr(self, lib_type + "_lib_format")
+        ext = getattr(self, lib_type + "_lib_extension")
+
+        dir, base = os.path.split(libname)
+        filename = fmt % (base, ext)
+        if strip_dir:
+            dir = ''
+
+        return os.path.join(output_dir, dir, filename)
+
+    # -- Utility methods -----------------------------------------------
+
+    def announce(self, msg, level=1):
+        log.debug(msg)
+
+    def debug_print(self, msg):
+        from distutils.debug import DEBUG
+
+        if DEBUG:
+            print(msg)
+
+    def warn(self, msg):
+        sys.stderr.write(f"warning: {msg}\n")
+
+    def execute(self, func, args, msg=None, level=1):
+        execute(func, args, msg, self.dry_run)
+
+    def spawn(self, cmd, **kwargs):
+        spawn(cmd, dry_run=self.dry_run, **kwargs)
+
+    def move_file(self, src, dst):
+        return move_file(src, dst, dry_run=self.dry_run)
+
+    def mkpath(self, name, mode=0o777):
+        mkpath(name, mode, dry_run=self.dry_run)
+
+
+# Map a sys.platform/os.name ('posix', 'nt') to the default compiler
+# type for that platform. Keys are interpreted as re match
+# patterns. Order is important; platform mappings are preferred over
+# OS names.
+_default_compilers = (
+    # Platform string mappings
+    # on a cygwin built python we can use gcc like an ordinary UNIXish
+    # compiler
+    ('cygwin.*', 'unix'),
+    ('zos', 'zos'),
+    # OS name mappings
+    ('posix', 'unix'),
+    ('nt', 'msvc'),
+)
+
+
+def get_default_compiler(osname=None, platform=None):
+    """Determine the default compiler to use for the given platform.
+
+    osname should be one of the standard Python OS names (i.e. the
+    ones returned by os.name) and platform the common value
+    returned by sys.platform for the platform in question.
+
+    The default values are os.name and sys.platform in case the
+    parameters are not given.
+    """
+    if osname is None:
+        osname = os.name
+    if platform is None:
+        platform = sys.platform
+    # Mingw is a special case where sys.platform is 'win32' but we
+    # want to use the 'mingw32' compiler, so check it first
+    if is_mingw():
+        return 'mingw32'
+    for pattern, compiler in _default_compilers:
+        if (
+            re.match(pattern, platform) is not None
+            or re.match(pattern, osname) is not None
+        ):
+            return compiler
+    # Default to Unix compiler
+    return 'unix'
+
+
+# Map compiler types to (module_name, class_name) pairs -- ie. where to
+# find the code that implements an interface to this compiler.  (The module
+# is assumed to be in the 'distutils' package.)
+compiler_class = {
+    'unix': ('unixccompiler', 'UnixCCompiler', "standard UNIX-style compiler"),
+    'msvc': ('_msvccompiler', 'MSVCCompiler', "Microsoft Visual C++"),
+    'cygwin': (
+        'cygwinccompiler',
+        'CygwinCCompiler',
+        "Cygwin port of GNU C Compiler for Win32",
+    ),
+    'mingw32': (
+        'cygwinccompiler',
+        'Mingw32CCompiler',
+        "Mingw32 port of GNU C Compiler for Win32",
+    ),
+    'bcpp': ('bcppcompiler', 'BCPPCompiler', "Borland C++ Compiler"),
+    'zos': ('zosccompiler', 'zOSCCompiler', 'IBM XL C/C++ Compilers'),
+}
+
+
+def show_compilers():
+    """Print list of available compilers (used by the "--help-compiler"
+    options to "build", "build_ext", "build_clib").
+    """
+    # XXX this "knows" that the compiler option it's describing is
+    # "--compiler", which just happens to be the case for the three
+    # commands that use it.
+    from distutils.fancy_getopt import FancyGetopt
+
+    compilers = []
+    for compiler in compiler_class.keys():
+        compilers.append(("compiler=" + compiler, None, compiler_class[compiler][2]))
+    compilers.sort()
+    pretty_printer = FancyGetopt(compilers)
+    pretty_printer.print_help("List of available compilers:")
+
+
+def new_compiler(plat=None, compiler=None, verbose=False, dry_run=False, force=False):
+    """Generate an instance of some CCompiler subclass for the supplied
+    platform/compiler combination.  'plat' defaults to 'os.name'
+    (eg. 'posix', 'nt'), and 'compiler' defaults to the default compiler
+    for that platform.  Currently only 'posix' and 'nt' are supported, and
+    the default compilers are "traditional Unix interface" (UnixCCompiler
+    class) and Visual C++ (MSVCCompiler class).  Note that it's perfectly
+    possible to ask for a Unix compiler object under Windows, and a
+    Microsoft compiler object under Unix -- if you supply a value for
+    'compiler', 'plat' is ignored.
+    """
+    if plat is None:
+        plat = os.name
+
+    try:
+        if compiler is None:
+            compiler = get_default_compiler(plat)
+
+        (module_name, class_name, long_description) = compiler_class[compiler]
+    except KeyError:
+        msg = f"don't know how to compile C/C++ code on platform '{plat}'"
+        if compiler is not None:
+            msg = msg + f" with '{compiler}' compiler"
+        raise DistutilsPlatformError(msg)
+
+    try:
+        module_name = "distutils." + module_name
+        __import__(module_name)
+        module = sys.modules[module_name]
+        klass = vars(module)[class_name]
+    except ImportError:
+        raise DistutilsModuleError(
+            f"can't compile C/C++ code: unable to load module '{module_name}'"
+        )
+    except KeyError:
+        raise DistutilsModuleError(
+            f"can't compile C/C++ code: unable to find class '{class_name}' "
+            f"in module '{module_name}'"
+        )
+
+    # XXX The None is necessary to preserve backwards compatibility
+    # with classes that expect verbose to be the first positional
+    # argument.
+    return klass(None, dry_run, force)
+
+
+def gen_preprocess_options(macros, include_dirs):
+    """Generate C pre-processor options (-D, -U, -I) as used by at least
+    two types of compilers: the typical Unix compiler and Visual C++.
+    'macros' is the usual thing, a list of 1- or 2-tuples, where (name,)
+    means undefine (-U) macro 'name', and (name,value) means define (-D)
+    macro 'name' to 'value'.  'include_dirs' is just a list of directory
+    names to be added to the header file search path (-I).  Returns a list
+    of command-line options suitable for either Unix compilers or Visual
+    C++.
+    """
+    # XXX it would be nice (mainly aesthetic, and so we don't generate
+    # stupid-looking command lines) to go over 'macros' and eliminate
+    # redundant definitions/undefinitions (ie. ensure that only the
+    # latest mention of a particular macro winds up on the command
+    # line).  I don't think it's essential, though, since most (all?)
+    # Unix C compilers only pay attention to the latest -D or -U
+    # mention of a macro on their command line.  Similar situation for
+    # 'include_dirs'.  I'm punting on both for now.  Anyways, weeding out
+    # redundancies like this should probably be the province of
+    # CCompiler, since the data structures used are inherited from it
+    # and therefore common to all CCompiler classes.
+    pp_opts = []
+    for macro in macros:
+        if not (isinstance(macro, tuple) and 1 <= len(macro) <= 2):
+            raise TypeError(
+                f"bad macro definition '{macro}': "
+                "each element of 'macros' list must be a 1- or 2-tuple"
+            )
+
+        if len(macro) == 1:  # undefine this macro
+            pp_opts.append(f"-U{macro[0]}")
+        elif len(macro) == 2:
+            if macro[1] is None:  # define with no explicit value
+                pp_opts.append(f"-D{macro[0]}")
+            else:
+                # XXX *don't* need to be clever about quoting the
+                # macro value here, because we're going to avoid the
+                # shell at all costs when we spawn the command!
+                pp_opts.append("-D{}={}".format(*macro))
+
+    for dir in include_dirs:
+        pp_opts.append(f"-I{dir}")
+    return pp_opts
+
+
+def gen_lib_options(compiler, library_dirs, runtime_library_dirs, libraries):
+    """Generate linker options for searching library directories and
+    linking with specific libraries.  'libraries' and 'library_dirs' are,
+    respectively, lists of library names (not filenames!) and search
+    directories.  Returns a list of command-line options suitable for use
+    with some compiler (depending on the two format strings passed in).
+    """
+    lib_opts = []
+
+    for dir in library_dirs:
+        lib_opts.append(compiler.library_dir_option(dir))
+
+    for dir in runtime_library_dirs:
+        lib_opts.extend(always_iterable(compiler.runtime_library_dir_option(dir)))
+
+    # XXX it's important that we *not* remove redundant library mentions!
+    # sometimes you really do have to say "-lfoo -lbar -lfoo" in order to
+    # resolve all symbols.  I just hope we never have to say "-lfoo obj.o
+    # -lbar" to get things to work -- that's certainly a possibility, but a
+    # pretty nasty way to arrange your C code.
+
+    for lib in libraries:
+        (lib_dir, lib_name) = os.path.split(lib)
+        if lib_dir:
+            lib_file = compiler.find_library_file([lib_dir], lib_name)
+            if lib_file:
+                lib_opts.append(lib_file)
+            else:
+                compiler.warn(
+                    f"no library file corresponding to '{lib}' found (skipping)"
+                )
+        else:
+            lib_opts.append(compiler.library_option(lib))
+    return lib_opts
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/cmd.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/cmd.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bb97956abb4b56e90b3fdd956911ebbd7001cba
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/cmd.py
@@ -0,0 +1,439 @@
+"""distutils.cmd
+
+Provides the Command class, the base class for the command classes
+in the distutils.command package.
+"""
+
+import logging
+import os
+import re
+import sys
+
+from . import _modified, archive_util, dir_util, file_util, util
+from ._log import log
+from .errors import DistutilsOptionError
+
+
+class Command:
+    """Abstract base class for defining command classes, the "worker bees"
+    of the Distutils.  A useful analogy for command classes is to think of
+    them as subroutines with local variables called "options".  The options
+    are "declared" in 'initialize_options()' and "defined" (given their
+    final values, aka "finalized") in 'finalize_options()', both of which
+    must be defined by every command class.  The distinction between the
+    two is necessary because option values might come from the outside
+    world (command line, config file, ...), and any options dependent on
+    other options must be computed *after* these outside influences have
+    been processed -- hence 'finalize_options()'.  The "body" of the
+    subroutine, where it does all its work based on the values of its
+    options, is the 'run()' method, which must also be implemented by every
+    command class.
+    """
+
+    # 'sub_commands' formalizes the notion of a "family" of commands,
+    # eg. "install" as the parent with sub-commands "install_lib",
+    # "install_headers", etc.  The parent of a family of commands
+    # defines 'sub_commands' as a class attribute; it's a list of
+    #    (command_name : string, predicate : unbound_method | string | None)
+    # tuples, where 'predicate' is a method of the parent command that
+    # determines whether the corresponding command is applicable in the
+    # current situation.  (Eg. we "install_headers" is only applicable if
+    # we have any C header files to install.)  If 'predicate' is None,
+    # that command is always applicable.
+    #
+    # 'sub_commands' is usually defined at the *end* of a class, because
+    # predicates can be unbound methods, so they must already have been
+    # defined.  The canonical example is the "install" command.
+    sub_commands = []
+
+    # -- Creation/initialization methods -------------------------------
+
+    def __init__(self, dist):
+        """Create and initialize a new Command object.  Most importantly,
+        invokes the 'initialize_options()' method, which is the real
+        initializer and depends on the actual command being
+        instantiated.
+        """
+        # late import because of mutual dependence between these classes
+        from distutils.dist import Distribution
+
+        if not isinstance(dist, Distribution):
+            raise TypeError("dist must be a Distribution instance")
+        if self.__class__ is Command:
+            raise RuntimeError("Command is an abstract class")
+
+        self.distribution = dist
+        self.initialize_options()
+
+        # Per-command versions of the global flags, so that the user can
+        # customize Distutils' behaviour command-by-command and let some
+        # commands fall back on the Distribution's behaviour.  None means
+        # "not defined, check self.distribution's copy", while 0 or 1 mean
+        # false and true (duh).  Note that this means figuring out the real
+        # value of each flag is a touch complicated -- hence "self._dry_run"
+        # will be handled by __getattr__, below.
+        # XXX This needs to be fixed.
+        self._dry_run = None
+
+        # verbose is largely ignored, but needs to be set for
+        # backwards compatibility (I think)?
+        self.verbose = dist.verbose
+
+        # Some commands define a 'self.force' option to ignore file
+        # timestamps, but methods defined *here* assume that
+        # 'self.force' exists for all commands.  So define it here
+        # just to be safe.
+        self.force = None
+
+        # The 'help' flag is just used for command-line parsing, so
+        # none of that complicated bureaucracy is needed.
+        self.help = False
+
+        # 'finalized' records whether or not 'finalize_options()' has been
+        # called.  'finalize_options()' itself should not pay attention to
+        # this flag: it is the business of 'ensure_finalized()', which
+        # always calls 'finalize_options()', to respect/update it.
+        self.finalized = False
+
+    # XXX A more explicit way to customize dry_run would be better.
+    def __getattr__(self, attr):
+        if attr == 'dry_run':
+            myval = getattr(self, "_" + attr)
+            if myval is None:
+                return getattr(self.distribution, attr)
+            else:
+                return myval
+        else:
+            raise AttributeError(attr)
+
+    def ensure_finalized(self):
+        if not self.finalized:
+            self.finalize_options()
+        self.finalized = True
+
+    # Subclasses must define:
+    #   initialize_options()
+    #     provide default values for all options; may be customized by
+    #     setup script, by options from config file(s), or by command-line
+    #     options
+    #   finalize_options()
+    #     decide on the final values for all options; this is called
+    #     after all possible intervention from the outside world
+    #     (command-line, option file, etc.) has been processed
+    #   run()
+    #     run the command: do whatever it is we're here to do,
+    #     controlled by the command's various option values
+
+    def initialize_options(self):
+        """Set default values for all the options that this command
+        supports.  Note that these defaults may be overridden by other
+        commands, by the setup script, by config files, or by the
+        command-line.  Thus, this is not the place to code dependencies
+        between options; generally, 'initialize_options()' implementations
+        are just a bunch of "self.foo = None" assignments.
+
+        This method must be implemented by all command classes.
+        """
+        raise RuntimeError(
+            f"abstract method -- subclass {self.__class__} must override"
+        )
+
+    def finalize_options(self):
+        """Set final values for all the options that this command supports.
+        This is always called as late as possible, ie.  after any option
+        assignments from the command-line or from other commands have been
+        done.  Thus, this is the place to code option dependencies: if
+        'foo' depends on 'bar', then it is safe to set 'foo' from 'bar' as
+        long as 'foo' still has the same value it was assigned in
+        'initialize_options()'.
+
+        This method must be implemented by all command classes.
+        """
+        raise RuntimeError(
+            f"abstract method -- subclass {self.__class__} must override"
+        )
+
+    def dump_options(self, header=None, indent=""):
+        from distutils.fancy_getopt import longopt_xlate
+
+        if header is None:
+            header = f"command options for '{self.get_command_name()}':"
+        self.announce(indent + header, level=logging.INFO)
+        indent = indent + "  "
+        for option, _, _ in self.user_options:
+            option = option.translate(longopt_xlate)
+            if option[-1] == "=":
+                option = option[:-1]
+            value = getattr(self, option)
+            self.announce(indent + f"{option} = {value}", level=logging.INFO)
+
+    def run(self):
+        """A command's raison d'etre: carry out the action it exists to
+        perform, controlled by the options initialized in
+        'initialize_options()', customized by other commands, the setup
+        script, the command-line, and config files, and finalized in
+        'finalize_options()'.  All terminal output and filesystem
+        interaction should be done by 'run()'.
+
+        This method must be implemented by all command classes.
+        """
+        raise RuntimeError(
+            f"abstract method -- subclass {self.__class__} must override"
+        )
+
+    def announce(self, msg, level=logging.DEBUG):
+        log.log(level, msg)
+
+    def debug_print(self, msg):
+        """Print 'msg' to stdout if the global DEBUG (taken from the
+        DISTUTILS_DEBUG environment variable) flag is true.
+        """
+        from distutils.debug import DEBUG
+
+        if DEBUG:
+            print(msg)
+            sys.stdout.flush()
+
+    # -- Option validation methods -------------------------------------
+    # (these are very handy in writing the 'finalize_options()' method)
+    #
+    # NB. the general philosophy here is to ensure that a particular option
+    # value meets certain type and value constraints.  If not, we try to
+    # force it into conformance (eg. if we expect a list but have a string,
+    # split the string on comma and/or whitespace).  If we can't force the
+    # option into conformance, raise DistutilsOptionError.  Thus, command
+    # classes need do nothing more than (eg.)
+    #   self.ensure_string_list('foo')
+    # and they can be guaranteed that thereafter, self.foo will be
+    # a list of strings.
+
+    def _ensure_stringlike(self, option, what, default=None):
+        val = getattr(self, option)
+        if val is None:
+            setattr(self, option, default)
+            return default
+        elif not isinstance(val, str):
+            raise DistutilsOptionError(f"'{option}' must be a {what} (got `{val}`)")
+        return val
+
+    def ensure_string(self, option, default=None):
+        """Ensure that 'option' is a string; if not defined, set it to
+        'default'.
+        """
+        self._ensure_stringlike(option, "string", default)
+
+    def ensure_string_list(self, option):
+        r"""Ensure that 'option' is a list of strings.  If 'option' is
+        currently a string, we split it either on /,\s*/ or /\s+/, so
+        "foo bar baz", "foo,bar,baz", and "foo,   bar baz" all become
+        ["foo", "bar", "baz"].
+        """
+        val = getattr(self, option)
+        if val is None:
+            return
+        elif isinstance(val, str):
+            setattr(self, option, re.split(r',\s*|\s+', val))
+        else:
+            if isinstance(val, list):
+                ok = all(isinstance(v, str) for v in val)
+            else:
+                ok = False
+            if not ok:
+                raise DistutilsOptionError(
+                    f"'{option}' must be a list of strings (got {val!r})"
+                )
+
+    def _ensure_tested_string(self, option, tester, what, error_fmt, default=None):
+        val = self._ensure_stringlike(option, what, default)
+        if val is not None and not tester(val):
+            raise DistutilsOptionError(
+                ("error in '%s' option: " + error_fmt) % (option, val)
+            )
+
+    def ensure_filename(self, option):
+        """Ensure that 'option' is the name of an existing file."""
+        self._ensure_tested_string(
+            option, os.path.isfile, "filename", "'%s' does not exist or is not a file"
+        )
+
+    def ensure_dirname(self, option):
+        self._ensure_tested_string(
+            option,
+            os.path.isdir,
+            "directory name",
+            "'%s' does not exist or is not a directory",
+        )
+
+    # -- Convenience methods for commands ------------------------------
+
+    def get_command_name(self):
+        if hasattr(self, 'command_name'):
+            return self.command_name
+        else:
+            return self.__class__.__name__
+
+    def set_undefined_options(self, src_cmd, *option_pairs):
+        """Set the values of any "undefined" options from corresponding
+        option values in some other command object.  "Undefined" here means
+        "is None", which is the convention used to indicate that an option
+        has not been changed between 'initialize_options()' and
+        'finalize_options()'.  Usually called from 'finalize_options()' for
+        options that depend on some other command rather than another
+        option of the same command.  'src_cmd' is the other command from
+        which option values will be taken (a command object will be created
+        for it if necessary); the remaining arguments are
+        '(src_option,dst_option)' tuples which mean "take the value of
+        'src_option' in the 'src_cmd' command object, and copy it to
+        'dst_option' in the current command object".
+        """
+        # Option_pairs: list of (src_option, dst_option) tuples
+        src_cmd_obj = self.distribution.get_command_obj(src_cmd)
+        src_cmd_obj.ensure_finalized()
+        for src_option, dst_option in option_pairs:
+            if getattr(self, dst_option) is None:
+                setattr(self, dst_option, getattr(src_cmd_obj, src_option))
+
+    def get_finalized_command(self, command, create=True):
+        """Wrapper around Distribution's 'get_command_obj()' method: find
+        (create if necessary and 'create' is true) the command object for
+        'command', call its 'ensure_finalized()' method, and return the
+        finalized command object.
+        """
+        cmd_obj = self.distribution.get_command_obj(command, create)
+        cmd_obj.ensure_finalized()
+        return cmd_obj
+
+    # XXX rename to 'get_reinitialized_command()'? (should do the
+    # same in dist.py, if so)
+    def reinitialize_command(self, command, reinit_subcommands=False):
+        return self.distribution.reinitialize_command(command, reinit_subcommands)
+
+    def run_command(self, command):
+        """Run some other command: uses the 'run_command()' method of
+        Distribution, which creates and finalizes the command object if
+        necessary and then invokes its 'run()' method.
+        """
+        self.distribution.run_command(command)
+
+    def get_sub_commands(self):
+        """Determine the sub-commands that are relevant in the current
+        distribution (ie., that need to be run).  This is based on the
+        'sub_commands' class attribute: each tuple in that list may include
+        a method that we call to determine if the subcommand needs to be
+        run for the current distribution.  Return a list of command names.
+        """
+        commands = []
+        for cmd_name, method in self.sub_commands:
+            if method is None or method(self):
+                commands.append(cmd_name)
+        return commands
+
+    # -- External world manipulation -----------------------------------
+
+    def warn(self, msg):
+        log.warning("warning: %s: %s\n", self.get_command_name(), msg)
+
+    def execute(self, func, args, msg=None, level=1):
+        util.execute(func, args, msg, dry_run=self.dry_run)
+
+    def mkpath(self, name, mode=0o777):
+        dir_util.mkpath(name, mode, dry_run=self.dry_run)
+
+    def copy_file(
+        self,
+        infile,
+        outfile,
+        preserve_mode=True,
+        preserve_times=True,
+        link=None,
+        level=1,
+    ):
+        """Copy a file respecting verbose, dry-run and force flags.  (The
+        former two default to whatever is in the Distribution object, and
+        the latter defaults to false for commands that don't define it.)"""
+        return file_util.copy_file(
+            infile,
+            outfile,
+            preserve_mode,
+            preserve_times,
+            not self.force,
+            link,
+            dry_run=self.dry_run,
+        )
+
+    def copy_tree(
+        self,
+        infile,
+        outfile,
+        preserve_mode=True,
+        preserve_times=True,
+        preserve_symlinks=False,
+        level=1,
+    ):
+        """Copy an entire directory tree respecting verbose, dry-run,
+        and force flags.
+        """
+        return dir_util.copy_tree(
+            infile,
+            outfile,
+            preserve_mode,
+            preserve_times,
+            preserve_symlinks,
+            not self.force,
+            dry_run=self.dry_run,
+        )
+
+    def move_file(self, src, dst, level=1):
+        """Move a file respecting dry-run flag."""
+        return file_util.move_file(src, dst, dry_run=self.dry_run)
+
+    def spawn(self, cmd, search_path=True, level=1):
+        """Spawn an external command respecting dry-run flag."""
+        from distutils.spawn import spawn
+
+        spawn(cmd, search_path, dry_run=self.dry_run)
+
+    def make_archive(
+        self, base_name, format, root_dir=None, base_dir=None, owner=None, group=None
+    ):
+        return archive_util.make_archive(
+            base_name,
+            format,
+            root_dir,
+            base_dir,
+            dry_run=self.dry_run,
+            owner=owner,
+            group=group,
+        )
+
+    def make_file(
+        self, infiles, outfile, func, args, exec_msg=None, skip_msg=None, level=1
+    ):
+        """Special case of 'execute()' for operations that process one or
+        more input files and generate one output file.  Works just like
+        'execute()', except the operation is skipped and a different
+        message printed if 'outfile' already exists and is newer than all
+        files listed in 'infiles'.  If the command defined 'self.force',
+        and it is true, then the command is unconditionally run -- does no
+        timestamp checks.
+        """
+        if skip_msg is None:
+            skip_msg = f"skipping {outfile} (inputs unchanged)"
+
+        # Allow 'infiles' to be a single string
+        if isinstance(infiles, str):
+            infiles = (infiles,)
+        elif not isinstance(infiles, (list, tuple)):
+            raise TypeError("'infiles' must be a string, or a list or tuple of strings")
+
+        if exec_msg is None:
+            exec_msg = "generating {} from {}".format(outfile, ', '.join(infiles))
+
+        # If 'outfile' must be regenerated (either because it doesn't
+        # exist, is out-of-date, or the 'force' flag is true) then
+        # perform the action that presumably regenerates it
+        if self.force or _modified.newer_group(infiles, outfile):
+            self.execute(func, args, exec_msg, level)
+        # Otherwise, print the "skip" message
+        else:
+            log.debug(skip_msg)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e8fbe60c29dcd1d3f1955c0df3420117792aba7
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__init__.py
@@ -0,0 +1,25 @@
+"""distutils.command
+
+Package containing implementation of all the standard Distutils
+commands."""
+
+__all__ = [
+    'build',
+    'build_py',
+    'build_ext',
+    'build_clib',
+    'build_scripts',
+    'clean',
+    'install',
+    'install_lib',
+    'install_headers',
+    'install_scripts',
+    'install_data',
+    'sdist',
+    'register',
+    'bdist',
+    'bdist_dumb',
+    'bdist_rpm',
+    'check',
+    'upload',
+]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef76eba6072a3f84a2cb1891b817d4b58b7e0072
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/_framework_compat.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/_framework_compat.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a4309cfae059dbfed67365a0a4ca310e35a65668
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/_framework_compat.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..93543b471dd5f9b5574fcd009116fa1bb9a1d9cd
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist_dumb.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist_dumb.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b61250d4ac85a1549cc8b25685091458079e82f6
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist_dumb.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist_rpm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist_rpm.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..969e980301d7960e13bfe3271bfff4a3dffa8837
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/bdist_rpm.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a24e9304d8257b5ca901acc16489b071a30a3d90
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_clib.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_clib.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..69d9317329a34c8ea762c01a407f337fb9ad5384
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_clib.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_ext.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_ext.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb6e45b9a032450893c165dce06e05a952e0ea0e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_ext.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_py.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_py.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6202cf93cf97b3bc838d9c40c48ebeed3f2f511f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_py.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_scripts.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_scripts.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8b8b1ab3480fd8cb76b869f63473c9ea414b18ef
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/build_scripts.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/check.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/check.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3d89e6a7013aa25a2e5fc8adb1c308656014d3de
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/check.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/clean.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/clean.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..35db8046e8647206ec41e6468b78643f9d632b65
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/clean.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/config.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/config.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..272a4b72a930a7313bee120fd499a440a57fefc1
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/config.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8cc644601b936182f01b1c1228fb62aee145b813
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_data.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_data.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c838c2ffbdb28d45150c5d8ea9e6ac5cf0c500f9
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_data.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_egg_info.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_egg_info.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0a0a85dcb71a46b2f5bff2feafad2467783bdded
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_egg_info.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_headers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_headers.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3540cee2ed69dead83cffe0a0634f94228188385
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_headers.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_lib.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_lib.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8f6d1734a65c77c26367957f6af90d365bda3b09
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_lib.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_scripts.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_scripts.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..badc00fb6d8f149fc4ac6d9e6be5183e9ceb7d99
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/install_scripts.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/register.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/register.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f490a7f40b0cdd0891c1b24bb5c709f4142fbe4d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/register.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/sdist.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/sdist.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7eaaef90eb46c6eeb58c5b7d008c1002855ba207
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/sdist.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/upload.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/upload.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5e995adae2bda56d00e2b28dbdb079ee29c6f937
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/__pycache__/upload.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/_framework_compat.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/_framework_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..00d34bc7d8c85126dbae716ebfa48e35b0833222
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/_framework_compat.py
@@ -0,0 +1,54 @@
+"""
+Backward compatibility for homebrew builds on macOS.
+"""
+
+import functools
+import os
+import subprocess
+import sys
+import sysconfig
+
+
+@functools.lru_cache
+def enabled():
+    """
+    Only enabled for Python 3.9 framework homebrew builds
+    except ensurepip and venv.
+    """
+    PY39 = (3, 9) < sys.version_info < (3, 10)
+    framework = sys.platform == 'darwin' and sys._framework
+    homebrew = "Cellar" in sysconfig.get_config_var('projectbase')
+    venv = sys.prefix != sys.base_prefix
+    ensurepip = os.environ.get("ENSUREPIP_OPTIONS")
+    return PY39 and framework and homebrew and not venv and not ensurepip
+
+
+schemes = dict(
+    osx_framework_library=dict(
+        stdlib='{installed_base}/{platlibdir}/python{py_version_short}',
+        platstdlib='{platbase}/{platlibdir}/python{py_version_short}',
+        purelib='{homebrew_prefix}/lib/python{py_version_short}/site-packages',
+        platlib='{homebrew_prefix}/{platlibdir}/python{py_version_short}/site-packages',
+        include='{installed_base}/include/python{py_version_short}{abiflags}',
+        platinclude='{installed_platbase}/include/python{py_version_short}{abiflags}',
+        scripts='{homebrew_prefix}/bin',
+        data='{homebrew_prefix}',
+    )
+)
+
+
+@functools.lru_cache
+def vars():
+    if not enabled():
+        return {}
+    homebrew_prefix = subprocess.check_output(['brew', '--prefix'], text=True).strip()
+    return locals()
+
+
+def scheme(name):
+    """
+    Override the selected scheme for posix_prefix.
+    """
+    if not enabled() or not name.endswith('_prefix'):
+        return name
+    return 'osx_framework_library'
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist.py
new file mode 100644
index 0000000000000000000000000000000000000000..1738f4e56b8110e811ac29291b0e4a807eb4dcfe
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist.py
@@ -0,0 +1,154 @@
+"""distutils.command.bdist
+
+Implements the Distutils 'bdist' command (create a built [binary]
+distribution)."""
+
+import os
+import warnings
+
+from ..core import Command
+from ..errors import DistutilsOptionError, DistutilsPlatformError
+from ..util import get_platform
+
+
+def show_formats():
+    """Print list of available formats (arguments to "--format" option)."""
+    from ..fancy_getopt import FancyGetopt
+
+    formats = []
+    for format in bdist.format_commands:
+        formats.append(("formats=" + format, None, bdist.format_commands[format][1]))
+    pretty_printer = FancyGetopt(formats)
+    pretty_printer.print_help("List of available distribution formats:")
+
+
+class ListCompat(dict):
+    # adapter to allow for Setuptools compatibility in format_commands
+    def append(self, item):
+        warnings.warn(
+            """format_commands is now a dict. append is deprecated.""",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+
+class bdist(Command):
+    description = "create a built (binary) distribution"
+
+    user_options = [
+        ('bdist-base=', 'b', "temporary directory for creating built distributions"),
+        (
+            'plat-name=',
+            'p',
+            "platform name to embed in generated filenames "
+            f"[default: {get_platform()}]",
+        ),
+        ('formats=', None, "formats for distribution (comma-separated list)"),
+        (
+            'dist-dir=',
+            'd',
+            "directory to put final built distributions in [default: dist]",
+        ),
+        ('skip-build', None, "skip rebuilding everything (for testing/debugging)"),
+        (
+            'owner=',
+            'u',
+            "Owner name used when creating a tar file [default: current user]",
+        ),
+        (
+            'group=',
+            'g',
+            "Group name used when creating a tar file [default: current group]",
+        ),
+    ]
+
+    boolean_options = ['skip-build']
+
+    help_options = [
+        ('help-formats', None, "lists available distribution formats", show_formats),
+    ]
+
+    # The following commands do not take a format option from bdist
+    no_format_option = ('bdist_rpm',)
+
+    # This won't do in reality: will need to distinguish RPM-ish Linux,
+    # Debian-ish Linux, Solaris, FreeBSD, ..., Windows, Mac OS.
+    default_format = {'posix': 'gztar', 'nt': 'zip'}
+
+    # Define commands in preferred order for the --help-formats option
+    format_commands = ListCompat({
+        'rpm': ('bdist_rpm', "RPM distribution"),
+        'gztar': ('bdist_dumb', "gzip'ed tar file"),
+        'bztar': ('bdist_dumb', "bzip2'ed tar file"),
+        'xztar': ('bdist_dumb', "xz'ed tar file"),
+        'ztar': ('bdist_dumb', "compressed tar file"),
+        'tar': ('bdist_dumb', "tar file"),
+        'zip': ('bdist_dumb', "ZIP file"),
+    })
+
+    # for compatibility until consumers only reference format_commands
+    format_command = format_commands
+
+    def initialize_options(self):
+        self.bdist_base = None
+        self.plat_name = None
+        self.formats = None
+        self.dist_dir = None
+        self.skip_build = False
+        self.group = None
+        self.owner = None
+
+    def finalize_options(self):
+        # have to finalize 'plat_name' before 'bdist_base'
+        if self.plat_name is None:
+            if self.skip_build:
+                self.plat_name = get_platform()
+            else:
+                self.plat_name = self.get_finalized_command('build').plat_name
+
+        # 'bdist_base' -- parent of per-built-distribution-format
+        # temporary directories (eg. we'll probably have
+        # "build/bdist./dumb", "build/bdist./rpm", etc.)
+        if self.bdist_base is None:
+            build_base = self.get_finalized_command('build').build_base
+            self.bdist_base = os.path.join(build_base, 'bdist.' + self.plat_name)
+
+        self.ensure_string_list('formats')
+        if self.formats is None:
+            try:
+                self.formats = [self.default_format[os.name]]
+            except KeyError:
+                raise DistutilsPlatformError(
+                    "don't know how to create built distributions "
+                    f"on platform {os.name}"
+                )
+
+        if self.dist_dir is None:
+            self.dist_dir = "dist"
+
+    def run(self):
+        # Figure out which sub-commands we need to run.
+        commands = []
+        for format in self.formats:
+            try:
+                commands.append(self.format_commands[format][0])
+            except KeyError:
+                raise DistutilsOptionError(f"invalid format '{format}'")
+
+        # Reinitialize and run each command.
+        for i in range(len(self.formats)):
+            cmd_name = commands[i]
+            sub_cmd = self.reinitialize_command(cmd_name)
+            if cmd_name not in self.no_format_option:
+                sub_cmd.format = self.formats[i]
+
+            # passing the owner and group names for tar archiving
+            if cmd_name == 'bdist_dumb':
+                sub_cmd.owner = self.owner
+                sub_cmd.group = self.group
+
+            # If we're going to need to run this command again, tell it to
+            # keep its temporary files around so subsequent runs go faster.
+            if cmd_name in commands[i + 1 :]:
+                sub_cmd.keep_temp = True
+            self.run_command(cmd_name)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist_dumb.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist_dumb.py
new file mode 100644
index 0000000000000000000000000000000000000000..67b0c8cce9fae2012c8f990118da83be79b0699d
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist_dumb.py
@@ -0,0 +1,140 @@
+"""distutils.command.bdist_dumb
+
+Implements the Distutils 'bdist_dumb' command (create a "dumb" built
+distribution -- i.e., just an archive to be unpacked under $prefix or
+$exec_prefix)."""
+
+import os
+from distutils._log import log
+
+from ..core import Command
+from ..dir_util import ensure_relative, remove_tree
+from ..errors import DistutilsPlatformError
+from ..sysconfig import get_python_version
+from ..util import get_platform
+
+
+class bdist_dumb(Command):
+    description = "create a \"dumb\" built distribution"
+
+    user_options = [
+        ('bdist-dir=', 'd', "temporary directory for creating the distribution"),
+        (
+            'plat-name=',
+            'p',
+            "platform name to embed in generated filenames "
+            f"[default: {get_platform()}]",
+        ),
+        (
+            'format=',
+            'f',
+            "archive format to create (tar, gztar, bztar, xztar, ztar, zip)",
+        ),
+        (
+            'keep-temp',
+            'k',
+            "keep the pseudo-installation tree around after creating the distribution archive",
+        ),
+        ('dist-dir=', 'd', "directory to put final built distributions in"),
+        ('skip-build', None, "skip rebuilding everything (for testing/debugging)"),
+        (
+            'relative',
+            None,
+            "build the archive using relative paths [default: false]",
+        ),
+        (
+            'owner=',
+            'u',
+            "Owner name used when creating a tar file [default: current user]",
+        ),
+        (
+            'group=',
+            'g',
+            "Group name used when creating a tar file [default: current group]",
+        ),
+    ]
+
+    boolean_options = ['keep-temp', 'skip-build', 'relative']
+
+    default_format = {'posix': 'gztar', 'nt': 'zip'}
+
+    def initialize_options(self):
+        self.bdist_dir = None
+        self.plat_name = None
+        self.format = None
+        self.keep_temp = False
+        self.dist_dir = None
+        self.skip_build = None
+        self.relative = False
+        self.owner = None
+        self.group = None
+
+    def finalize_options(self):
+        if self.bdist_dir is None:
+            bdist_base = self.get_finalized_command('bdist').bdist_base
+            self.bdist_dir = os.path.join(bdist_base, 'dumb')
+
+        if self.format is None:
+            try:
+                self.format = self.default_format[os.name]
+            except KeyError:
+                raise DistutilsPlatformError(
+                    "don't know how to create dumb built distributions "
+                    f"on platform {os.name}"
+                )
+
+        self.set_undefined_options(
+            'bdist',
+            ('dist_dir', 'dist_dir'),
+            ('plat_name', 'plat_name'),
+            ('skip_build', 'skip_build'),
+        )
+
+    def run(self):
+        if not self.skip_build:
+            self.run_command('build')
+
+        install = self.reinitialize_command('install', reinit_subcommands=True)
+        install.root = self.bdist_dir
+        install.skip_build = self.skip_build
+        install.warn_dir = False
+
+        log.info("installing to %s", self.bdist_dir)
+        self.run_command('install')
+
+        # And make an archive relative to the root of the
+        # pseudo-installation tree.
+        archive_basename = f"{self.distribution.get_fullname()}.{self.plat_name}"
+
+        pseudoinstall_root = os.path.join(self.dist_dir, archive_basename)
+        if not self.relative:
+            archive_root = self.bdist_dir
+        else:
+            if self.distribution.has_ext_modules() and (
+                install.install_base != install.install_platbase
+            ):
+                raise DistutilsPlatformError(
+                    "can't make a dumb built distribution where "
+                    f"base and platbase are different ({install.install_base!r}, {install.install_platbase!r})"
+                )
+            else:
+                archive_root = os.path.join(
+                    self.bdist_dir, ensure_relative(install.install_base)
+                )
+
+        # Make the archive
+        filename = self.make_archive(
+            pseudoinstall_root,
+            self.format,
+            root_dir=archive_root,
+            owner=self.owner,
+            group=self.group,
+        )
+        if self.distribution.has_ext_modules():
+            pyversion = get_python_version()
+        else:
+            pyversion = 'any'
+        self.distribution.dist_files.append(('bdist_dumb', pyversion, filename))
+
+        if not self.keep_temp:
+            remove_tree(self.bdist_dir, dry_run=self.dry_run)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist_rpm.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist_rpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..d443eb09b53761e5741e568a563eca5b94e09ef6
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/bdist_rpm.py
@@ -0,0 +1,597 @@
+"""distutils.command.bdist_rpm
+
+Implements the Distutils 'bdist_rpm' command (create RPM source and binary
+distributions)."""
+
+import os
+import subprocess
+import sys
+from distutils._log import log
+
+from ..core import Command
+from ..debug import DEBUG
+from ..errors import (
+    DistutilsExecError,
+    DistutilsFileError,
+    DistutilsOptionError,
+    DistutilsPlatformError,
+)
+from ..file_util import write_file
+from ..sysconfig import get_python_version
+
+
+class bdist_rpm(Command):
+    description = "create an RPM distribution"
+
+    user_options = [
+        ('bdist-base=', None, "base directory for creating built distributions"),
+        (
+            'rpm-base=',
+            None,
+            "base directory for creating RPMs (defaults to \"rpm\" under "
+            "--bdist-base; must be specified for RPM 2)",
+        ),
+        (
+            'dist-dir=',
+            'd',
+            "directory to put final RPM files in (and .spec files if --spec-only)",
+        ),
+        (
+            'python=',
+            None,
+            "path to Python interpreter to hard-code in the .spec file "
+            "[default: \"python\"]",
+        ),
+        (
+            'fix-python',
+            None,
+            "hard-code the exact path to the current Python interpreter in "
+            "the .spec file",
+        ),
+        ('spec-only', None, "only regenerate spec file"),
+        ('source-only', None, "only generate source RPM"),
+        ('binary-only', None, "only generate binary RPM"),
+        ('use-bzip2', None, "use bzip2 instead of gzip to create source distribution"),
+        # More meta-data: too RPM-specific to put in the setup script,
+        # but needs to go in the .spec file -- so we make these options
+        # to "bdist_rpm".  The idea is that packagers would put this
+        # info in setup.cfg, although they are of course free to
+        # supply it on the command line.
+        (
+            'distribution-name=',
+            None,
+            "name of the (Linux) distribution to which this "
+            "RPM applies (*not* the name of the module distribution!)",
+        ),
+        ('group=', None, "package classification [default: \"Development/Libraries\"]"),
+        ('release=', None, "RPM release number"),
+        ('serial=', None, "RPM serial number"),
+        (
+            'vendor=',
+            None,
+            "RPM \"vendor\" (eg. \"Joe Blow \") "
+            "[default: maintainer or author from setup script]",
+        ),
+        (
+            'packager=',
+            None,
+            "RPM packager (eg. \"Jane Doe \") [default: vendor]",
+        ),
+        ('doc-files=', None, "list of documentation files (space or comma-separated)"),
+        ('changelog=', None, "RPM changelog"),
+        ('icon=', None, "name of icon file"),
+        ('provides=', None, "capabilities provided by this package"),
+        ('requires=', None, "capabilities required by this package"),
+        ('conflicts=', None, "capabilities which conflict with this package"),
+        ('build-requires=', None, "capabilities required to build this package"),
+        ('obsoletes=', None, "capabilities made obsolete by this package"),
+        ('no-autoreq', None, "do not automatically calculate dependencies"),
+        # Actions to take when building RPM
+        ('keep-temp', 'k', "don't clean up RPM build directory"),
+        ('no-keep-temp', None, "clean up RPM build directory [default]"),
+        (
+            'use-rpm-opt-flags',
+            None,
+            "compile with RPM_OPT_FLAGS when building from source RPM",
+        ),
+        ('no-rpm-opt-flags', None, "do not pass any RPM CFLAGS to compiler"),
+        ('rpm3-mode', None, "RPM 3 compatibility mode (default)"),
+        ('rpm2-mode', None, "RPM 2 compatibility mode"),
+        # Add the hooks necessary for specifying custom scripts
+        ('prep-script=', None, "Specify a script for the PREP phase of RPM building"),
+        ('build-script=', None, "Specify a script for the BUILD phase of RPM building"),
+        (
+            'pre-install=',
+            None,
+            "Specify a script for the pre-INSTALL phase of RPM building",
+        ),
+        (
+            'install-script=',
+            None,
+            "Specify a script for the INSTALL phase of RPM building",
+        ),
+        (
+            'post-install=',
+            None,
+            "Specify a script for the post-INSTALL phase of RPM building",
+        ),
+        (
+            'pre-uninstall=',
+            None,
+            "Specify a script for the pre-UNINSTALL phase of RPM building",
+        ),
+        (
+            'post-uninstall=',
+            None,
+            "Specify a script for the post-UNINSTALL phase of RPM building",
+        ),
+        ('clean-script=', None, "Specify a script for the CLEAN phase of RPM building"),
+        (
+            'verify-script=',
+            None,
+            "Specify a script for the VERIFY phase of the RPM build",
+        ),
+        # Allow a packager to explicitly force an architecture
+        ('force-arch=', None, "Force an architecture onto the RPM build process"),
+        ('quiet', 'q', "Run the INSTALL phase of RPM building in quiet mode"),
+    ]
+
+    boolean_options = [
+        'keep-temp',
+        'use-rpm-opt-flags',
+        'rpm3-mode',
+        'no-autoreq',
+        'quiet',
+    ]
+
+    negative_opt = {
+        'no-keep-temp': 'keep-temp',
+        'no-rpm-opt-flags': 'use-rpm-opt-flags',
+        'rpm2-mode': 'rpm3-mode',
+    }
+
+    def initialize_options(self):
+        self.bdist_base = None
+        self.rpm_base = None
+        self.dist_dir = None
+        self.python = None
+        self.fix_python = None
+        self.spec_only = None
+        self.binary_only = None
+        self.source_only = None
+        self.use_bzip2 = None
+
+        self.distribution_name = None
+        self.group = None
+        self.release = None
+        self.serial = None
+        self.vendor = None
+        self.packager = None
+        self.doc_files = None
+        self.changelog = None
+        self.icon = None
+
+        self.prep_script = None
+        self.build_script = None
+        self.install_script = None
+        self.clean_script = None
+        self.verify_script = None
+        self.pre_install = None
+        self.post_install = None
+        self.pre_uninstall = None
+        self.post_uninstall = None
+        self.prep = None
+        self.provides = None
+        self.requires = None
+        self.conflicts = None
+        self.build_requires = None
+        self.obsoletes = None
+
+        self.keep_temp = False
+        self.use_rpm_opt_flags = True
+        self.rpm3_mode = True
+        self.no_autoreq = False
+
+        self.force_arch = None
+        self.quiet = False
+
+    def finalize_options(self):
+        self.set_undefined_options('bdist', ('bdist_base', 'bdist_base'))
+        if self.rpm_base is None:
+            if not self.rpm3_mode:
+                raise DistutilsOptionError("you must specify --rpm-base in RPM 2 mode")
+            self.rpm_base = os.path.join(self.bdist_base, "rpm")
+
+        if self.python is None:
+            if self.fix_python:
+                self.python = sys.executable
+            else:
+                self.python = "python3"
+        elif self.fix_python:
+            raise DistutilsOptionError(
+                "--python and --fix-python are mutually exclusive options"
+            )
+
+        if os.name != 'posix':
+            raise DistutilsPlatformError(
+                f"don't know how to create RPM distributions on platform {os.name}"
+            )
+        if self.binary_only and self.source_only:
+            raise DistutilsOptionError(
+                "cannot supply both '--source-only' and '--binary-only'"
+            )
+
+        # don't pass CFLAGS to pure python distributions
+        if not self.distribution.has_ext_modules():
+            self.use_rpm_opt_flags = False
+
+        self.set_undefined_options('bdist', ('dist_dir', 'dist_dir'))
+        self.finalize_package_data()
+
+    def finalize_package_data(self):
+        self.ensure_string('group', "Development/Libraries")
+        self.ensure_string(
+            'vendor',
+            f"{self.distribution.get_contact()} <{self.distribution.get_contact_email()}>",
+        )
+        self.ensure_string('packager')
+        self.ensure_string_list('doc_files')
+        if isinstance(self.doc_files, list):
+            for readme in ('README', 'README.txt'):
+                if os.path.exists(readme) and readme not in self.doc_files:
+                    self.doc_files.append(readme)
+
+        self.ensure_string('release', "1")
+        self.ensure_string('serial')  # should it be an int?
+
+        self.ensure_string('distribution_name')
+
+        self.ensure_string('changelog')
+        # Format changelog correctly
+        self.changelog = self._format_changelog(self.changelog)
+
+        self.ensure_filename('icon')
+
+        self.ensure_filename('prep_script')
+        self.ensure_filename('build_script')
+        self.ensure_filename('install_script')
+        self.ensure_filename('clean_script')
+        self.ensure_filename('verify_script')
+        self.ensure_filename('pre_install')
+        self.ensure_filename('post_install')
+        self.ensure_filename('pre_uninstall')
+        self.ensure_filename('post_uninstall')
+
+        # XXX don't forget we punted on summaries and descriptions -- they
+        # should be handled here eventually!
+
+        # Now *this* is some meta-data that belongs in the setup script...
+        self.ensure_string_list('provides')
+        self.ensure_string_list('requires')
+        self.ensure_string_list('conflicts')
+        self.ensure_string_list('build_requires')
+        self.ensure_string_list('obsoletes')
+
+        self.ensure_string('force_arch')
+
+    def run(self):  # noqa: C901
+        if DEBUG:
+            print("before _get_package_data():")
+            print("vendor =", self.vendor)
+            print("packager =", self.packager)
+            print("doc_files =", self.doc_files)
+            print("changelog =", self.changelog)
+
+        # make directories
+        if self.spec_only:
+            spec_dir = self.dist_dir
+            self.mkpath(spec_dir)
+        else:
+            rpm_dir = {}
+            for d in ('SOURCES', 'SPECS', 'BUILD', 'RPMS', 'SRPMS'):
+                rpm_dir[d] = os.path.join(self.rpm_base, d)
+                self.mkpath(rpm_dir[d])
+            spec_dir = rpm_dir['SPECS']
+
+        # Spec file goes into 'dist_dir' if '--spec-only specified',
+        # build/rpm. otherwise.
+        spec_path = os.path.join(spec_dir, f"{self.distribution.get_name()}.spec")
+        self.execute(
+            write_file, (spec_path, self._make_spec_file()), f"writing '{spec_path}'"
+        )
+
+        if self.spec_only:  # stop if requested
+            return
+
+        # Make a source distribution and copy to SOURCES directory with
+        # optional icon.
+        saved_dist_files = self.distribution.dist_files[:]
+        sdist = self.reinitialize_command('sdist')
+        if self.use_bzip2:
+            sdist.formats = ['bztar']
+        else:
+            sdist.formats = ['gztar']
+        self.run_command('sdist')
+        self.distribution.dist_files = saved_dist_files
+
+        source = sdist.get_archive_files()[0]
+        source_dir = rpm_dir['SOURCES']
+        self.copy_file(source, source_dir)
+
+        if self.icon:
+            if os.path.exists(self.icon):
+                self.copy_file(self.icon, source_dir)
+            else:
+                raise DistutilsFileError(f"icon file '{self.icon}' does not exist")
+
+        # build package
+        log.info("building RPMs")
+        rpm_cmd = ['rpmbuild']
+
+        if self.source_only:  # what kind of RPMs?
+            rpm_cmd.append('-bs')
+        elif self.binary_only:
+            rpm_cmd.append('-bb')
+        else:
+            rpm_cmd.append('-ba')
+        rpm_cmd.extend(['--define', f'__python {self.python}'])
+        if self.rpm3_mode:
+            rpm_cmd.extend(['--define', f'_topdir {os.path.abspath(self.rpm_base)}'])
+        if not self.keep_temp:
+            rpm_cmd.append('--clean')
+
+        if self.quiet:
+            rpm_cmd.append('--quiet')
+
+        rpm_cmd.append(spec_path)
+        # Determine the binary rpm names that should be built out of this spec
+        # file
+        # Note that some of these may not be really built (if the file
+        # list is empty)
+        nvr_string = "%{name}-%{version}-%{release}"
+        src_rpm = nvr_string + ".src.rpm"
+        non_src_rpm = "%{arch}/" + nvr_string + ".%{arch}.rpm"
+        q_cmd = rf"rpm -q --qf '{src_rpm} {non_src_rpm}\n' --specfile '{spec_path}'"
+
+        out = os.popen(q_cmd)
+        try:
+            binary_rpms = []
+            source_rpm = None
+            while True:
+                line = out.readline()
+                if not line:
+                    break
+                ell = line.strip().split()
+                assert len(ell) == 2
+                binary_rpms.append(ell[1])
+                # The source rpm is named after the first entry in the spec file
+                if source_rpm is None:
+                    source_rpm = ell[0]
+
+            status = out.close()
+            if status:
+                raise DistutilsExecError(f"Failed to execute: {q_cmd!r}")
+
+        finally:
+            out.close()
+
+        self.spawn(rpm_cmd)
+
+        if not self.dry_run:
+            if self.distribution.has_ext_modules():
+                pyversion = get_python_version()
+            else:
+                pyversion = 'any'
+
+            if not self.binary_only:
+                srpm = os.path.join(rpm_dir['SRPMS'], source_rpm)
+                assert os.path.exists(srpm)
+                self.move_file(srpm, self.dist_dir)
+                filename = os.path.join(self.dist_dir, source_rpm)
+                self.distribution.dist_files.append(('bdist_rpm', pyversion, filename))
+
+            if not self.source_only:
+                for rpm in binary_rpms:
+                    rpm = os.path.join(rpm_dir['RPMS'], rpm)
+                    if os.path.exists(rpm):
+                        self.move_file(rpm, self.dist_dir)
+                        filename = os.path.join(self.dist_dir, os.path.basename(rpm))
+                        self.distribution.dist_files.append((
+                            'bdist_rpm',
+                            pyversion,
+                            filename,
+                        ))
+
+    def _dist_path(self, path):
+        return os.path.join(self.dist_dir, os.path.basename(path))
+
+    def _make_spec_file(self):  # noqa: C901
+        """Generate the text of an RPM spec file and return it as a
+        list of strings (one per line).
+        """
+        # definitions and headers
+        spec_file = [
+            '%define name ' + self.distribution.get_name(),
+            '%define version ' + self.distribution.get_version().replace('-', '_'),
+            '%define unmangled_version ' + self.distribution.get_version(),
+            '%define release ' + self.release.replace('-', '_'),
+            '',
+            'Summary: ' + (self.distribution.get_description() or "UNKNOWN"),
+        ]
+
+        # Workaround for #14443 which affects some RPM based systems such as
+        # RHEL6 (and probably derivatives)
+        vendor_hook = subprocess.getoutput('rpm --eval %{__os_install_post}')
+        # Generate a potential replacement value for __os_install_post (whilst
+        # normalizing the whitespace to simplify the test for whether the
+        # invocation of brp-python-bytecompile passes in __python):
+        vendor_hook = '\n'.join([
+            f'  {line.strip()} \\' for line in vendor_hook.splitlines()
+        ])
+        problem = "brp-python-bytecompile \\\n"
+        fixed = "brp-python-bytecompile %{__python} \\\n"
+        fixed_hook = vendor_hook.replace(problem, fixed)
+        if fixed_hook != vendor_hook:
+            spec_file.append('# Workaround for https://bugs.python.org/issue14443')
+            spec_file.append('%define __os_install_post ' + fixed_hook + '\n')
+
+        # put locale summaries into spec file
+        # XXX not supported for now (hard to put a dictionary
+        # in a config file -- arg!)
+        # for locale in self.summaries.keys():
+        #    spec_file.append('Summary(%s): %s' % (locale,
+        #                                          self.summaries[locale]))
+
+        spec_file.extend([
+            'Name: %{name}',
+            'Version: %{version}',
+            'Release: %{release}',
+        ])
+
+        # XXX yuck! this filename is available from the "sdist" command,
+        # but only after it has run: and we create the spec file before
+        # running "sdist", in case of --spec-only.
+        if self.use_bzip2:
+            spec_file.append('Source0: %{name}-%{unmangled_version}.tar.bz2')
+        else:
+            spec_file.append('Source0: %{name}-%{unmangled_version}.tar.gz')
+
+        spec_file.extend([
+            'License: ' + (self.distribution.get_license() or "UNKNOWN"),
+            'Group: ' + self.group,
+            'BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot',
+            'Prefix: %{_prefix}',
+        ])
+
+        if not self.force_arch:
+            # noarch if no extension modules
+            if not self.distribution.has_ext_modules():
+                spec_file.append('BuildArch: noarch')
+        else:
+            spec_file.append(f'BuildArch: {self.force_arch}')
+
+        for field in (
+            'Vendor',
+            'Packager',
+            'Provides',
+            'Requires',
+            'Conflicts',
+            'Obsoletes',
+        ):
+            val = getattr(self, field.lower())
+            if isinstance(val, list):
+                spec_file.append('{}: {}'.format(field, ' '.join(val)))
+            elif val is not None:
+                spec_file.append(f'{field}: {val}')
+
+        if self.distribution.get_url():
+            spec_file.append('Url: ' + self.distribution.get_url())
+
+        if self.distribution_name:
+            spec_file.append('Distribution: ' + self.distribution_name)
+
+        if self.build_requires:
+            spec_file.append('BuildRequires: ' + ' '.join(self.build_requires))
+
+        if self.icon:
+            spec_file.append('Icon: ' + os.path.basename(self.icon))
+
+        if self.no_autoreq:
+            spec_file.append('AutoReq: 0')
+
+        spec_file.extend([
+            '',
+            '%description',
+            self.distribution.get_long_description() or "",
+        ])
+
+        # put locale descriptions into spec file
+        # XXX again, suppressed because config file syntax doesn't
+        # easily support this ;-(
+        # for locale in self.descriptions.keys():
+        #    spec_file.extend([
+        #        '',
+        #        '%description -l ' + locale,
+        #        self.descriptions[locale],
+        #        ])
+
+        # rpm scripts
+        # figure out default build script
+        def_setup_call = f"{self.python} {os.path.basename(sys.argv[0])}"
+        def_build = f"{def_setup_call} build"
+        if self.use_rpm_opt_flags:
+            def_build = 'env CFLAGS="$RPM_OPT_FLAGS" ' + def_build
+
+        # insert contents of files
+
+        # XXX this is kind of misleading: user-supplied options are files
+        # that we open and interpolate into the spec file, but the defaults
+        # are just text that we drop in as-is.  Hmmm.
+
+        install_cmd = f'{def_setup_call} install -O1 --root=$RPM_BUILD_ROOT --record=INSTALLED_FILES'
+
+        script_options = [
+            ('prep', 'prep_script', "%setup -n %{name}-%{unmangled_version}"),
+            ('build', 'build_script', def_build),
+            ('install', 'install_script', install_cmd),
+            ('clean', 'clean_script', "rm -rf $RPM_BUILD_ROOT"),
+            ('verifyscript', 'verify_script', None),
+            ('pre', 'pre_install', None),
+            ('post', 'post_install', None),
+            ('preun', 'pre_uninstall', None),
+            ('postun', 'post_uninstall', None),
+        ]
+
+        for rpm_opt, attr, default in script_options:
+            # Insert contents of file referred to, if no file is referred to
+            # use 'default' as contents of script
+            val = getattr(self, attr)
+            if val or default:
+                spec_file.extend([
+                    '',
+                    '%' + rpm_opt,
+                ])
+                if val:
+                    with open(val) as f:
+                        spec_file.extend(f.read().split('\n'))
+                else:
+                    spec_file.append(default)
+
+        # files section
+        spec_file.extend([
+            '',
+            '%files -f INSTALLED_FILES',
+            '%defattr(-,root,root)',
+        ])
+
+        if self.doc_files:
+            spec_file.append('%doc ' + ' '.join(self.doc_files))
+
+        if self.changelog:
+            spec_file.extend([
+                '',
+                '%changelog',
+            ])
+            spec_file.extend(self.changelog)
+
+        return spec_file
+
+    def _format_changelog(self, changelog):
+        """Format the changelog correctly and convert it to a list of strings"""
+        if not changelog:
+            return changelog
+        new_changelog = []
+        for line in changelog.strip().split('\n'):
+            line = line.strip()
+            if line[0] == '*':
+                new_changelog.extend(['', line])
+            elif line[0] == '-':
+                new_changelog.append(line)
+            else:
+                new_changelog.append('  ' + line)
+
+        # strip trailing newline inserted by first changelog entry
+        if not new_changelog[0]:
+            del new_changelog[0]
+
+        return new_changelog
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..caf55073af61d82294d8360842f03ac72f8a0140
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build.py
@@ -0,0 +1,156 @@
+"""distutils.command.build
+
+Implements the Distutils 'build' command."""
+
+import os
+import sys
+import sysconfig
+
+from ..core import Command
+from ..errors import DistutilsOptionError
+from ..util import get_platform
+
+
+def show_compilers():
+    from ..ccompiler import show_compilers
+
+    show_compilers()
+
+
+class build(Command):
+    description = "build everything needed to install"
+
+    user_options = [
+        ('build-base=', 'b', "base directory for build library"),
+        ('build-purelib=', None, "build directory for platform-neutral distributions"),
+        ('build-platlib=', None, "build directory for platform-specific distributions"),
+        (
+            'build-lib=',
+            None,
+            "build directory for all distribution (defaults to either build-purelib or build-platlib",
+        ),
+        ('build-scripts=', None, "build directory for scripts"),
+        ('build-temp=', 't', "temporary build directory"),
+        (
+            'plat-name=',
+            'p',
+            f"platform name to build for, if supported [default: {get_platform()}]",
+        ),
+        ('compiler=', 'c', "specify the compiler type"),
+        ('parallel=', 'j', "number of parallel build jobs"),
+        ('debug', 'g', "compile extensions and libraries with debugging information"),
+        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
+        ('executable=', 'e', "specify final destination interpreter path (build.py)"),
+    ]
+
+    boolean_options = ['debug', 'force']
+
+    help_options = [
+        ('help-compiler', None, "list available compilers", show_compilers),
+    ]
+
+    def initialize_options(self):
+        self.build_base = 'build'
+        # these are decided only after 'build_base' has its final value
+        # (unless overridden by the user or client)
+        self.build_purelib = None
+        self.build_platlib = None
+        self.build_lib = None
+        self.build_temp = None
+        self.build_scripts = None
+        self.compiler = None
+        self.plat_name = None
+        self.debug = None
+        self.force = False
+        self.executable = None
+        self.parallel = None
+
+    def finalize_options(self):  # noqa: C901
+        if self.plat_name is None:
+            self.plat_name = get_platform()
+        else:
+            # plat-name only supported for windows (other platforms are
+            # supported via ./configure flags, if at all).  Avoid misleading
+            # other platforms.
+            if os.name != 'nt':
+                raise DistutilsOptionError(
+                    "--plat-name only supported on Windows (try "
+                    "using './configure --help' on your platform)"
+                )
+
+        plat_specifier = f".{self.plat_name}-{sys.implementation.cache_tag}"
+
+        # Python 3.13+ with --disable-gil shouldn't share build directories
+        if sysconfig.get_config_var('Py_GIL_DISABLED'):
+            plat_specifier += 't'
+
+        # Make it so Python 2.x and Python 2.x with --with-pydebug don't
+        # share the same build directories. Doing so confuses the build
+        # process for C modules
+        if hasattr(sys, 'gettotalrefcount'):
+            plat_specifier += '-pydebug'
+
+        # 'build_purelib' and 'build_platlib' just default to 'lib' and
+        # 'lib.' under the base build directory.  We only use one of
+        # them for a given distribution, though --
+        if self.build_purelib is None:
+            self.build_purelib = os.path.join(self.build_base, 'lib')
+        if self.build_platlib is None:
+            self.build_platlib = os.path.join(self.build_base, 'lib' + plat_specifier)
+
+        # 'build_lib' is the actual directory that we will use for this
+        # particular module distribution -- if user didn't supply it, pick
+        # one of 'build_purelib' or 'build_platlib'.
+        if self.build_lib is None:
+            if self.distribution.has_ext_modules():
+                self.build_lib = self.build_platlib
+            else:
+                self.build_lib = self.build_purelib
+
+        # 'build_temp' -- temporary directory for compiler turds,
+        # "build/temp."
+        if self.build_temp is None:
+            self.build_temp = os.path.join(self.build_base, 'temp' + plat_specifier)
+        if self.build_scripts is None:
+            self.build_scripts = os.path.join(
+                self.build_base, 'scripts-%d.%d' % sys.version_info[:2]
+            )
+
+        if self.executable is None and sys.executable:
+            self.executable = os.path.normpath(sys.executable)
+
+        if isinstance(self.parallel, str):
+            try:
+                self.parallel = int(self.parallel)
+            except ValueError:
+                raise DistutilsOptionError("parallel should be an integer")
+
+    def run(self):
+        # Run all relevant sub-commands.  This will be some subset of:
+        #  - build_py      - pure Python modules
+        #  - build_clib    - standalone C libraries
+        #  - build_ext     - Python extensions
+        #  - build_scripts - (Python) scripts
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+    # -- Predicates for the sub-command list ---------------------------
+
+    def has_pure_modules(self):
+        return self.distribution.has_pure_modules()
+
+    def has_c_libraries(self):
+        return self.distribution.has_c_libraries()
+
+    def has_ext_modules(self):
+        return self.distribution.has_ext_modules()
+
+    def has_scripts(self):
+        return self.distribution.has_scripts()
+
+    sub_commands = [
+        ('build_py', has_pure_modules),
+        ('build_clib', has_c_libraries),
+        ('build_ext', has_ext_modules),
+        ('build_scripts', has_scripts),
+    ]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_clib.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_clib.py
new file mode 100644
index 0000000000000000000000000000000000000000..a600d0937377d20f2d3db1dd2ba415560b35f2fb
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_clib.py
@@ -0,0 +1,208 @@
+"""distutils.command.build_clib
+
+Implements the Distutils 'build_clib' command, to build a C/C++ library
+that is included in the module distribution and needed by an extension
+module."""
+
+
+# XXX this module has *lots* of code ripped-off quite transparently from
+# build_ext.py -- not surprisingly really, as the work required to build
+# a static library from a collection of C source files is not really all
+# that different from what's required to build a shared object file from
+# a collection of C source files.  Nevertheless, I haven't done the
+# necessary refactoring to account for the overlap in code between the
+# two modules, mainly because a number of subtle details changed in the
+# cut 'n paste.  Sigh.
+
+import os
+from distutils._log import log
+
+from ..core import Command
+from ..errors import DistutilsSetupError
+from ..sysconfig import customize_compiler
+
+
+def show_compilers():
+    from ..ccompiler import show_compilers
+
+    show_compilers()
+
+
+class build_clib(Command):
+    description = "build C/C++ libraries used by Python extensions"
+
+    user_options = [
+        ('build-clib=', 'b', "directory to build C/C++ libraries to"),
+        ('build-temp=', 't', "directory to put temporary build by-products"),
+        ('debug', 'g', "compile with debugging information"),
+        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
+        ('compiler=', 'c', "specify the compiler type"),
+    ]
+
+    boolean_options = ['debug', 'force']
+
+    help_options = [
+        ('help-compiler', None, "list available compilers", show_compilers),
+    ]
+
+    def initialize_options(self):
+        self.build_clib = None
+        self.build_temp = None
+
+        # List of libraries to build
+        self.libraries = None
+
+        # Compilation options for all libraries
+        self.include_dirs = None
+        self.define = None
+        self.undef = None
+        self.debug = None
+        self.force = False
+        self.compiler = None
+
+    def finalize_options(self):
+        # This might be confusing: both build-clib and build-temp default
+        # to build-temp as defined by the "build" command.  This is because
+        # I think that C libraries are really just temporary build
+        # by-products, at least from the point of view of building Python
+        # extensions -- but I want to keep my options open.
+        self.set_undefined_options(
+            'build',
+            ('build_temp', 'build_clib'),
+            ('build_temp', 'build_temp'),
+            ('compiler', 'compiler'),
+            ('debug', 'debug'),
+            ('force', 'force'),
+        )
+
+        self.libraries = self.distribution.libraries
+        if self.libraries:
+            self.check_library_list(self.libraries)
+
+        if self.include_dirs is None:
+            self.include_dirs = self.distribution.include_dirs or []
+        if isinstance(self.include_dirs, str):
+            self.include_dirs = self.include_dirs.split(os.pathsep)
+
+        # XXX same as for build_ext -- what about 'self.define' and
+        # 'self.undef' ?
+
+    def run(self):
+        if not self.libraries:
+            return
+
+        # Yech -- this is cut 'n pasted from build_ext.py!
+        from ..ccompiler import new_compiler
+
+        self.compiler = new_compiler(
+            compiler=self.compiler, dry_run=self.dry_run, force=self.force
+        )
+        customize_compiler(self.compiler)
+
+        if self.include_dirs is not None:
+            self.compiler.set_include_dirs(self.include_dirs)
+        if self.define is not None:
+            # 'define' option is a list of (name,value) tuples
+            for name, value in self.define:
+                self.compiler.define_macro(name, value)
+        if self.undef is not None:
+            for macro in self.undef:
+                self.compiler.undefine_macro(macro)
+
+        self.build_libraries(self.libraries)
+
+    def check_library_list(self, libraries):
+        """Ensure that the list of libraries is valid.
+
+        `library` is presumably provided as a command option 'libraries'.
+        This method checks that it is a list of 2-tuples, where the tuples
+        are (library_name, build_info_dict).
+
+        Raise DistutilsSetupError if the structure is invalid anywhere;
+        just returns otherwise.
+        """
+        if not isinstance(libraries, list):
+            raise DistutilsSetupError("'libraries' option must be a list of tuples")
+
+        for lib in libraries:
+            if not isinstance(lib, tuple) and len(lib) != 2:
+                raise DistutilsSetupError("each element of 'libraries' must a 2-tuple")
+
+            name, build_info = lib
+
+            if not isinstance(name, str):
+                raise DistutilsSetupError(
+                    "first element of each tuple in 'libraries' "
+                    "must be a string (the library name)"
+                )
+
+            if '/' in name or (os.sep != '/' and os.sep in name):
+                raise DistutilsSetupError(
+                    f"bad library name '{lib[0]}': "
+                    "may not contain directory separators"
+                )
+
+            if not isinstance(build_info, dict):
+                raise DistutilsSetupError(
+                    "second element of each tuple in 'libraries' "
+                    "must be a dictionary (build info)"
+                )
+
+    def get_library_names(self):
+        # Assume the library list is valid -- 'check_library_list()' is
+        # called from 'finalize_options()', so it should be!
+        if not self.libraries:
+            return None
+
+        lib_names = []
+        for lib_name, _build_info in self.libraries:
+            lib_names.append(lib_name)
+        return lib_names
+
+    def get_source_files(self):
+        self.check_library_list(self.libraries)
+        filenames = []
+        for lib_name, build_info in self.libraries:
+            sources = build_info.get('sources')
+            if sources is None or not isinstance(sources, (list, tuple)):
+                raise DistutilsSetupError(
+                    f"in 'libraries' option (library '{lib_name}'), "
+                    "'sources' must be present and must be "
+                    "a list of source filenames"
+                )
+
+            filenames.extend(sources)
+        return filenames
+
+    def build_libraries(self, libraries):
+        for lib_name, build_info in libraries:
+            sources = build_info.get('sources')
+            if sources is None or not isinstance(sources, (list, tuple)):
+                raise DistutilsSetupError(
+                    f"in 'libraries' option (library '{lib_name}'), "
+                    "'sources' must be present and must be "
+                    "a list of source filenames"
+                )
+            sources = list(sources)
+
+            log.info("building '%s' library", lib_name)
+
+            # First, compile the source code to object files in the library
+            # directory.  (This should probably change to putting object
+            # files in a temporary build directory.)
+            macros = build_info.get('macros')
+            include_dirs = build_info.get('include_dirs')
+            objects = self.compiler.compile(
+                sources,
+                output_dir=self.build_temp,
+                macros=macros,
+                include_dirs=include_dirs,
+                debug=self.debug,
+            )
+
+            # Now "link" the object files together into a static library.
+            # (On Unix at least, this isn't really linking -- it just
+            # builds an archive.  Whatever.)
+            self.compiler.create_static_lib(
+                objects, lib_name, output_dir=self.build_clib, debug=self.debug
+            )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_ext.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_ext.py
new file mode 100644
index 0000000000000000000000000000000000000000..18e1601a288daa7fe91a94c5b801a02891b2e4b5
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_ext.py
@@ -0,0 +1,800 @@
+"""distutils.command.build_ext
+
+Implements the Distutils 'build_ext' command, for building extension
+modules (currently limited to C extensions, should accommodate C++
+extensions ASAP)."""
+
+import contextlib
+import os
+import re
+import sys
+from distutils._log import log
+from site import USER_BASE
+
+from .._modified import newer_group
+from ..core import Command
+from ..errors import (
+    CCompilerError,
+    CompileError,
+    DistutilsError,
+    DistutilsOptionError,
+    DistutilsPlatformError,
+    DistutilsSetupError,
+)
+from ..extension import Extension
+from ..sysconfig import customize_compiler, get_config_h_filename, get_python_version
+from ..util import get_platform, is_mingw
+
+# An extension name is just a dot-separated list of Python NAMEs (ie.
+# the same as a fully-qualified module name).
+extension_name_re = re.compile(r'^[a-zA-Z_][a-zA-Z_0-9]*(\.[a-zA-Z_][a-zA-Z_0-9]*)*$')
+
+
+def show_compilers():
+    from ..ccompiler import show_compilers
+
+    show_compilers()
+
+
+class build_ext(Command):
+    description = "build C/C++ extensions (compile/link to build directory)"
+
+    # XXX thoughts on how to deal with complex command-line options like
+    # these, i.e. how to make it so fancy_getopt can suck them off the
+    # command line and make it look like setup.py defined the appropriate
+    # lists of tuples of what-have-you.
+    #   - each command needs a callback to process its command-line options
+    #   - Command.__init__() needs access to its share of the whole
+    #     command line (must ultimately come from
+    #     Distribution.parse_command_line())
+    #   - it then calls the current command class' option-parsing
+    #     callback to deal with weird options like -D, which have to
+    #     parse the option text and churn out some custom data
+    #     structure
+    #   - that data structure (in this case, a list of 2-tuples)
+    #     will then be present in the command object by the time
+    #     we get to finalize_options() (i.e. the constructor
+    #     takes care of both command-line and client options
+    #     in between initialize_options() and finalize_options())
+
+    sep_by = f" (separated by '{os.pathsep}')"
+    user_options = [
+        ('build-lib=', 'b', "directory for compiled extension modules"),
+        ('build-temp=', 't', "directory for temporary files (build by-products)"),
+        (
+            'plat-name=',
+            'p',
+            "platform name to cross-compile for, if supported "
+            f"[default: {get_platform()}]",
+        ),
+        (
+            'inplace',
+            'i',
+            "ignore build-lib and put compiled extensions into the source "
+            "directory alongside your pure Python modules",
+        ),
+        (
+            'include-dirs=',
+            'I',
+            "list of directories to search for header files" + sep_by,
+        ),
+        ('define=', 'D', "C preprocessor macros to define"),
+        ('undef=', 'U', "C preprocessor macros to undefine"),
+        ('libraries=', 'l', "external C libraries to link with"),
+        (
+            'library-dirs=',
+            'L',
+            "directories to search for external C libraries" + sep_by,
+        ),
+        ('rpath=', 'R', "directories to search for shared C libraries at runtime"),
+        ('link-objects=', 'O', "extra explicit link objects to include in the link"),
+        ('debug', 'g', "compile/link with debugging information"),
+        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
+        ('compiler=', 'c', "specify the compiler type"),
+        ('parallel=', 'j', "number of parallel build jobs"),
+        ('swig-cpp', None, "make SWIG create C++ files (default is C)"),
+        ('swig-opts=', None, "list of SWIG command line options"),
+        ('swig=', None, "path to the SWIG executable"),
+        ('user', None, "add user include, library and rpath"),
+    ]
+
+    boolean_options = ['inplace', 'debug', 'force', 'swig-cpp', 'user']
+
+    help_options = [
+        ('help-compiler', None, "list available compilers", show_compilers),
+    ]
+
+    def initialize_options(self):
+        self.extensions = None
+        self.build_lib = None
+        self.plat_name = None
+        self.build_temp = None
+        self.inplace = False
+        self.package = None
+
+        self.include_dirs = None
+        self.define = None
+        self.undef = None
+        self.libraries = None
+        self.library_dirs = None
+        self.rpath = None
+        self.link_objects = None
+        self.debug = None
+        self.force = None
+        self.compiler = None
+        self.swig = None
+        self.swig_cpp = None
+        self.swig_opts = None
+        self.user = None
+        self.parallel = None
+
+    @staticmethod
+    def _python_lib_dir(sysconfig):
+        """
+        Resolve Python's library directory for building extensions
+        that rely on a shared Python library.
+
+        See python/cpython#44264 and python/cpython#48686
+        """
+        if not sysconfig.get_config_var('Py_ENABLE_SHARED'):
+            return
+
+        if sysconfig.python_build:
+            yield '.'
+            return
+
+        if sys.platform == 'zos':
+            # On z/OS, a user is not required to install Python to
+            # a predetermined path, but can use Python portably
+            installed_dir = sysconfig.get_config_var('base')
+            lib_dir = sysconfig.get_config_var('platlibdir')
+            yield os.path.join(installed_dir, lib_dir)
+        else:
+            # building third party extensions
+            yield sysconfig.get_config_var('LIBDIR')
+
+    def finalize_options(self):  # noqa: C901
+        from distutils import sysconfig
+
+        self.set_undefined_options(
+            'build',
+            ('build_lib', 'build_lib'),
+            ('build_temp', 'build_temp'),
+            ('compiler', 'compiler'),
+            ('debug', 'debug'),
+            ('force', 'force'),
+            ('parallel', 'parallel'),
+            ('plat_name', 'plat_name'),
+        )
+
+        if self.package is None:
+            self.package = self.distribution.ext_package
+
+        self.extensions = self.distribution.ext_modules
+
+        # Make sure Python's include directories (for Python.h, pyconfig.h,
+        # etc.) are in the include search path.
+        py_include = sysconfig.get_python_inc()
+        plat_py_include = sysconfig.get_python_inc(plat_specific=True)
+        if self.include_dirs is None:
+            self.include_dirs = self.distribution.include_dirs or []
+        if isinstance(self.include_dirs, str):
+            self.include_dirs = self.include_dirs.split(os.pathsep)
+
+        # If in a virtualenv, add its include directory
+        # Issue 16116
+        if sys.exec_prefix != sys.base_exec_prefix:
+            self.include_dirs.append(os.path.join(sys.exec_prefix, 'include'))
+
+        # Put the Python "system" include dir at the end, so that
+        # any local include dirs take precedence.
+        self.include_dirs.extend(py_include.split(os.path.pathsep))
+        if plat_py_include != py_include:
+            self.include_dirs.extend(plat_py_include.split(os.path.pathsep))
+
+        self.ensure_string_list('libraries')
+        self.ensure_string_list('link_objects')
+
+        # Life is easier if we're not forever checking for None, so
+        # simplify these options to empty lists if unset
+        if self.libraries is None:
+            self.libraries = []
+        if self.library_dirs is None:
+            self.library_dirs = []
+        elif isinstance(self.library_dirs, str):
+            self.library_dirs = self.library_dirs.split(os.pathsep)
+
+        if self.rpath is None:
+            self.rpath = []
+        elif isinstance(self.rpath, str):
+            self.rpath = self.rpath.split(os.pathsep)
+
+        # for extensions under windows use different directories
+        # for Release and Debug builds.
+        # also Python's library directory must be appended to library_dirs
+        if os.name == 'nt' and not is_mingw():
+            # the 'libs' directory is for binary installs - we assume that
+            # must be the *native* platform.  But we don't really support
+            # cross-compiling via a binary install anyway, so we let it go.
+            self.library_dirs.append(os.path.join(sys.exec_prefix, 'libs'))
+            if sys.base_exec_prefix != sys.prefix:  # Issue 16116
+                self.library_dirs.append(os.path.join(sys.base_exec_prefix, 'libs'))
+            if self.debug:
+                self.build_temp = os.path.join(self.build_temp, "Debug")
+            else:
+                self.build_temp = os.path.join(self.build_temp, "Release")
+
+            # Append the source distribution include and library directories,
+            # this allows distutils on windows to work in the source tree
+            self.include_dirs.append(os.path.dirname(get_config_h_filename()))
+            self.library_dirs.append(sys.base_exec_prefix)
+
+            # Use the .lib files for the correct architecture
+            if self.plat_name == 'win32':
+                suffix = 'win32'
+            else:
+                # win-amd64
+                suffix = self.plat_name[4:]
+            new_lib = os.path.join(sys.exec_prefix, 'PCbuild')
+            if suffix:
+                new_lib = os.path.join(new_lib, suffix)
+            self.library_dirs.append(new_lib)
+
+        # For extensions under Cygwin, Python's library directory must be
+        # appended to library_dirs
+        if sys.platform[:6] == 'cygwin':
+            if not sysconfig.python_build:
+                # building third party extensions
+                self.library_dirs.append(
+                    os.path.join(
+                        sys.prefix, "lib", "python" + get_python_version(), "config"
+                    )
+                )
+            else:
+                # building python standard extensions
+                self.library_dirs.append('.')
+
+        self.library_dirs.extend(self._python_lib_dir(sysconfig))
+
+        # The argument parsing will result in self.define being a string, but
+        # it has to be a list of 2-tuples.  All the preprocessor symbols
+        # specified by the 'define' option will be set to '1'.  Multiple
+        # symbols can be separated with commas.
+
+        if self.define:
+            defines = self.define.split(',')
+            self.define = [(symbol, '1') for symbol in defines]
+
+        # The option for macros to undefine is also a string from the
+        # option parsing, but has to be a list.  Multiple symbols can also
+        # be separated with commas here.
+        if self.undef:
+            self.undef = self.undef.split(',')
+
+        if self.swig_opts is None:
+            self.swig_opts = []
+        else:
+            self.swig_opts = self.swig_opts.split(' ')
+
+        # Finally add the user include and library directories if requested
+        if self.user:
+            user_include = os.path.join(USER_BASE, "include")
+            user_lib = os.path.join(USER_BASE, "lib")
+            if os.path.isdir(user_include):
+                self.include_dirs.append(user_include)
+            if os.path.isdir(user_lib):
+                self.library_dirs.append(user_lib)
+                self.rpath.append(user_lib)
+
+        if isinstance(self.parallel, str):
+            try:
+                self.parallel = int(self.parallel)
+            except ValueError:
+                raise DistutilsOptionError("parallel should be an integer")
+
+    def run(self):  # noqa: C901
+        from ..ccompiler import new_compiler
+
+        # 'self.extensions', as supplied by setup.py, is a list of
+        # Extension instances.  See the documentation for Extension (in
+        # distutils.extension) for details.
+        #
+        # For backwards compatibility with Distutils 0.8.2 and earlier, we
+        # also allow the 'extensions' list to be a list of tuples:
+        #    (ext_name, build_info)
+        # where build_info is a dictionary containing everything that
+        # Extension instances do except the name, with a few things being
+        # differently named.  We convert these 2-tuples to Extension
+        # instances as needed.
+
+        if not self.extensions:
+            return
+
+        # If we were asked to build any C/C++ libraries, make sure that the
+        # directory where we put them is in the library search path for
+        # linking extensions.
+        if self.distribution.has_c_libraries():
+            build_clib = self.get_finalized_command('build_clib')
+            self.libraries.extend(build_clib.get_library_names() or [])
+            self.library_dirs.append(build_clib.build_clib)
+
+        # Setup the CCompiler object that we'll use to do all the
+        # compiling and linking
+        self.compiler = new_compiler(
+            compiler=self.compiler,
+            verbose=self.verbose,
+            dry_run=self.dry_run,
+            force=self.force,
+        )
+        customize_compiler(self.compiler)
+        # If we are cross-compiling, init the compiler now (if we are not
+        # cross-compiling, init would not hurt, but people may rely on
+        # late initialization of compiler even if they shouldn't...)
+        if os.name == 'nt' and self.plat_name != get_platform():
+            self.compiler.initialize(self.plat_name)
+
+        # And make sure that any compile/link-related options (which might
+        # come from the command-line or from the setup script) are set in
+        # that CCompiler object -- that way, they automatically apply to
+        # all compiling and linking done here.
+        if self.include_dirs is not None:
+            self.compiler.set_include_dirs(self.include_dirs)
+        if self.define is not None:
+            # 'define' option is a list of (name,value) tuples
+            for name, value in self.define:
+                self.compiler.define_macro(name, value)
+        if self.undef is not None:
+            for macro in self.undef:
+                self.compiler.undefine_macro(macro)
+        if self.libraries is not None:
+            self.compiler.set_libraries(self.libraries)
+        if self.library_dirs is not None:
+            self.compiler.set_library_dirs(self.library_dirs)
+        if self.rpath is not None:
+            self.compiler.set_runtime_library_dirs(self.rpath)
+        if self.link_objects is not None:
+            self.compiler.set_link_objects(self.link_objects)
+
+        # Now actually compile and link everything.
+        self.build_extensions()
+
+    def check_extensions_list(self, extensions):  # noqa: C901
+        """Ensure that the list of extensions (presumably provided as a
+        command option 'extensions') is valid, i.e. it is a list of
+        Extension objects.  We also support the old-style list of 2-tuples,
+        where the tuples are (ext_name, build_info), which are converted to
+        Extension instances here.
+
+        Raise DistutilsSetupError if the structure is invalid anywhere;
+        just returns otherwise.
+        """
+        if not isinstance(extensions, list):
+            raise DistutilsSetupError(
+                "'ext_modules' option must be a list of Extension instances"
+            )
+
+        for i, ext in enumerate(extensions):
+            if isinstance(ext, Extension):
+                continue  # OK! (assume type-checking done
+                # by Extension constructor)
+
+            if not isinstance(ext, tuple) or len(ext) != 2:
+                raise DistutilsSetupError(
+                    "each element of 'ext_modules' option must be an "
+                    "Extension instance or 2-tuple"
+                )
+
+            ext_name, build_info = ext
+
+            log.warning(
+                "old-style (ext_name, build_info) tuple found in "
+                "ext_modules for extension '%s' "
+                "-- please convert to Extension instance",
+                ext_name,
+            )
+
+            if not (isinstance(ext_name, str) and extension_name_re.match(ext_name)):
+                raise DistutilsSetupError(
+                    "first element of each tuple in 'ext_modules' "
+                    "must be the extension name (a string)"
+                )
+
+            if not isinstance(build_info, dict):
+                raise DistutilsSetupError(
+                    "second element of each tuple in 'ext_modules' "
+                    "must be a dictionary (build info)"
+                )
+
+            # OK, the (ext_name, build_info) dict is type-safe: convert it
+            # to an Extension instance.
+            ext = Extension(ext_name, build_info['sources'])
+
+            # Easy stuff: one-to-one mapping from dict elements to
+            # instance attributes.
+            for key in (
+                'include_dirs',
+                'library_dirs',
+                'libraries',
+                'extra_objects',
+                'extra_compile_args',
+                'extra_link_args',
+            ):
+                val = build_info.get(key)
+                if val is not None:
+                    setattr(ext, key, val)
+
+            # Medium-easy stuff: same syntax/semantics, different names.
+            ext.runtime_library_dirs = build_info.get('rpath')
+            if 'def_file' in build_info:
+                log.warning("'def_file' element of build info dict no longer supported")
+
+            # Non-trivial stuff: 'macros' split into 'define_macros'
+            # and 'undef_macros'.
+            macros = build_info.get('macros')
+            if macros:
+                ext.define_macros = []
+                ext.undef_macros = []
+                for macro in macros:
+                    if not (isinstance(macro, tuple) and len(macro) in (1, 2)):
+                        raise DistutilsSetupError(
+                            "'macros' element of build info dict "
+                            "must be 1- or 2-tuple"
+                        )
+                    if len(macro) == 1:
+                        ext.undef_macros.append(macro[0])
+                    elif len(macro) == 2:
+                        ext.define_macros.append(macro)
+
+            extensions[i] = ext
+
+    def get_source_files(self):
+        self.check_extensions_list(self.extensions)
+        filenames = []
+
+        # Wouldn't it be neat if we knew the names of header files too...
+        for ext in self.extensions:
+            filenames.extend(ext.sources)
+        return filenames
+
+    def get_outputs(self):
+        # Sanity check the 'extensions' list -- can't assume this is being
+        # done in the same run as a 'build_extensions()' call (in fact, we
+        # can probably assume that it *isn't*!).
+        self.check_extensions_list(self.extensions)
+
+        # And build the list of output (built) filenames.  Note that this
+        # ignores the 'inplace' flag, and assumes everything goes in the
+        # "build" tree.
+        outputs = []
+        for ext in self.extensions:
+            outputs.append(self.get_ext_fullpath(ext.name))
+        return outputs
+
+    def build_extensions(self):
+        # First, sanity-check the 'extensions' list
+        self.check_extensions_list(self.extensions)
+        if self.parallel:
+            self._build_extensions_parallel()
+        else:
+            self._build_extensions_serial()
+
+    def _build_extensions_parallel(self):
+        workers = self.parallel
+        if self.parallel is True:
+            workers = os.cpu_count()  # may return None
+        try:
+            from concurrent.futures import ThreadPoolExecutor
+        except ImportError:
+            workers = None
+
+        if workers is None:
+            self._build_extensions_serial()
+            return
+
+        with ThreadPoolExecutor(max_workers=workers) as executor:
+            futures = [
+                executor.submit(self.build_extension, ext) for ext in self.extensions
+            ]
+            for ext, fut in zip(self.extensions, futures):
+                with self._filter_build_errors(ext):
+                    fut.result()
+
+    def _build_extensions_serial(self):
+        for ext in self.extensions:
+            with self._filter_build_errors(ext):
+                self.build_extension(ext)
+
+    @contextlib.contextmanager
+    def _filter_build_errors(self, ext):
+        try:
+            yield
+        except (CCompilerError, DistutilsError, CompileError) as e:
+            if not ext.optional:
+                raise
+            self.warn(f'building extension "{ext.name}" failed: {e}')
+
+    def build_extension(self, ext):
+        sources = ext.sources
+        if sources is None or not isinstance(sources, (list, tuple)):
+            raise DistutilsSetupError(
+                f"in 'ext_modules' option (extension '{ext.name}'), "
+                "'sources' must be present and must be "
+                "a list of source filenames"
+            )
+        # sort to make the resulting .so file build reproducible
+        sources = sorted(sources)
+
+        ext_path = self.get_ext_fullpath(ext.name)
+        depends = sources + ext.depends
+        if not (self.force or newer_group(depends, ext_path, 'newer')):
+            log.debug("skipping '%s' extension (up-to-date)", ext.name)
+            return
+        else:
+            log.info("building '%s' extension", ext.name)
+
+        # First, scan the sources for SWIG definition files (.i), run
+        # SWIG on 'em to create .c files, and modify the sources list
+        # accordingly.
+        sources = self.swig_sources(sources, ext)
+
+        # Next, compile the source code to object files.
+
+        # XXX not honouring 'define_macros' or 'undef_macros' -- the
+        # CCompiler API needs to change to accommodate this, and I
+        # want to do one thing at a time!
+
+        # Two possible sources for extra compiler arguments:
+        #   - 'extra_compile_args' in Extension object
+        #   - CFLAGS environment variable (not particularly
+        #     elegant, but people seem to expect it and I
+        #     guess it's useful)
+        # The environment variable should take precedence, and
+        # any sensible compiler will give precedence to later
+        # command line args.  Hence we combine them in order:
+        extra_args = ext.extra_compile_args or []
+
+        macros = ext.define_macros[:]
+        for undef in ext.undef_macros:
+            macros.append((undef,))
+
+        objects = self.compiler.compile(
+            sources,
+            output_dir=self.build_temp,
+            macros=macros,
+            include_dirs=ext.include_dirs,
+            debug=self.debug,
+            extra_postargs=extra_args,
+            depends=ext.depends,
+        )
+
+        # XXX outdated variable, kept here in case third-part code
+        # needs it.
+        self._built_objects = objects[:]
+
+        # Now link the object files together into a "shared object" --
+        # of course, first we have to figure out all the other things
+        # that go into the mix.
+        if ext.extra_objects:
+            objects.extend(ext.extra_objects)
+        extra_args = ext.extra_link_args or []
+
+        # Detect target language, if not provided
+        language = ext.language or self.compiler.detect_language(sources)
+
+        self.compiler.link_shared_object(
+            objects,
+            ext_path,
+            libraries=self.get_libraries(ext),
+            library_dirs=ext.library_dirs,
+            runtime_library_dirs=ext.runtime_library_dirs,
+            extra_postargs=extra_args,
+            export_symbols=self.get_export_symbols(ext),
+            debug=self.debug,
+            build_temp=self.build_temp,
+            target_lang=language,
+        )
+
+    def swig_sources(self, sources, extension):
+        """Walk the list of source files in 'sources', looking for SWIG
+        interface (.i) files.  Run SWIG on all that are found, and
+        return a modified 'sources' list with SWIG source files replaced
+        by the generated C (or C++) files.
+        """
+        new_sources = []
+        swig_sources = []
+        swig_targets = {}
+
+        # XXX this drops generated C/C++ files into the source tree, which
+        # is fine for developers who want to distribute the generated
+        # source -- but there should be an option to put SWIG output in
+        # the temp dir.
+
+        if self.swig_cpp:
+            log.warning("--swig-cpp is deprecated - use --swig-opts=-c++")
+
+        if (
+            self.swig_cpp
+            or ('-c++' in self.swig_opts)
+            or ('-c++' in extension.swig_opts)
+        ):
+            target_ext = '.cpp'
+        else:
+            target_ext = '.c'
+
+        for source in sources:
+            (base, ext) = os.path.splitext(source)
+            if ext == ".i":  # SWIG interface file
+                new_sources.append(base + '_wrap' + target_ext)
+                swig_sources.append(source)
+                swig_targets[source] = new_sources[-1]
+            else:
+                new_sources.append(source)
+
+        if not swig_sources:
+            return new_sources
+
+        swig = self.swig or self.find_swig()
+        swig_cmd = [swig, "-python"]
+        swig_cmd.extend(self.swig_opts)
+        if self.swig_cpp:
+            swig_cmd.append("-c++")
+
+        # Do not override commandline arguments
+        if not self.swig_opts:
+            for o in extension.swig_opts:
+                swig_cmd.append(o)
+
+        for source in swig_sources:
+            target = swig_targets[source]
+            log.info("swigging %s to %s", source, target)
+            self.spawn(swig_cmd + ["-o", target, source])
+
+        return new_sources
+
+    def find_swig(self):
+        """Return the name of the SWIG executable.  On Unix, this is
+        just "swig" -- it should be in the PATH.  Tries a bit harder on
+        Windows.
+        """
+        if os.name == "posix":
+            return "swig"
+        elif os.name == "nt":
+            # Look for SWIG in its standard installation directory on
+            # Windows (or so I presume!).  If we find it there, great;
+            # if not, act like Unix and assume it's in the PATH.
+            for vers in ("1.3", "1.2", "1.1"):
+                fn = os.path.join(f"c:\\swig{vers}", "swig.exe")
+                if os.path.isfile(fn):
+                    return fn
+            else:
+                return "swig.exe"
+        else:
+            raise DistutilsPlatformError(
+                "I don't know how to find (much less run) SWIG "
+                f"on platform '{os.name}'"
+            )
+
+    # -- Name generators -----------------------------------------------
+    # (extension names, filenames, whatever)
+    def get_ext_fullpath(self, ext_name):
+        """Returns the path of the filename for a given extension.
+
+        The file is located in `build_lib` or directly in the package
+        (inplace option).
+        """
+        fullname = self.get_ext_fullname(ext_name)
+        modpath = fullname.split('.')
+        filename = self.get_ext_filename(modpath[-1])
+
+        if not self.inplace:
+            # no further work needed
+            # returning :
+            #   build_dir/package/path/filename
+            filename = os.path.join(*modpath[:-1] + [filename])
+            return os.path.join(self.build_lib, filename)
+
+        # the inplace option requires to find the package directory
+        # using the build_py command for that
+        package = '.'.join(modpath[0:-1])
+        build_py = self.get_finalized_command('build_py')
+        package_dir = os.path.abspath(build_py.get_package_dir(package))
+
+        # returning
+        #   package_dir/filename
+        return os.path.join(package_dir, filename)
+
+    def get_ext_fullname(self, ext_name):
+        """Returns the fullname of a given extension name.
+
+        Adds the `package.` prefix"""
+        if self.package is None:
+            return ext_name
+        else:
+            return self.package + '.' + ext_name
+
+    def get_ext_filename(self, ext_name):
+        r"""Convert the name of an extension (eg. "foo.bar") into the name
+        of the file from which it will be loaded (eg. "foo/bar.so", or
+        "foo\bar.pyd").
+        """
+        from ..sysconfig import get_config_var
+
+        ext_path = ext_name.split('.')
+        ext_suffix = get_config_var('EXT_SUFFIX')
+        return os.path.join(*ext_path) + ext_suffix
+
+    def get_export_symbols(self, ext):
+        """Return the list of symbols that a shared extension has to
+        export.  This either uses 'ext.export_symbols' or, if it's not
+        provided, "PyInit_" + module_name.  Only relevant on Windows, where
+        the .pyd file (DLL) must export the module "PyInit_" function.
+        """
+        name = ext.name.split('.')[-1]
+        try:
+            # Unicode module name support as defined in PEP-489
+            # https://peps.python.org/pep-0489/#export-hook-name
+            name.encode('ascii')
+        except UnicodeEncodeError:
+            suffix = 'U_' + name.encode('punycode').replace(b'-', b'_').decode('ascii')
+        else:
+            suffix = "_" + name
+
+        initfunc_name = "PyInit" + suffix
+        if initfunc_name not in ext.export_symbols:
+            ext.export_symbols.append(initfunc_name)
+        return ext.export_symbols
+
+    def get_libraries(self, ext):  # noqa: C901
+        """Return the list of libraries to link against when building a
+        shared extension.  On most platforms, this is just 'ext.libraries';
+        on Windows, we add the Python library (eg. python20.dll).
+        """
+        # The python library is always needed on Windows.  For MSVC, this
+        # is redundant, since the library is mentioned in a pragma in
+        # pyconfig.h that MSVC groks.  The other Windows compilers all seem
+        # to need it mentioned explicitly, though, so that's what we do.
+        # Append '_d' to the python import library on debug builds.
+        if sys.platform == "win32" and not is_mingw():
+            from .._msvccompiler import MSVCCompiler
+
+            if not isinstance(self.compiler, MSVCCompiler):
+                template = "python%d%d"
+                if self.debug:
+                    template = template + '_d'
+                pythonlib = template % (
+                    sys.hexversion >> 24,
+                    (sys.hexversion >> 16) & 0xFF,
+                )
+                # don't extend ext.libraries, it may be shared with other
+                # extensions, it is a reference to the original list
+                return ext.libraries + [pythonlib]
+        else:
+            # On Android only the main executable and LD_PRELOADs are considered
+            # to be RTLD_GLOBAL, all the dependencies of the main executable
+            # remain RTLD_LOCAL and so the shared libraries must be linked with
+            # libpython when python is built with a shared python library (issue
+            # bpo-21536).
+            # On Cygwin (and if required, other POSIX-like platforms based on
+            # Windows like MinGW) it is simply necessary that all symbols in
+            # shared libraries are resolved at link time.
+            from ..sysconfig import get_config_var
+
+            link_libpython = False
+            if get_config_var('Py_ENABLE_SHARED'):
+                # A native build on an Android device or on Cygwin
+                if hasattr(sys, 'getandroidapilevel'):
+                    link_libpython = True
+                elif sys.platform == 'cygwin' or is_mingw():
+                    link_libpython = True
+                elif '_PYTHON_HOST_PLATFORM' in os.environ:
+                    # We are cross-compiling for one of the relevant platforms
+                    if get_config_var('ANDROID_API_LEVEL') != 0:
+                        link_libpython = True
+                    elif get_config_var('MACHDEP') == 'cygwin':
+                        link_libpython = True
+
+            if link_libpython:
+                ldversion = get_config_var('LDVERSION')
+                return ext.libraries + ['python' + ldversion]
+
+        return ext.libraries
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_py.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_py.py
new file mode 100644
index 0000000000000000000000000000000000000000..49d710346ebc9bf5e27f5358c2951b85e197e330
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_py.py
@@ -0,0 +1,406 @@
+"""distutils.command.build_py
+
+Implements the Distutils 'build_py' command."""
+
+import glob
+import importlib.util
+import os
+import sys
+from distutils._log import log
+
+from ..core import Command
+from ..errors import DistutilsFileError, DistutilsOptionError
+from ..util import convert_path
+
+
+class build_py(Command):
+    description = "\"build\" pure Python modules (copy to build directory)"
+
+    user_options = [
+        ('build-lib=', 'd', "directory to \"build\" (copy) to"),
+        ('compile', 'c', "compile .py to .pyc"),
+        ('no-compile', None, "don't compile .py files [default]"),
+        (
+            'optimize=',
+            'O',
+            "also compile with optimization: -O1 for \"python -O\", "
+            "-O2 for \"python -OO\", and -O0 to disable [default: -O0]",
+        ),
+        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
+    ]
+
+    boolean_options = ['compile', 'force']
+    negative_opt = {'no-compile': 'compile'}
+
+    def initialize_options(self):
+        self.build_lib = None
+        self.py_modules = None
+        self.package = None
+        self.package_data = None
+        self.package_dir = None
+        self.compile = False
+        self.optimize = 0
+        self.force = None
+
+    def finalize_options(self):
+        self.set_undefined_options(
+            'build', ('build_lib', 'build_lib'), ('force', 'force')
+        )
+
+        # Get the distribution options that are aliases for build_py
+        # options -- list of packages and list of modules.
+        self.packages = self.distribution.packages
+        self.py_modules = self.distribution.py_modules
+        self.package_data = self.distribution.package_data
+        self.package_dir = {}
+        if self.distribution.package_dir:
+            for name, path in self.distribution.package_dir.items():
+                self.package_dir[name] = convert_path(path)
+        self.data_files = self.get_data_files()
+
+        # Ick, copied straight from install_lib.py (fancy_getopt needs a
+        # type system!  Hell, *everything* needs a type system!!!)
+        if not isinstance(self.optimize, int):
+            try:
+                self.optimize = int(self.optimize)
+                assert 0 <= self.optimize <= 2
+            except (ValueError, AssertionError):
+                raise DistutilsOptionError("optimize must be 0, 1, or 2")
+
+    def run(self):
+        # XXX copy_file by default preserves atime and mtime.  IMHO this is
+        # the right thing to do, but perhaps it should be an option -- in
+        # particular, a site administrator might want installed files to
+        # reflect the time of installation rather than the last
+        # modification time before the installed release.
+
+        # XXX copy_file by default preserves mode, which appears to be the
+        # wrong thing to do: if a file is read-only in the working
+        # directory, we want it to be installed read/write so that the next
+        # installation of the same module distribution can overwrite it
+        # without problems.  (This might be a Unix-specific issue.)  Thus
+        # we turn off 'preserve_mode' when copying to the build directory,
+        # since the build directory is supposed to be exactly what the
+        # installation will look like (ie. we preserve mode when
+        # installing).
+
+        # Two options control which modules will be installed: 'packages'
+        # and 'py_modules'.  The former lets us work with whole packages, not
+        # specifying individual modules at all; the latter is for
+        # specifying modules one-at-a-time.
+
+        if self.py_modules:
+            self.build_modules()
+        if self.packages:
+            self.build_packages()
+            self.build_package_data()
+
+        self.byte_compile(self.get_outputs(include_bytecode=False))
+
+    def get_data_files(self):
+        """Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
+        data = []
+        if not self.packages:
+            return data
+        for package in self.packages:
+            # Locate package source directory
+            src_dir = self.get_package_dir(package)
+
+            # Compute package build directory
+            build_dir = os.path.join(*([self.build_lib] + package.split('.')))
+
+            # Length of path to strip from found files
+            plen = 0
+            if src_dir:
+                plen = len(src_dir) + 1
+
+            # Strip directory from globbed filenames
+            filenames = [file[plen:] for file in self.find_data_files(package, src_dir)]
+            data.append((package, src_dir, build_dir, filenames))
+        return data
+
+    def find_data_files(self, package, src_dir):
+        """Return filenames for package's data files in 'src_dir'"""
+        globs = self.package_data.get('', []) + self.package_data.get(package, [])
+        files = []
+        for pattern in globs:
+            # Each pattern has to be converted to a platform-specific path
+            filelist = glob.glob(
+                os.path.join(glob.escape(src_dir), convert_path(pattern))
+            )
+            # Files that match more than one pattern are only added once
+            files.extend([
+                fn for fn in filelist if fn not in files and os.path.isfile(fn)
+            ])
+        return files
+
+    def build_package_data(self):
+        """Copy data files into build directory"""
+        for _package, src_dir, build_dir, filenames in self.data_files:
+            for filename in filenames:
+                target = os.path.join(build_dir, filename)
+                self.mkpath(os.path.dirname(target))
+                self.copy_file(
+                    os.path.join(src_dir, filename), target, preserve_mode=False
+                )
+
+    def get_package_dir(self, package):
+        """Return the directory, relative to the top of the source
+        distribution, where package 'package' should be found
+        (at least according to the 'package_dir' option, if any)."""
+        path = package.split('.')
+
+        if not self.package_dir:
+            if path:
+                return os.path.join(*path)
+            else:
+                return ''
+        else:
+            tail = []
+            while path:
+                try:
+                    pdir = self.package_dir['.'.join(path)]
+                except KeyError:
+                    tail.insert(0, path[-1])
+                    del path[-1]
+                else:
+                    tail.insert(0, pdir)
+                    return os.path.join(*tail)
+            else:
+                # Oops, got all the way through 'path' without finding a
+                # match in package_dir.  If package_dir defines a directory
+                # for the root (nameless) package, then fallback on it;
+                # otherwise, we might as well have not consulted
+                # package_dir at all, as we just use the directory implied
+                # by 'tail' (which should be the same as the original value
+                # of 'path' at this point).
+                pdir = self.package_dir.get('')
+                if pdir is not None:
+                    tail.insert(0, pdir)
+
+                if tail:
+                    return os.path.join(*tail)
+                else:
+                    return ''
+
+    def check_package(self, package, package_dir):
+        # Empty dir name means current directory, which we can probably
+        # assume exists.  Also, os.path.exists and isdir don't know about
+        # my "empty string means current dir" convention, so we have to
+        # circumvent them.
+        if package_dir != "":
+            if not os.path.exists(package_dir):
+                raise DistutilsFileError(
+                    f"package directory '{package_dir}' does not exist"
+                )
+            if not os.path.isdir(package_dir):
+                raise DistutilsFileError(
+                    f"supposed package directory '{package_dir}' exists, "
+                    "but is not a directory"
+                )
+
+        # Directories without __init__.py are namespace packages (PEP 420).
+        if package:
+            init_py = os.path.join(package_dir, "__init__.py")
+            if os.path.isfile(init_py):
+                return init_py
+
+        # Either not in a package at all (__init__.py not expected), or
+        # __init__.py doesn't exist -- so don't return the filename.
+        return None
+
+    def check_module(self, module, module_file):
+        if not os.path.isfile(module_file):
+            log.warning("file %s (for module %s) not found", module_file, module)
+            return False
+        else:
+            return True
+
+    def find_package_modules(self, package, package_dir):
+        self.check_package(package, package_dir)
+        module_files = glob.glob(os.path.join(glob.escape(package_dir), "*.py"))
+        modules = []
+        setup_script = os.path.abspath(self.distribution.script_name)
+
+        for f in module_files:
+            abs_f = os.path.abspath(f)
+            if abs_f != setup_script:
+                module = os.path.splitext(os.path.basename(f))[0]
+                modules.append((package, module, f))
+            else:
+                self.debug_print(f"excluding {setup_script}")
+        return modules
+
+    def find_modules(self):
+        """Finds individually-specified Python modules, ie. those listed by
+        module name in 'self.py_modules'.  Returns a list of tuples (package,
+        module_base, filename): 'package' is a tuple of the path through
+        package-space to the module; 'module_base' is the bare (no
+        packages, no dots) module name, and 'filename' is the path to the
+        ".py" file (relative to the distribution root) that implements the
+        module.
+        """
+        # Map package names to tuples of useful info about the package:
+        #    (package_dir, checked)
+        # package_dir - the directory where we'll find source files for
+        #   this package
+        # checked - true if we have checked that the package directory
+        #   is valid (exists, contains __init__.py, ... ?)
+        packages = {}
+
+        # List of (package, module, filename) tuples to return
+        modules = []
+
+        # We treat modules-in-packages almost the same as toplevel modules,
+        # just the "package" for a toplevel is empty (either an empty
+        # string or empty list, depending on context).  Differences:
+        #   - don't check for __init__.py in directory for empty package
+        for module in self.py_modules:
+            path = module.split('.')
+            package = '.'.join(path[0:-1])
+            module_base = path[-1]
+
+            try:
+                (package_dir, checked) = packages[package]
+            except KeyError:
+                package_dir = self.get_package_dir(package)
+                checked = False
+
+            if not checked:
+                init_py = self.check_package(package, package_dir)
+                packages[package] = (package_dir, 1)
+                if init_py:
+                    modules.append((package, "__init__", init_py))
+
+            # XXX perhaps we should also check for just .pyc files
+            # (so greedy closed-source bastards can distribute Python
+            # modules too)
+            module_file = os.path.join(package_dir, module_base + ".py")
+            if not self.check_module(module, module_file):
+                continue
+
+            modules.append((package, module_base, module_file))
+
+        return modules
+
+    def find_all_modules(self):
+        """Compute the list of all modules that will be built, whether
+        they are specified one-module-at-a-time ('self.py_modules') or
+        by whole packages ('self.packages').  Return a list of tuples
+        (package, module, module_file), just like 'find_modules()' and
+        'find_package_modules()' do."""
+        modules = []
+        if self.py_modules:
+            modules.extend(self.find_modules())
+        if self.packages:
+            for package in self.packages:
+                package_dir = self.get_package_dir(package)
+                m = self.find_package_modules(package, package_dir)
+                modules.extend(m)
+        return modules
+
+    def get_source_files(self):
+        return [module[-1] for module in self.find_all_modules()]
+
+    def get_module_outfile(self, build_dir, package, module):
+        outfile_path = [build_dir] + list(package) + [module + ".py"]
+        return os.path.join(*outfile_path)
+
+    def get_outputs(self, include_bytecode=True):
+        modules = self.find_all_modules()
+        outputs = []
+        for package, module, _module_file in modules:
+            package = package.split('.')
+            filename = self.get_module_outfile(self.build_lib, package, module)
+            outputs.append(filename)
+            if include_bytecode:
+                if self.compile:
+                    outputs.append(
+                        importlib.util.cache_from_source(filename, optimization='')
+                    )
+                if self.optimize > 0:
+                    outputs.append(
+                        importlib.util.cache_from_source(
+                            filename, optimization=self.optimize
+                        )
+                    )
+
+        outputs += [
+            os.path.join(build_dir, filename)
+            for package, src_dir, build_dir, filenames in self.data_files
+            for filename in filenames
+        ]
+
+        return outputs
+
+    def build_module(self, module, module_file, package):
+        if isinstance(package, str):
+            package = package.split('.')
+        elif not isinstance(package, (list, tuple)):
+            raise TypeError(
+                "'package' must be a string (dot-separated), list, or tuple"
+            )
+
+        # Now put the module source file into the "build" area -- this is
+        # easy, we just copy it somewhere under self.build_lib (the build
+        # directory for Python source).
+        outfile = self.get_module_outfile(self.build_lib, package, module)
+        dir = os.path.dirname(outfile)
+        self.mkpath(dir)
+        return self.copy_file(module_file, outfile, preserve_mode=False)
+
+    def build_modules(self):
+        modules = self.find_modules()
+        for package, module, module_file in modules:
+            # Now "build" the module -- ie. copy the source file to
+            # self.build_lib (the build directory for Python source).
+            # (Actually, it gets copied to the directory for this package
+            # under self.build_lib.)
+            self.build_module(module, module_file, package)
+
+    def build_packages(self):
+        for package in self.packages:
+            # Get list of (package, module, module_file) tuples based on
+            # scanning the package directory.  'package' is only included
+            # in the tuple so that 'find_modules()' and
+            # 'find_package_tuples()' have a consistent interface; it's
+            # ignored here (apart from a sanity check).  Also, 'module' is
+            # the *unqualified* module name (ie. no dots, no package -- we
+            # already know its package!), and 'module_file' is the path to
+            # the .py file, relative to the current directory
+            # (ie. including 'package_dir').
+            package_dir = self.get_package_dir(package)
+            modules = self.find_package_modules(package, package_dir)
+
+            # Now loop over the modules we found, "building" each one (just
+            # copy it to self.build_lib).
+            for package_, module, module_file in modules:
+                assert package == package_
+                self.build_module(module, module_file, package)
+
+    def byte_compile(self, files):
+        if sys.dont_write_bytecode:
+            self.warn('byte-compiling is disabled, skipping.')
+            return
+
+        from ..util import byte_compile
+
+        prefix = self.build_lib
+        if prefix[-1] != os.sep:
+            prefix = prefix + os.sep
+
+        # XXX this code is essentially the same as the 'byte_compile()
+        # method of the "install_lib" command, except for the determination
+        # of the 'prefix' string.  Hmmm.
+        if self.compile:
+            byte_compile(
+                files, optimize=0, force=self.force, prefix=prefix, dry_run=self.dry_run
+            )
+        if self.optimize > 0:
+            byte_compile(
+                files,
+                optimize=self.optimize,
+                force=self.force,
+                prefix=prefix,
+                dry_run=self.dry_run,
+            )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_scripts.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_scripts.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e5963c243db56b0c8c13b92501e07ecd4b17224
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/build_scripts.py
@@ -0,0 +1,170 @@
+"""distutils.command.build_scripts
+
+Implements the Distutils 'build_scripts' command."""
+
+import os
+import re
+import tokenize
+from distutils import sysconfig
+from distutils._log import log
+from stat import ST_MODE
+
+from .._modified import newer
+from ..core import Command
+from ..util import convert_path
+
+shebang_pattern = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
+"""
+Pattern matching a Python interpreter indicated in first line of a script.
+"""
+
+# for Setuptools compatibility
+first_line_re = shebang_pattern
+
+
+class build_scripts(Command):
+    description = "\"build\" scripts (copy and fixup #! line)"
+
+    user_options = [
+        ('build-dir=', 'd', "directory to \"build\" (copy) to"),
+        ('force', 'f', "forcibly build everything (ignore file timestamps"),
+        ('executable=', 'e', "specify final destination interpreter path"),
+    ]
+
+    boolean_options = ['force']
+
+    def initialize_options(self):
+        self.build_dir = None
+        self.scripts = None
+        self.force = None
+        self.executable = None
+
+    def finalize_options(self):
+        self.set_undefined_options(
+            'build',
+            ('build_scripts', 'build_dir'),
+            ('force', 'force'),
+            ('executable', 'executable'),
+        )
+        self.scripts = self.distribution.scripts
+
+    def get_source_files(self):
+        return self.scripts
+
+    def run(self):
+        if not self.scripts:
+            return
+        self.copy_scripts()
+
+    def copy_scripts(self):
+        """
+        Copy each script listed in ``self.scripts``.
+
+        If a script is marked as a Python script (first line matches
+        'shebang_pattern', i.e. starts with ``#!`` and contains
+        "python"), then adjust in the copy the first line to refer to
+        the current Python interpreter.
+        """
+        self.mkpath(self.build_dir)
+        outfiles = []
+        updated_files = []
+        for script in self.scripts:
+            self._copy_script(script, outfiles, updated_files)
+
+        self._change_modes(outfiles)
+
+        return outfiles, updated_files
+
+    def _copy_script(self, script, outfiles, updated_files):  # noqa: C901
+        shebang_match = None
+        script = convert_path(script)
+        outfile = os.path.join(self.build_dir, os.path.basename(script))
+        outfiles.append(outfile)
+
+        if not self.force and not newer(script, outfile):
+            log.debug("not copying %s (up-to-date)", script)
+            return
+
+        # Always open the file, but ignore failures in dry-run mode
+        # in order to attempt to copy directly.
+        try:
+            f = tokenize.open(script)
+        except OSError:
+            if not self.dry_run:
+                raise
+            f = None
+        else:
+            first_line = f.readline()
+            if not first_line:
+                self.warn(f"{script} is an empty file (skipping)")
+                return
+
+            shebang_match = shebang_pattern.match(first_line)
+
+        updated_files.append(outfile)
+        if shebang_match:
+            log.info("copying and adjusting %s -> %s", script, self.build_dir)
+            if not self.dry_run:
+                if not sysconfig.python_build:
+                    executable = self.executable
+                else:
+                    executable = os.path.join(
+                        sysconfig.get_config_var("BINDIR"),
+                        "python{}{}".format(
+                            sysconfig.get_config_var("VERSION"),
+                            sysconfig.get_config_var("EXE"),
+                        ),
+                    )
+                post_interp = shebang_match.group(1) or ''
+                shebang = "#!" + executable + post_interp + "\n"
+                self._validate_shebang(shebang, f.encoding)
+                with open(outfile, "w", encoding=f.encoding) as outf:
+                    outf.write(shebang)
+                    outf.writelines(f.readlines())
+            if f:
+                f.close()
+        else:
+            if f:
+                f.close()
+            self.copy_file(script, outfile)
+
+    def _change_modes(self, outfiles):
+        if os.name != 'posix':
+            return
+
+        for file in outfiles:
+            self._change_mode(file)
+
+    def _change_mode(self, file):
+        if self.dry_run:
+            log.info("changing mode of %s", file)
+            return
+
+        oldmode = os.stat(file)[ST_MODE] & 0o7777
+        newmode = (oldmode | 0o555) & 0o7777
+        if newmode != oldmode:
+            log.info("changing mode of %s from %o to %o", file, oldmode, newmode)
+            os.chmod(file, newmode)
+
+    @staticmethod
+    def _validate_shebang(shebang, encoding):
+        # Python parser starts to read a script using UTF-8 until
+        # it gets a #coding:xxx cookie. The shebang has to be the
+        # first line of a file, the #coding:xxx cookie cannot be
+        # written before. So the shebang has to be encodable to
+        # UTF-8.
+        try:
+            shebang.encode('utf-8')
+        except UnicodeEncodeError:
+            raise ValueError(f"The shebang ({shebang!r}) is not encodable to utf-8")
+
+        # If the script is encoded to a custom encoding (use a
+        # #coding:xxx cookie), the shebang has to be encodable to
+        # the script encoding too.
+        try:
+            shebang.encode(encoding)
+        except UnicodeEncodeError:
+            raise ValueError(
+                f"The shebang ({shebang!r}) is not encodable "
+                f"to the script encoding ({encoding})"
+            )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/check.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/check.py
new file mode 100644
index 0000000000000000000000000000000000000000..58b3f949f9f262acc389bb4a00c75aa98689b6f6
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/check.py
@@ -0,0 +1,155 @@
+"""distutils.command.check
+
+Implements the Distutils 'check' command.
+"""
+
+import contextlib
+
+from ..core import Command
+from ..errors import DistutilsSetupError
+
+with contextlib.suppress(ImportError):
+    import docutils.frontend
+    import docutils.nodes
+    import docutils.parsers.rst
+    import docutils.utils
+
+    class SilentReporter(docutils.utils.Reporter):
+        def __init__(
+            self,
+            source,
+            report_level,
+            halt_level,
+            stream=None,
+            debug=False,
+            encoding='ascii',
+            error_handler='replace',
+        ):
+            self.messages = []
+            super().__init__(
+                source, report_level, halt_level, stream, debug, encoding, error_handler
+            )
+
+        def system_message(self, level, message, *children, **kwargs):
+            self.messages.append((level, message, children, kwargs))
+            return docutils.nodes.system_message(
+                message, *children, level=level, type=self.levels[level], **kwargs
+            )
+
+
+class check(Command):
+    """This command checks the meta-data of the package."""
+
+    description = "perform some checks on the package"
+    user_options = [
+        ('metadata', 'm', 'Verify meta-data'),
+        (
+            'restructuredtext',
+            'r',
+            (
+                'Checks if long string meta-data syntax '
+                'are reStructuredText-compliant'
+            ),
+        ),
+        ('strict', 's', 'Will exit with an error if a check fails'),
+    ]
+
+    boolean_options = ['metadata', 'restructuredtext', 'strict']
+
+    def initialize_options(self):
+        """Sets default values for options."""
+        self.restructuredtext = False
+        self.metadata = 1
+        self.strict = False
+        self._warnings = 0
+
+    def finalize_options(self):
+        pass
+
+    def warn(self, msg):
+        """Counts the number of warnings that occurs."""
+        self._warnings += 1
+        return Command.warn(self, msg)
+
+    def run(self):
+        """Runs the command."""
+        # perform the various tests
+        if self.metadata:
+            self.check_metadata()
+        if self.restructuredtext:
+            if 'docutils' in globals():
+                try:
+                    self.check_restructuredtext()
+                except TypeError as exc:
+                    raise DistutilsSetupError(str(exc))
+            elif self.strict:
+                raise DistutilsSetupError('The docutils package is needed.')
+
+        # let's raise an error in strict mode, if we have at least
+        # one warning
+        if self.strict and self._warnings > 0:
+            raise DistutilsSetupError('Please correct your package.')
+
+    def check_metadata(self):
+        """Ensures that all required elements of meta-data are supplied.
+
+        Required fields:
+            name, version
+
+        Warns if any are missing.
+        """
+        metadata = self.distribution.metadata
+
+        missing = []
+        for attr in 'name', 'version':
+            if not getattr(metadata, attr, None):
+                missing.append(attr)
+
+        if missing:
+            self.warn("missing required meta-data: {}".format(', '.join(missing)))
+
+    def check_restructuredtext(self):
+        """Checks if the long string fields are reST-compliant."""
+        data = self.distribution.get_long_description()
+        for warning in self._check_rst_data(data):
+            line = warning[-1].get('line')
+            if line is None:
+                warning = warning[1]
+            else:
+                warning = f'{warning[1]} (line {line})'
+            self.warn(warning)
+
+    def _check_rst_data(self, data):
+        """Returns warnings when the provided data doesn't compile."""
+        # the include and csv_table directives need this to be a path
+        source_path = self.distribution.script_name or 'setup.py'
+        parser = docutils.parsers.rst.Parser()
+        settings = docutils.frontend.OptionParser(
+            components=(docutils.parsers.rst.Parser,)
+        ).get_default_values()
+        settings.tab_width = 4
+        settings.pep_references = None
+        settings.rfc_references = None
+        reporter = SilentReporter(
+            source_path,
+            settings.report_level,
+            settings.halt_level,
+            stream=settings.warning_stream,
+            debug=settings.debug,
+            encoding=settings.error_encoding,
+            error_handler=settings.error_encoding_error_handler,
+        )
+
+        document = docutils.nodes.document(settings, reporter, source=source_path)
+        document.note_source(source_path, -1)
+        try:
+            parser.parse(data, document)
+        except AttributeError as e:
+            reporter.messages.append((
+                -1,
+                f'Could not finish the parsing: {e}.',
+                '',
+                {},
+            ))
+
+        return reporter.messages
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/clean.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/clean.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb54a60ed4fb755b53e0d2a0d7bffcd3837c5e07
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/clean.py
@@ -0,0 +1,76 @@
+"""distutils.command.clean
+
+Implements the Distutils 'clean' command."""
+
+# contributed by Bastian Kleineidam , added 2000-03-18
+
+import os
+from distutils._log import log
+
+from ..core import Command
+from ..dir_util import remove_tree
+
+
+class clean(Command):
+    description = "clean up temporary files from 'build' command"
+    user_options = [
+        ('build-base=', 'b', "base build directory [default: 'build.build-base']"),
+        (
+            'build-lib=',
+            None,
+            "build directory for all modules [default: 'build.build-lib']",
+        ),
+        ('build-temp=', 't', "temporary build directory [default: 'build.build-temp']"),
+        (
+            'build-scripts=',
+            None,
+            "build directory for scripts [default: 'build.build-scripts']",
+        ),
+        ('bdist-base=', None, "temporary directory for built distributions"),
+        ('all', 'a', "remove all build output, not just temporary by-products"),
+    ]
+
+    boolean_options = ['all']
+
+    def initialize_options(self):
+        self.build_base = None
+        self.build_lib = None
+        self.build_temp = None
+        self.build_scripts = None
+        self.bdist_base = None
+        self.all = None
+
+    def finalize_options(self):
+        self.set_undefined_options(
+            'build',
+            ('build_base', 'build_base'),
+            ('build_lib', 'build_lib'),
+            ('build_scripts', 'build_scripts'),
+            ('build_temp', 'build_temp'),
+        )
+        self.set_undefined_options('bdist', ('bdist_base', 'bdist_base'))
+
+    def run(self):
+        # remove the build/temp. directory (unless it's already
+        # gone)
+        if os.path.exists(self.build_temp):
+            remove_tree(self.build_temp, dry_run=self.dry_run)
+        else:
+            log.debug("'%s' does not exist -- can't clean it", self.build_temp)
+
+        if self.all:
+            # remove build directories
+            for directory in (self.build_lib, self.bdist_base, self.build_scripts):
+                if os.path.exists(directory):
+                    remove_tree(directory, dry_run=self.dry_run)
+                else:
+                    log.warning("'%s' does not exist -- can't clean it", directory)
+
+        # just for the heck of it, try to remove the base build directory:
+        # we might have emptied it right now, but if not we don't care
+        if not self.dry_run:
+            try:
+                os.rmdir(self.build_base)
+                log.info("removing '%s'", self.build_base)
+            except OSError:
+                pass
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/config.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe83c2924dc8f2b21defe51b7e3c00ff92e5e7f0
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/config.py
@@ -0,0 +1,369 @@
+"""distutils.command.config
+
+Implements the Distutils 'config' command, a (mostly) empty command class
+that exists mainly to be sub-classed by specific module distributions and
+applications.  The idea is that while every "config" command is different,
+at least they're all named the same, and users always see "config" in the
+list of standard commands.  Also, this is a good place to put common
+configure-like tasks: "try to compile this C code", or "figure out where
+this header file lives".
+"""
+
+from __future__ import annotations
+
+import os
+import pathlib
+import re
+from collections.abc import Sequence
+from distutils._log import log
+
+from ..core import Command
+from ..errors import DistutilsExecError
+from ..sysconfig import customize_compiler
+
+LANG_EXT = {"c": ".c", "c++": ".cxx"}
+
+
+class config(Command):
+    description = "prepare to build"
+
+    user_options = [
+        ('compiler=', None, "specify the compiler type"),
+        ('cc=', None, "specify the compiler executable"),
+        ('include-dirs=', 'I', "list of directories to search for header files"),
+        ('define=', 'D', "C preprocessor macros to define"),
+        ('undef=', 'U', "C preprocessor macros to undefine"),
+        ('libraries=', 'l', "external C libraries to link with"),
+        ('library-dirs=', 'L', "directories to search for external C libraries"),
+        ('noisy', None, "show every action (compile, link, run, ...) taken"),
+        (
+            'dump-source',
+            None,
+            "dump generated source files before attempting to compile them",
+        ),
+    ]
+
+    # The three standard command methods: since the "config" command
+    # does nothing by default, these are empty.
+
+    def initialize_options(self):
+        self.compiler = None
+        self.cc = None
+        self.include_dirs = None
+        self.libraries = None
+        self.library_dirs = None
+
+        # maximal output for now
+        self.noisy = 1
+        self.dump_source = 1
+
+        # list of temporary files generated along-the-way that we have
+        # to clean at some point
+        self.temp_files = []
+
+    def finalize_options(self):
+        if self.include_dirs is None:
+            self.include_dirs = self.distribution.include_dirs or []
+        elif isinstance(self.include_dirs, str):
+            self.include_dirs = self.include_dirs.split(os.pathsep)
+
+        if self.libraries is None:
+            self.libraries = []
+        elif isinstance(self.libraries, str):
+            self.libraries = [self.libraries]
+
+        if self.library_dirs is None:
+            self.library_dirs = []
+        elif isinstance(self.library_dirs, str):
+            self.library_dirs = self.library_dirs.split(os.pathsep)
+
+    def run(self):
+        pass
+
+    # Utility methods for actual "config" commands.  The interfaces are
+    # loosely based on Autoconf macros of similar names.  Sub-classes
+    # may use these freely.
+
+    def _check_compiler(self):
+        """Check that 'self.compiler' really is a CCompiler object;
+        if not, make it one.
+        """
+        # We do this late, and only on-demand, because this is an expensive
+        # import.
+        from ..ccompiler import CCompiler, new_compiler
+
+        if not isinstance(self.compiler, CCompiler):
+            self.compiler = new_compiler(
+                compiler=self.compiler, dry_run=self.dry_run, force=True
+            )
+            customize_compiler(self.compiler)
+            if self.include_dirs:
+                self.compiler.set_include_dirs(self.include_dirs)
+            if self.libraries:
+                self.compiler.set_libraries(self.libraries)
+            if self.library_dirs:
+                self.compiler.set_library_dirs(self.library_dirs)
+
+    def _gen_temp_sourcefile(self, body, headers, lang):
+        filename = "_configtest" + LANG_EXT[lang]
+        with open(filename, "w", encoding='utf-8') as file:
+            if headers:
+                for header in headers:
+                    file.write(f"#include <{header}>\n")
+                file.write("\n")
+            file.write(body)
+            if body[-1] != "\n":
+                file.write("\n")
+        return filename
+
+    def _preprocess(self, body, headers, include_dirs, lang):
+        src = self._gen_temp_sourcefile(body, headers, lang)
+        out = "_configtest.i"
+        self.temp_files.extend([src, out])
+        self.compiler.preprocess(src, out, include_dirs=include_dirs)
+        return (src, out)
+
+    def _compile(self, body, headers, include_dirs, lang):
+        src = self._gen_temp_sourcefile(body, headers, lang)
+        if self.dump_source:
+            dump_file(src, f"compiling '{src}':")
+        (obj,) = self.compiler.object_filenames([src])
+        self.temp_files.extend([src, obj])
+        self.compiler.compile([src], include_dirs=include_dirs)
+        return (src, obj)
+
+    def _link(self, body, headers, include_dirs, libraries, library_dirs, lang):
+        (src, obj) = self._compile(body, headers, include_dirs, lang)
+        prog = os.path.splitext(os.path.basename(src))[0]
+        self.compiler.link_executable(
+            [obj],
+            prog,
+            libraries=libraries,
+            library_dirs=library_dirs,
+            target_lang=lang,
+        )
+
+        if self.compiler.exe_extension is not None:
+            prog = prog + self.compiler.exe_extension
+        self.temp_files.append(prog)
+
+        return (src, obj, prog)
+
+    def _clean(self, *filenames):
+        if not filenames:
+            filenames = self.temp_files
+            self.temp_files = []
+        log.info("removing: %s", ' '.join(filenames))
+        for filename in filenames:
+            try:
+                os.remove(filename)
+            except OSError:
+                pass
+
+    # XXX these ignore the dry-run flag: what to do, what to do? even if
+    # you want a dry-run build, you still need some sort of configuration
+    # info.  My inclination is to make it up to the real config command to
+    # consult 'dry_run', and assume a default (minimal) configuration if
+    # true.  The problem with trying to do it here is that you'd have to
+    # return either true or false from all the 'try' methods, neither of
+    # which is correct.
+
+    # XXX need access to the header search path and maybe default macros.
+
+    def try_cpp(self, body=None, headers=None, include_dirs=None, lang="c"):
+        """Construct a source file from 'body' (a string containing lines
+        of C/C++ code) and 'headers' (a list of header files to include)
+        and run it through the preprocessor.  Return true if the
+        preprocessor succeeded, false if there were any errors.
+        ('body' probably isn't of much use, but what the heck.)
+        """
+        from ..ccompiler import CompileError
+
+        self._check_compiler()
+        ok = True
+        try:
+            self._preprocess(body, headers, include_dirs, lang)
+        except CompileError:
+            ok = False
+
+        self._clean()
+        return ok
+
+    def search_cpp(self, pattern, body=None, headers=None, include_dirs=None, lang="c"):
+        """Construct a source file (just like 'try_cpp()'), run it through
+        the preprocessor, and return true if any line of the output matches
+        'pattern'.  'pattern' should either be a compiled regex object or a
+        string containing a regex.  If both 'body' and 'headers' are None,
+        preprocesses an empty file -- which can be useful to determine the
+        symbols the preprocessor and compiler set by default.
+        """
+        self._check_compiler()
+        src, out = self._preprocess(body, headers, include_dirs, lang)
+
+        if isinstance(pattern, str):
+            pattern = re.compile(pattern)
+
+        with open(out, encoding='utf-8') as file:
+            match = any(pattern.search(line) for line in file)
+
+        self._clean()
+        return match
+
+    def try_compile(self, body, headers=None, include_dirs=None, lang="c"):
+        """Try to compile a source file built from 'body' and 'headers'.
+        Return true on success, false otherwise.
+        """
+        from ..ccompiler import CompileError
+
+        self._check_compiler()
+        try:
+            self._compile(body, headers, include_dirs, lang)
+            ok = True
+        except CompileError:
+            ok = False
+
+        log.info(ok and "success!" or "failure.")
+        self._clean()
+        return ok
+
+    def try_link(
+        self,
+        body,
+        headers=None,
+        include_dirs=None,
+        libraries=None,
+        library_dirs=None,
+        lang="c",
+    ):
+        """Try to compile and link a source file, built from 'body' and
+        'headers', to executable form.  Return true on success, false
+        otherwise.
+        """
+        from ..ccompiler import CompileError, LinkError
+
+        self._check_compiler()
+        try:
+            self._link(body, headers, include_dirs, libraries, library_dirs, lang)
+            ok = True
+        except (CompileError, LinkError):
+            ok = False
+
+        log.info(ok and "success!" or "failure.")
+        self._clean()
+        return ok
+
+    def try_run(
+        self,
+        body,
+        headers=None,
+        include_dirs=None,
+        libraries=None,
+        library_dirs=None,
+        lang="c",
+    ):
+        """Try to compile, link to an executable, and run a program
+        built from 'body' and 'headers'.  Return true on success, false
+        otherwise.
+        """
+        from ..ccompiler import CompileError, LinkError
+
+        self._check_compiler()
+        try:
+            src, obj, exe = self._link(
+                body, headers, include_dirs, libraries, library_dirs, lang
+            )
+            self.spawn([exe])
+            ok = True
+        except (CompileError, LinkError, DistutilsExecError):
+            ok = False
+
+        log.info(ok and "success!" or "failure.")
+        self._clean()
+        return ok
+
+    # -- High-level methods --------------------------------------------
+    # (these are the ones that are actually likely to be useful
+    # when implementing a real-world config command!)
+
+    def check_func(
+        self,
+        func,
+        headers=None,
+        include_dirs=None,
+        libraries=None,
+        library_dirs=None,
+        decl=False,
+        call=False,
+    ):
+        """Determine if function 'func' is available by constructing a
+        source file that refers to 'func', and compiles and links it.
+        If everything succeeds, returns true; otherwise returns false.
+
+        The constructed source file starts out by including the header
+        files listed in 'headers'.  If 'decl' is true, it then declares
+        'func' (as "int func()"); you probably shouldn't supply 'headers'
+        and set 'decl' true in the same call, or you might get errors about
+        a conflicting declarations for 'func'.  Finally, the constructed
+        'main()' function either references 'func' or (if 'call' is true)
+        calls it.  'libraries' and 'library_dirs' are used when
+        linking.
+        """
+        self._check_compiler()
+        body = []
+        if decl:
+            body.append(f"int {func} ();")
+        body.append("int main () {")
+        if call:
+            body.append(f"  {func}();")
+        else:
+            body.append(f"  {func};")
+        body.append("}")
+        body = "\n".join(body) + "\n"
+
+        return self.try_link(body, headers, include_dirs, libraries, library_dirs)
+
+    def check_lib(
+        self,
+        library,
+        library_dirs=None,
+        headers=None,
+        include_dirs=None,
+        other_libraries: Sequence[str] = [],
+    ):
+        """Determine if 'library' is available to be linked against,
+        without actually checking that any particular symbols are provided
+        by it.  'headers' will be used in constructing the source file to
+        be compiled, but the only effect of this is to check if all the
+        header files listed are available.  Any libraries listed in
+        'other_libraries' will be included in the link, in case 'library'
+        has symbols that depend on other libraries.
+        """
+        self._check_compiler()
+        return self.try_link(
+            "int main (void) { }",
+            headers,
+            include_dirs,
+            [library] + list(other_libraries),
+            library_dirs,
+        )
+
+    def check_header(self, header, include_dirs=None, library_dirs=None, lang="c"):
+        """Determine if the system header file named by 'header_file'
+        exists and can be found by the preprocessor; return true if so,
+        false otherwise.
+        """
+        return self.try_cpp(
+            body="/* No body */", headers=[header], include_dirs=include_dirs
+        )
+
+
+def dump_file(filename, head=None):
+    """Dumps a file content into log.info.
+
+    If head is not None, will be dumped before the file content.
+    """
+    if head is None:
+        log.info('%s', filename)
+    else:
+        log.info(head)
+    log.info(pathlib.Path(filename).read_text(encoding='utf-8'))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fc09eef89f5a2f3e84227b5261fac858e09caff
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install.py
@@ -0,0 +1,810 @@
+"""distutils.command.install
+
+Implements the Distutils 'install' command."""
+
+import contextlib
+import itertools
+import os
+import sys
+import sysconfig
+from distutils._log import log
+from site import USER_BASE, USER_SITE
+
+from .. import _collections
+from ..core import Command
+from ..debug import DEBUG
+from ..errors import DistutilsOptionError, DistutilsPlatformError
+from ..file_util import write_file
+from ..sysconfig import get_config_vars
+from ..util import change_root, convert_path, get_platform, subst_vars
+from . import _framework_compat as fw
+
+HAS_USER_SITE = True
+
+WINDOWS_SCHEME = {
+    'purelib': '{base}/Lib/site-packages',
+    'platlib': '{base}/Lib/site-packages',
+    'headers': '{base}/Include/{dist_name}',
+    'scripts': '{base}/Scripts',
+    'data': '{base}',
+}
+
+INSTALL_SCHEMES = {
+    'posix_prefix': {
+        'purelib': '{base}/lib/{implementation_lower}{py_version_short}/site-packages',
+        'platlib': '{platbase}/{platlibdir}/{implementation_lower}'
+        '{py_version_short}/site-packages',
+        'headers': '{base}/include/{implementation_lower}'
+        '{py_version_short}{abiflags}/{dist_name}',
+        'scripts': '{base}/bin',
+        'data': '{base}',
+    },
+    'posix_home': {
+        'purelib': '{base}/lib/{implementation_lower}',
+        'platlib': '{base}/{platlibdir}/{implementation_lower}',
+        'headers': '{base}/include/{implementation_lower}/{dist_name}',
+        'scripts': '{base}/bin',
+        'data': '{base}',
+    },
+    'nt': WINDOWS_SCHEME,
+    'pypy': {
+        'purelib': '{base}/site-packages',
+        'platlib': '{base}/site-packages',
+        'headers': '{base}/include/{dist_name}',
+        'scripts': '{base}/bin',
+        'data': '{base}',
+    },
+    'pypy_nt': {
+        'purelib': '{base}/site-packages',
+        'platlib': '{base}/site-packages',
+        'headers': '{base}/include/{dist_name}',
+        'scripts': '{base}/Scripts',
+        'data': '{base}',
+    },
+}
+
+# user site schemes
+if HAS_USER_SITE:
+    INSTALL_SCHEMES['nt_user'] = {
+        'purelib': '{usersite}',
+        'platlib': '{usersite}',
+        'headers': '{userbase}/{implementation}{py_version_nodot_plat}'
+        '/Include/{dist_name}',
+        'scripts': '{userbase}/{implementation}{py_version_nodot_plat}/Scripts',
+        'data': '{userbase}',
+    }
+
+    INSTALL_SCHEMES['posix_user'] = {
+        'purelib': '{usersite}',
+        'platlib': '{usersite}',
+        'headers': '{userbase}/include/{implementation_lower}'
+        '{py_version_short}{abiflags}/{dist_name}',
+        'scripts': '{userbase}/bin',
+        'data': '{userbase}',
+    }
+
+
+INSTALL_SCHEMES.update(fw.schemes)
+
+
+# The keys to an installation scheme; if any new types of files are to be
+# installed, be sure to add an entry to every installation scheme above,
+# and to SCHEME_KEYS here.
+SCHEME_KEYS = ('purelib', 'platlib', 'headers', 'scripts', 'data')
+
+
+def _load_sysconfig_schemes():
+    with contextlib.suppress(AttributeError):
+        return {
+            scheme: sysconfig.get_paths(scheme, expand=False)
+            for scheme in sysconfig.get_scheme_names()
+        }
+
+
+def _load_schemes():
+    """
+    Extend default schemes with schemes from sysconfig.
+    """
+
+    sysconfig_schemes = _load_sysconfig_schemes() or {}
+
+    return {
+        scheme: {
+            **INSTALL_SCHEMES.get(scheme, {}),
+            **sysconfig_schemes.get(scheme, {}),
+        }
+        for scheme in set(itertools.chain(INSTALL_SCHEMES, sysconfig_schemes))
+    }
+
+
+def _get_implementation():
+    if hasattr(sys, 'pypy_version_info'):
+        return 'PyPy'
+    else:
+        return 'Python'
+
+
+def _select_scheme(ob, name):
+    scheme = _inject_headers(name, _load_scheme(_resolve_scheme(name)))
+    vars(ob).update(_remove_set(ob, _scheme_attrs(scheme)))
+
+
+def _remove_set(ob, attrs):
+    """
+    Include only attrs that are None in ob.
+    """
+    return {key: value for key, value in attrs.items() if getattr(ob, key) is None}
+
+
+def _resolve_scheme(name):
+    os_name, sep, key = name.partition('_')
+    try:
+        resolved = sysconfig.get_preferred_scheme(key)
+    except Exception:
+        resolved = fw.scheme(_pypy_hack(name))
+    return resolved
+
+
+def _load_scheme(name):
+    return _load_schemes()[name]
+
+
+def _inject_headers(name, scheme):
+    """
+    Given a scheme name and the resolved scheme,
+    if the scheme does not include headers, resolve
+    the fallback scheme for the name and use headers
+    from it. pypa/distutils#88
+    """
+    # Bypass the preferred scheme, which may not
+    # have defined headers.
+    fallback = _load_scheme(_pypy_hack(name))
+    scheme.setdefault('headers', fallback['headers'])
+    return scheme
+
+
+def _scheme_attrs(scheme):
+    """Resolve install directories by applying the install schemes."""
+    return {f'install_{key}': scheme[key] for key in SCHEME_KEYS}
+
+
+def _pypy_hack(name):
+    PY37 = sys.version_info < (3, 8)
+    old_pypy = hasattr(sys, 'pypy_version_info') and PY37
+    prefix = not name.endswith(('_user', '_home'))
+    pypy_name = 'pypy' + '_nt' * (os.name == 'nt')
+    return pypy_name if old_pypy and prefix else name
+
+
+class install(Command):
+    description = "install everything from build directory"
+
+    user_options = [
+        # Select installation scheme and set base director(y|ies)
+        ('prefix=', None, "installation prefix"),
+        ('exec-prefix=', None, "(Unix only) prefix for platform-specific files"),
+        ('home=', None, "(Unix only) home directory to install under"),
+        # Or, just set the base director(y|ies)
+        (
+            'install-base=',
+            None,
+            "base installation directory (instead of --prefix or --home)",
+        ),
+        (
+            'install-platbase=',
+            None,
+            "base installation directory for platform-specific files (instead of --exec-prefix or --home)",
+        ),
+        ('root=', None, "install everything relative to this alternate root directory"),
+        # Or, explicitly set the installation scheme
+        (
+            'install-purelib=',
+            None,
+            "installation directory for pure Python module distributions",
+        ),
+        (
+            'install-platlib=',
+            None,
+            "installation directory for non-pure module distributions",
+        ),
+        (
+            'install-lib=',
+            None,
+            "installation directory for all module distributions (overrides --install-purelib and --install-platlib)",
+        ),
+        ('install-headers=', None, "installation directory for C/C++ headers"),
+        ('install-scripts=', None, "installation directory for Python scripts"),
+        ('install-data=', None, "installation directory for data files"),
+        # Byte-compilation options -- see install_lib.py for details, as
+        # these are duplicated from there (but only install_lib does
+        # anything with them).
+        ('compile', 'c', "compile .py to .pyc [default]"),
+        ('no-compile', None, "don't compile .py files"),
+        (
+            'optimize=',
+            'O',
+            "also compile with optimization: -O1 for \"python -O\", "
+            "-O2 for \"python -OO\", and -O0 to disable [default: -O0]",
+        ),
+        # Miscellaneous control options
+        ('force', 'f', "force installation (overwrite any existing files)"),
+        ('skip-build', None, "skip rebuilding everything (for testing/debugging)"),
+        # Where to install documentation (eventually!)
+        # ('doc-format=', None, "format of documentation to generate"),
+        # ('install-man=', None, "directory for Unix man pages"),
+        # ('install-html=', None, "directory for HTML documentation"),
+        # ('install-info=', None, "directory for GNU info files"),
+        ('record=', None, "filename in which to record list of installed files"),
+    ]
+
+    boolean_options = ['compile', 'force', 'skip-build']
+
+    if HAS_USER_SITE:
+        user_options.append((
+            'user',
+            None,
+            f"install in user site-package '{USER_SITE}'",
+        ))
+        boolean_options.append('user')
+
+    negative_opt = {'no-compile': 'compile'}
+
+    def initialize_options(self):
+        """Initializes options."""
+        # High-level options: these select both an installation base
+        # and scheme.
+        self.prefix = None
+        self.exec_prefix = None
+        self.home = None
+        self.user = False
+
+        # These select only the installation base; it's up to the user to
+        # specify the installation scheme (currently, that means supplying
+        # the --install-{platlib,purelib,scripts,data} options).
+        self.install_base = None
+        self.install_platbase = None
+        self.root = None
+
+        # These options are the actual installation directories; if not
+        # supplied by the user, they are filled in using the installation
+        # scheme implied by prefix/exec-prefix/home and the contents of
+        # that installation scheme.
+        self.install_purelib = None  # for pure module distributions
+        self.install_platlib = None  # non-pure (dists w/ extensions)
+        self.install_headers = None  # for C/C++ headers
+        self.install_lib = None  # set to either purelib or platlib
+        self.install_scripts = None
+        self.install_data = None
+        self.install_userbase = USER_BASE
+        self.install_usersite = USER_SITE
+
+        self.compile = None
+        self.optimize = None
+
+        # Deprecated
+        # These two are for putting non-packagized distributions into their
+        # own directory and creating a .pth file if it makes sense.
+        # 'extra_path' comes from the setup file; 'install_path_file' can
+        # be turned off if it makes no sense to install a .pth file.  (But
+        # better to install it uselessly than to guess wrong and not
+        # install it when it's necessary and would be used!)  Currently,
+        # 'install_path_file' is always true unless some outsider meddles
+        # with it.
+        self.extra_path = None
+        self.install_path_file = True
+
+        # 'force' forces installation, even if target files are not
+        # out-of-date.  'skip_build' skips running the "build" command,
+        # handy if you know it's not necessary.  'warn_dir' (which is *not*
+        # a user option, it's just there so the bdist_* commands can turn
+        # it off) determines whether we warn about installing to a
+        # directory not in sys.path.
+        self.force = False
+        self.skip_build = False
+        self.warn_dir = True
+
+        # These are only here as a conduit from the 'build' command to the
+        # 'install_*' commands that do the real work.  ('build_base' isn't
+        # actually used anywhere, but it might be useful in future.)  They
+        # are not user options, because if the user told the install
+        # command where the build directory is, that wouldn't affect the
+        # build command.
+        self.build_base = None
+        self.build_lib = None
+
+        # Not defined yet because we don't know anything about
+        # documentation yet.
+        # self.install_man = None
+        # self.install_html = None
+        # self.install_info = None
+
+        self.record = None
+
+    # -- Option finalizing methods -------------------------------------
+    # (This is rather more involved than for most commands,
+    # because this is where the policy for installing third-
+    # party Python modules on various platforms given a wide
+    # array of user input is decided.  Yes, it's quite complex!)
+
+    def finalize_options(self):  # noqa: C901
+        """Finalizes options."""
+        # This method (and its helpers, like 'finalize_unix()',
+        # 'finalize_other()', and 'select_scheme()') is where the default
+        # installation directories for modules, extension modules, and
+        # anything else we care to install from a Python module
+        # distribution.  Thus, this code makes a pretty important policy
+        # statement about how third-party stuff is added to a Python
+        # installation!  Note that the actual work of installation is done
+        # by the relatively simple 'install_*' commands; they just take
+        # their orders from the installation directory options determined
+        # here.
+
+        # Check for errors/inconsistencies in the options; first, stuff
+        # that's wrong on any platform.
+
+        if (self.prefix or self.exec_prefix or self.home) and (
+            self.install_base or self.install_platbase
+        ):
+            raise DistutilsOptionError(
+                "must supply either prefix/exec-prefix/home or install-base/install-platbase -- not both"
+            )
+
+        if self.home and (self.prefix or self.exec_prefix):
+            raise DistutilsOptionError(
+                "must supply either home or prefix/exec-prefix -- not both"
+            )
+
+        if self.user and (
+            self.prefix
+            or self.exec_prefix
+            or self.home
+            or self.install_base
+            or self.install_platbase
+        ):
+            raise DistutilsOptionError(
+                "can't combine user with prefix, "
+                "exec_prefix/home, or install_(plat)base"
+            )
+
+        # Next, stuff that's wrong (or dubious) only on certain platforms.
+        if os.name != "posix":
+            if self.exec_prefix:
+                self.warn("exec-prefix option ignored on this platform")
+                self.exec_prefix = None
+
+        # Now the interesting logic -- so interesting that we farm it out
+        # to other methods.  The goal of these methods is to set the final
+        # values for the install_{lib,scripts,data,...}  options, using as
+        # input a heady brew of prefix, exec_prefix, home, install_base,
+        # install_platbase, user-supplied versions of
+        # install_{purelib,platlib,lib,scripts,data,...}, and the
+        # install schemes.  Phew!
+
+        self.dump_dirs("pre-finalize_{unix,other}")
+
+        if os.name == 'posix':
+            self.finalize_unix()
+        else:
+            self.finalize_other()
+
+        self.dump_dirs("post-finalize_{unix,other}()")
+
+        # Expand configuration variables, tilde, etc. in self.install_base
+        # and self.install_platbase -- that way, we can use $base or
+        # $platbase in the other installation directories and not worry
+        # about needing recursive variable expansion (shudder).
+
+        py_version = sys.version.split()[0]
+        (prefix, exec_prefix) = get_config_vars('prefix', 'exec_prefix')
+        try:
+            abiflags = sys.abiflags
+        except AttributeError:
+            # sys.abiflags may not be defined on all platforms.
+            abiflags = ''
+        local_vars = {
+            'dist_name': self.distribution.get_name(),
+            'dist_version': self.distribution.get_version(),
+            'dist_fullname': self.distribution.get_fullname(),
+            'py_version': py_version,
+            'py_version_short': '%d.%d' % sys.version_info[:2],
+            'py_version_nodot': '%d%d' % sys.version_info[:2],
+            'sys_prefix': prefix,
+            'prefix': prefix,
+            'sys_exec_prefix': exec_prefix,
+            'exec_prefix': exec_prefix,
+            'abiflags': abiflags,
+            'platlibdir': getattr(sys, 'platlibdir', 'lib'),
+            'implementation_lower': _get_implementation().lower(),
+            'implementation': _get_implementation(),
+        }
+
+        # vars for compatibility on older Pythons
+        compat_vars = dict(
+            # Python 3.9 and earlier
+            py_version_nodot_plat=getattr(sys, 'winver', '').replace('.', ''),
+        )
+
+        if HAS_USER_SITE:
+            local_vars['userbase'] = self.install_userbase
+            local_vars['usersite'] = self.install_usersite
+
+        self.config_vars = _collections.DictStack([
+            fw.vars(),
+            compat_vars,
+            sysconfig.get_config_vars(),
+            local_vars,
+        ])
+
+        self.expand_basedirs()
+
+        self.dump_dirs("post-expand_basedirs()")
+
+        # Now define config vars for the base directories so we can expand
+        # everything else.
+        local_vars['base'] = self.install_base
+        local_vars['platbase'] = self.install_platbase
+
+        if DEBUG:
+            from pprint import pprint
+
+            print("config vars:")
+            pprint(dict(self.config_vars))
+
+        # Expand "~" and configuration variables in the installation
+        # directories.
+        self.expand_dirs()
+
+        self.dump_dirs("post-expand_dirs()")
+
+        # Create directories in the home dir:
+        if self.user:
+            self.create_home_path()
+
+        # Pick the actual directory to install all modules to: either
+        # install_purelib or install_platlib, depending on whether this
+        # module distribution is pure or not.  Of course, if the user
+        # already specified install_lib, use their selection.
+        if self.install_lib is None:
+            if self.distribution.has_ext_modules():  # has extensions: non-pure
+                self.install_lib = self.install_platlib
+            else:
+                self.install_lib = self.install_purelib
+
+        # Convert directories from Unix /-separated syntax to the local
+        # convention.
+        self.convert_paths(
+            'lib',
+            'purelib',
+            'platlib',
+            'scripts',
+            'data',
+            'headers',
+            'userbase',
+            'usersite',
+        )
+
+        # Deprecated
+        # Well, we're not actually fully completely finalized yet: we still
+        # have to deal with 'extra_path', which is the hack for allowing
+        # non-packagized module distributions (hello, Numerical Python!) to
+        # get their own directories.
+        self.handle_extra_path()
+        self.install_libbase = self.install_lib  # needed for .pth file
+        self.install_lib = os.path.join(self.install_lib, self.extra_dirs)
+
+        # If a new root directory was supplied, make all the installation
+        # dirs relative to it.
+        if self.root is not None:
+            self.change_roots(
+                'libbase', 'lib', 'purelib', 'platlib', 'scripts', 'data', 'headers'
+            )
+
+        self.dump_dirs("after prepending root")
+
+        # Find out the build directories, ie. where to install from.
+        self.set_undefined_options(
+            'build', ('build_base', 'build_base'), ('build_lib', 'build_lib')
+        )
+
+        # Punt on doc directories for now -- after all, we're punting on
+        # documentation completely!
+
+    def dump_dirs(self, msg):
+        """Dumps the list of user options."""
+        if not DEBUG:
+            return
+        from ..fancy_getopt import longopt_xlate
+
+        log.debug(msg + ":")
+        for opt in self.user_options:
+            opt_name = opt[0]
+            if opt_name[-1] == "=":
+                opt_name = opt_name[0:-1]
+            if opt_name in self.negative_opt:
+                opt_name = self.negative_opt[opt_name]
+                opt_name = opt_name.translate(longopt_xlate)
+                val = not getattr(self, opt_name)
+            else:
+                opt_name = opt_name.translate(longopt_xlate)
+                val = getattr(self, opt_name)
+            log.debug("  %s: %s", opt_name, val)
+
+    def finalize_unix(self):
+        """Finalizes options for posix platforms."""
+        if self.install_base is not None or self.install_platbase is not None:
+            incomplete_scheme = (
+                (
+                    self.install_lib is None
+                    and self.install_purelib is None
+                    and self.install_platlib is None
+                )
+                or self.install_headers is None
+                or self.install_scripts is None
+                or self.install_data is None
+            )
+            if incomplete_scheme:
+                raise DistutilsOptionError(
+                    "install-base or install-platbase supplied, but "
+                    "installation scheme is incomplete"
+                )
+            return
+
+        if self.user:
+            if self.install_userbase is None:
+                raise DistutilsPlatformError("User base directory is not specified")
+            self.install_base = self.install_platbase = self.install_userbase
+            self.select_scheme("posix_user")
+        elif self.home is not None:
+            self.install_base = self.install_platbase = self.home
+            self.select_scheme("posix_home")
+        else:
+            if self.prefix is None:
+                if self.exec_prefix is not None:
+                    raise DistutilsOptionError(
+                        "must not supply exec-prefix without prefix"
+                    )
+
+                # Allow Fedora to add components to the prefix
+                _prefix_addition = getattr(sysconfig, '_prefix_addition', "")
+
+                self.prefix = os.path.normpath(sys.prefix) + _prefix_addition
+                self.exec_prefix = os.path.normpath(sys.exec_prefix) + _prefix_addition
+
+            else:
+                if self.exec_prefix is None:
+                    self.exec_prefix = self.prefix
+
+            self.install_base = self.prefix
+            self.install_platbase = self.exec_prefix
+            self.select_scheme("posix_prefix")
+
+    def finalize_other(self):
+        """Finalizes options for non-posix platforms"""
+        if self.user:
+            if self.install_userbase is None:
+                raise DistutilsPlatformError("User base directory is not specified")
+            self.install_base = self.install_platbase = self.install_userbase
+            self.select_scheme(os.name + "_user")
+        elif self.home is not None:
+            self.install_base = self.install_platbase = self.home
+            self.select_scheme("posix_home")
+        else:
+            if self.prefix is None:
+                self.prefix = os.path.normpath(sys.prefix)
+
+            self.install_base = self.install_platbase = self.prefix
+            try:
+                self.select_scheme(os.name)
+            except KeyError:
+                raise DistutilsPlatformError(
+                    f"I don't know how to install stuff on '{os.name}'"
+                )
+
+    def select_scheme(self, name):
+        _select_scheme(self, name)
+
+    def _expand_attrs(self, attrs):
+        for attr in attrs:
+            val = getattr(self, attr)
+            if val is not None:
+                if os.name in ('posix', 'nt'):
+                    val = os.path.expanduser(val)
+                val = subst_vars(val, self.config_vars)
+                setattr(self, attr, val)
+
+    def expand_basedirs(self):
+        """Calls `os.path.expanduser` on install_base, install_platbase and
+        root."""
+        self._expand_attrs(['install_base', 'install_platbase', 'root'])
+
+    def expand_dirs(self):
+        """Calls `os.path.expanduser` on install dirs."""
+        self._expand_attrs([
+            'install_purelib',
+            'install_platlib',
+            'install_lib',
+            'install_headers',
+            'install_scripts',
+            'install_data',
+        ])
+
+    def convert_paths(self, *names):
+        """Call `convert_path` over `names`."""
+        for name in names:
+            attr = "install_" + name
+            setattr(self, attr, convert_path(getattr(self, attr)))
+
+    def handle_extra_path(self):
+        """Set `path_file` and `extra_dirs` using `extra_path`."""
+        if self.extra_path is None:
+            self.extra_path = self.distribution.extra_path
+
+        if self.extra_path is not None:
+            log.warning(
+                "Distribution option extra_path is deprecated. "
+                "See issue27919 for details."
+            )
+            if isinstance(self.extra_path, str):
+                self.extra_path = self.extra_path.split(',')
+
+            if len(self.extra_path) == 1:
+                path_file = extra_dirs = self.extra_path[0]
+            elif len(self.extra_path) == 2:
+                path_file, extra_dirs = self.extra_path
+            else:
+                raise DistutilsOptionError(
+                    "'extra_path' option must be a list, tuple, or "
+                    "comma-separated string with 1 or 2 elements"
+                )
+
+            # convert to local form in case Unix notation used (as it
+            # should be in setup scripts)
+            extra_dirs = convert_path(extra_dirs)
+        else:
+            path_file = None
+            extra_dirs = ''
+
+        # XXX should we warn if path_file and not extra_dirs? (in which
+        # case the path file would be harmless but pointless)
+        self.path_file = path_file
+        self.extra_dirs = extra_dirs
+
+    def change_roots(self, *names):
+        """Change the install directories pointed by name using root."""
+        for name in names:
+            attr = "install_" + name
+            setattr(self, attr, change_root(self.root, getattr(self, attr)))
+
+    def create_home_path(self):
+        """Create directories under ~."""
+        if not self.user:
+            return
+        home = convert_path(os.path.expanduser("~"))
+        for _name, path in self.config_vars.items():
+            if str(path).startswith(home) and not os.path.isdir(path):
+                self.debug_print(f"os.makedirs('{path}', 0o700)")
+                os.makedirs(path, 0o700)
+
+    # -- Command execution methods -------------------------------------
+
+    def run(self):
+        """Runs the command."""
+        # Obviously have to build before we can install
+        if not self.skip_build:
+            self.run_command('build')
+            # If we built for any other platform, we can't install.
+            build_plat = self.distribution.get_command_obj('build').plat_name
+            # check warn_dir - it is a clue that the 'install' is happening
+            # internally, and not to sys.path, so we don't check the platform
+            # matches what we are running.
+            if self.warn_dir and build_plat != get_platform():
+                raise DistutilsPlatformError("Can't install when cross-compiling")
+
+        # Run all sub-commands (at least those that need to be run)
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        if self.path_file:
+            self.create_path_file()
+
+        # write list of installed files, if requested.
+        if self.record:
+            outputs = self.get_outputs()
+            if self.root:  # strip any package prefix
+                root_len = len(self.root)
+                for counter in range(len(outputs)):
+                    outputs[counter] = outputs[counter][root_len:]
+            self.execute(
+                write_file,
+                (self.record, outputs),
+                f"writing list of installed files to '{self.record}'",
+            )
+
+        sys_path = map(os.path.normpath, sys.path)
+        sys_path = map(os.path.normcase, sys_path)
+        install_lib = os.path.normcase(os.path.normpath(self.install_lib))
+        if (
+            self.warn_dir
+            and not (self.path_file and self.install_path_file)
+            and install_lib not in sys_path
+        ):
+            log.debug(
+                (
+                    "modules installed to '%s', which is not in "
+                    "Python's module search path (sys.path) -- "
+                    "you'll have to change the search path yourself"
+                ),
+                self.install_lib,
+            )
+
+    def create_path_file(self):
+        """Creates the .pth file"""
+        filename = os.path.join(self.install_libbase, self.path_file + ".pth")
+        if self.install_path_file:
+            self.execute(
+                write_file, (filename, [self.extra_dirs]), f"creating {filename}"
+            )
+        else:
+            self.warn(f"path file '{filename}' not created")
+
+    # -- Reporting methods ---------------------------------------------
+
+    def get_outputs(self):
+        """Assembles the outputs of all the sub-commands."""
+        outputs = []
+        for cmd_name in self.get_sub_commands():
+            cmd = self.get_finalized_command(cmd_name)
+            # Add the contents of cmd.get_outputs(), ensuring
+            # that outputs doesn't contain duplicate entries
+            for filename in cmd.get_outputs():
+                if filename not in outputs:
+                    outputs.append(filename)
+
+        if self.path_file and self.install_path_file:
+            outputs.append(os.path.join(self.install_libbase, self.path_file + ".pth"))
+
+        return outputs
+
+    def get_inputs(self):
+        """Returns the inputs of all the sub-commands"""
+        # XXX gee, this looks familiar ;-(
+        inputs = []
+        for cmd_name in self.get_sub_commands():
+            cmd = self.get_finalized_command(cmd_name)
+            inputs.extend(cmd.get_inputs())
+
+        return inputs
+
+    # -- Predicates for sub-command list -------------------------------
+
+    def has_lib(self):
+        """Returns true if the current distribution has any Python
+        modules to install."""
+        return (
+            self.distribution.has_pure_modules() or self.distribution.has_ext_modules()
+        )
+
+    def has_headers(self):
+        """Returns true if the current distribution has any headers to
+        install."""
+        return self.distribution.has_headers()
+
+    def has_scripts(self):
+        """Returns true if the current distribution has any scripts to.
+        install."""
+        return self.distribution.has_scripts()
+
+    def has_data(self):
+        """Returns true if the current distribution has any data to.
+        install."""
+        return self.distribution.has_data_files()
+
+    # 'sub_commands': a list of commands this command might have to run to
+    # get its work done.  See cmd.py for more info.
+    sub_commands = [
+        ('install_lib', has_lib),
+        ('install_headers', has_headers),
+        ('install_scripts', has_scripts),
+        ('install_data', has_data),
+        ('install_egg_info', lambda self: True),
+    ]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_data.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..624c0b901b730d17653fc8ebc57a556023066f70
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_data.py
@@ -0,0 +1,84 @@
+"""distutils.command.install_data
+
+Implements the Distutils 'install_data' command, for installing
+platform-independent data files."""
+
+# contributed by Bastian Kleineidam
+
+import os
+
+from ..core import Command
+from ..util import change_root, convert_path
+
+
+class install_data(Command):
+    description = "install data files"
+
+    user_options = [
+        (
+            'install-dir=',
+            'd',
+            "base directory for installing data files "
+            "[default: installation base dir]",
+        ),
+        ('root=', None, "install everything relative to this alternate root directory"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+    ]
+
+    boolean_options = ['force']
+
+    def initialize_options(self):
+        self.install_dir = None
+        self.outfiles = []
+        self.root = None
+        self.force = False
+        self.data_files = self.distribution.data_files
+        self.warn_dir = True
+
+    def finalize_options(self):
+        self.set_undefined_options(
+            'install',
+            ('install_data', 'install_dir'),
+            ('root', 'root'),
+            ('force', 'force'),
+        )
+
+    def run(self):
+        self.mkpath(self.install_dir)
+        for f in self.data_files:
+            if isinstance(f, str):
+                # it's a simple file, so copy it
+                f = convert_path(f)
+                if self.warn_dir:
+                    self.warn(
+                        "setup script did not provide a directory for "
+                        f"'{f}' -- installing right in '{self.install_dir}'"
+                    )
+                (out, _) = self.copy_file(f, self.install_dir)
+                self.outfiles.append(out)
+            else:
+                # it's a tuple with path to install to and a list of files
+                dir = convert_path(f[0])
+                if not os.path.isabs(dir):
+                    dir = os.path.join(self.install_dir, dir)
+                elif self.root:
+                    dir = change_root(self.root, dir)
+                self.mkpath(dir)
+
+                if f[1] == []:
+                    # If there are no files listed, the user must be
+                    # trying to create an empty directory, so add the
+                    # directory to the list of output files.
+                    self.outfiles.append(dir)
+                else:
+                    # Copy files, adding them to the list of output files.
+                    for data in f[1]:
+                        data = convert_path(data)
+                        (out, _) = self.copy_file(data, dir)
+                        self.outfiles.append(out)
+
+    def get_inputs(self):
+        return self.data_files or []
+
+    def get_outputs(self):
+        return self.outfiles
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_egg_info.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_egg_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fbb3440abc340feb68123e91cdd70fafea1ae9b
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_egg_info.py
@@ -0,0 +1,92 @@
+"""
+distutils.command.install_egg_info
+
+Implements the Distutils 'install_egg_info' command, for installing
+a package's PKG-INFO metadata.
+"""
+
+import os
+import re
+import sys
+
+from .. import dir_util
+from .._log import log
+from ..cmd import Command
+
+
+class install_egg_info(Command):
+    """Install an .egg-info file for the package"""
+
+    description = "Install package's PKG-INFO metadata as an .egg-info file"
+    user_options = [
+        ('install-dir=', 'd', "directory to install to"),
+    ]
+
+    def initialize_options(self):
+        self.install_dir = None
+
+    @property
+    def basename(self):
+        """
+        Allow basename to be overridden by child class.
+        Ref pypa/distutils#2.
+        """
+        return "%s-%s-py%d.%d.egg-info" % (
+            to_filename(safe_name(self.distribution.get_name())),
+            to_filename(safe_version(self.distribution.get_version())),
+            *sys.version_info[:2],
+        )
+
+    def finalize_options(self):
+        self.set_undefined_options('install_lib', ('install_dir', 'install_dir'))
+        self.target = os.path.join(self.install_dir, self.basename)
+        self.outputs = [self.target]
+
+    def run(self):
+        target = self.target
+        if os.path.isdir(target) and not os.path.islink(target):
+            dir_util.remove_tree(target, dry_run=self.dry_run)
+        elif os.path.exists(target):
+            self.execute(os.unlink, (self.target,), "Removing " + target)
+        elif not os.path.isdir(self.install_dir):
+            self.execute(
+                os.makedirs, (self.install_dir,), "Creating " + self.install_dir
+            )
+        log.info("Writing %s", target)
+        if not self.dry_run:
+            with open(target, 'w', encoding='UTF-8') as f:
+                self.distribution.metadata.write_pkg_file(f)
+
+    def get_outputs(self):
+        return self.outputs
+
+
+# The following routines are taken from setuptools' pkg_resources module and
+# can be replaced by importing them from pkg_resources once it is included
+# in the stdlib.
+
+
+def safe_name(name):
+    """Convert an arbitrary string to a standard distribution name
+
+    Any runs of non-alphanumeric/. characters are replaced with a single '-'.
+    """
+    return re.sub('[^A-Za-z0-9.]+', '-', name)
+
+
+def safe_version(version):
+    """Convert an arbitrary string to a standard version string
+
+    Spaces become dots, and all other non-alphanumeric characters become
+    dashes, with runs of multiple dashes condensed to a single dash.
+    """
+    version = version.replace(' ', '.')
+    return re.sub('[^A-Za-z0-9.]+', '-', version)
+
+
+def to_filename(name):
+    """Convert a project or version name to its filename-escaped form
+
+    Any '-' characters are currently replaced with '_'.
+    """
+    return name.replace('-', '_')
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_headers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_headers.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbb3b242ea09bf1e50fa1a582ac8ee50016889ab
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_headers.py
@@ -0,0 +1,44 @@
+"""distutils.command.install_headers
+
+Implements the Distutils 'install_headers' command, to install C/C++ header
+files to the Python include directory."""
+
+from ..core import Command
+
+
+# XXX force is never used
+class install_headers(Command):
+    description = "install C/C++ header files"
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install header files to"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+    ]
+
+    boolean_options = ['force']
+
+    def initialize_options(self):
+        self.install_dir = None
+        self.force = False
+        self.outfiles = []
+
+    def finalize_options(self):
+        self.set_undefined_options(
+            'install', ('install_headers', 'install_dir'), ('force', 'force')
+        )
+
+    def run(self):
+        headers = self.distribution.headers
+        if not headers:
+            return
+
+        self.mkpath(self.install_dir)
+        for header in headers:
+            (out, _) = self.copy_file(header, self.install_dir)
+            self.outfiles.append(out)
+
+    def get_inputs(self):
+        return self.distribution.headers or []
+
+    def get_outputs(self):
+        return self.outfiles
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_lib.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_lib.py
new file mode 100644
index 0000000000000000000000000000000000000000..54a12d38a81d303a51b56f8d1537634b77564070
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_lib.py
@@ -0,0 +1,236 @@
+"""distutils.command.install_lib
+
+Implements the Distutils 'install_lib' command
+(install all Python modules)."""
+
+import importlib.util
+import os
+import sys
+
+from ..core import Command
+from ..errors import DistutilsOptionError
+
+# Extension for Python source files.
+PYTHON_SOURCE_EXTENSION = ".py"
+
+
+class install_lib(Command):
+    description = "install all Python modules (extensions and pure Python)"
+
+    # The byte-compilation options are a tad confusing.  Here are the
+    # possible scenarios:
+    #   1) no compilation at all (--no-compile --no-optimize)
+    #   2) compile .pyc only (--compile --no-optimize; default)
+    #   3) compile .pyc and "opt-1" .pyc (--compile --optimize)
+    #   4) compile "opt-1" .pyc only (--no-compile --optimize)
+    #   5) compile .pyc and "opt-2" .pyc (--compile --optimize-more)
+    #   6) compile "opt-2" .pyc only (--no-compile --optimize-more)
+    #
+    # The UI for this is two options, 'compile' and 'optimize'.
+    # 'compile' is strictly boolean, and only decides whether to
+    # generate .pyc files.  'optimize' is three-way (0, 1, or 2), and
+    # decides both whether to generate .pyc files and what level of
+    # optimization to use.
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install to"),
+        ('build-dir=', 'b', "build directory (where to install from)"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+        ('compile', 'c', "compile .py to .pyc [default]"),
+        ('no-compile', None, "don't compile .py files"),
+        (
+            'optimize=',
+            'O',
+            "also compile with optimization: -O1 for \"python -O\", "
+            "-O2 for \"python -OO\", and -O0 to disable [default: -O0]",
+        ),
+        ('skip-build', None, "skip the build steps"),
+    ]
+
+    boolean_options = ['force', 'compile', 'skip-build']
+    negative_opt = {'no-compile': 'compile'}
+
+    def initialize_options(self):
+        # let the 'install' command dictate our installation directory
+        self.install_dir = None
+        self.build_dir = None
+        self.force = False
+        self.compile = None
+        self.optimize = None
+        self.skip_build = None
+
+    def finalize_options(self):
+        # Get all the information we need to install pure Python modules
+        # from the umbrella 'install' command -- build (source) directory,
+        # install (target) directory, and whether to compile .py files.
+        self.set_undefined_options(
+            'install',
+            ('build_lib', 'build_dir'),
+            ('install_lib', 'install_dir'),
+            ('force', 'force'),
+            ('compile', 'compile'),
+            ('optimize', 'optimize'),
+            ('skip_build', 'skip_build'),
+        )
+
+        if self.compile is None:
+            self.compile = True
+        if self.optimize is None:
+            self.optimize = False
+
+        if not isinstance(self.optimize, int):
+            try:
+                self.optimize = int(self.optimize)
+                if self.optimize not in (0, 1, 2):
+                    raise AssertionError
+            except (ValueError, AssertionError):
+                raise DistutilsOptionError("optimize must be 0, 1, or 2")
+
+    def run(self):
+        # Make sure we have built everything we need first
+        self.build()
+
+        # Install everything: simply dump the entire contents of the build
+        # directory to the installation directory (that's the beauty of
+        # having a build directory!)
+        outfiles = self.install()
+
+        # (Optionally) compile .py to .pyc
+        if outfiles is not None and self.distribution.has_pure_modules():
+            self.byte_compile(outfiles)
+
+    # -- Top-level worker functions ------------------------------------
+    # (called from 'run()')
+
+    def build(self):
+        if not self.skip_build:
+            if self.distribution.has_pure_modules():
+                self.run_command('build_py')
+            if self.distribution.has_ext_modules():
+                self.run_command('build_ext')
+
+    def install(self):
+        if os.path.isdir(self.build_dir):
+            outfiles = self.copy_tree(self.build_dir, self.install_dir)
+        else:
+            self.warn(
+                f"'{self.build_dir}' does not exist -- no Python modules to install"
+            )
+            return
+        return outfiles
+
+    def byte_compile(self, files):
+        if sys.dont_write_bytecode:
+            self.warn('byte-compiling is disabled, skipping.')
+            return
+
+        from ..util import byte_compile
+
+        # Get the "--root" directory supplied to the "install" command,
+        # and use it as a prefix to strip off the purported filename
+        # encoded in bytecode files.  This is far from complete, but it
+        # should at least generate usable bytecode in RPM distributions.
+        install_root = self.get_finalized_command('install').root
+
+        if self.compile:
+            byte_compile(
+                files,
+                optimize=0,
+                force=self.force,
+                prefix=install_root,
+                dry_run=self.dry_run,
+            )
+        if self.optimize > 0:
+            byte_compile(
+                files,
+                optimize=self.optimize,
+                force=self.force,
+                prefix=install_root,
+                verbose=self.verbose,
+                dry_run=self.dry_run,
+            )
+
+    # -- Utility methods -----------------------------------------------
+
+    def _mutate_outputs(self, has_any, build_cmd, cmd_option, output_dir):
+        if not has_any:
+            return []
+
+        build_cmd = self.get_finalized_command(build_cmd)
+        build_files = build_cmd.get_outputs()
+        build_dir = getattr(build_cmd, cmd_option)
+
+        prefix_len = len(build_dir) + len(os.sep)
+        outputs = []
+        for file in build_files:
+            outputs.append(os.path.join(output_dir, file[prefix_len:]))
+
+        return outputs
+
+    def _bytecode_filenames(self, py_filenames):
+        bytecode_files = []
+        for py_file in py_filenames:
+            # Since build_py handles package data installation, the
+            # list of outputs can contain more than just .py files.
+            # Make sure we only report bytecode for the .py files.
+            ext = os.path.splitext(os.path.normcase(py_file))[1]
+            if ext != PYTHON_SOURCE_EXTENSION:
+                continue
+            if self.compile:
+                bytecode_files.append(
+                    importlib.util.cache_from_source(py_file, optimization='')
+                )
+            if self.optimize > 0:
+                bytecode_files.append(
+                    importlib.util.cache_from_source(
+                        py_file, optimization=self.optimize
+                    )
+                )
+
+        return bytecode_files
+
+    # -- External interface --------------------------------------------
+    # (called by outsiders)
+
+    def get_outputs(self):
+        """Return the list of files that would be installed if this command
+        were actually run.  Not affected by the "dry-run" flag or whether
+        modules have actually been built yet.
+        """
+        pure_outputs = self._mutate_outputs(
+            self.distribution.has_pure_modules(),
+            'build_py',
+            'build_lib',
+            self.install_dir,
+        )
+        if self.compile:
+            bytecode_outputs = self._bytecode_filenames(pure_outputs)
+        else:
+            bytecode_outputs = []
+
+        ext_outputs = self._mutate_outputs(
+            self.distribution.has_ext_modules(),
+            'build_ext',
+            'build_lib',
+            self.install_dir,
+        )
+
+        return pure_outputs + bytecode_outputs + ext_outputs
+
+    def get_inputs(self):
+        """Get the list of files that are input to this command, ie. the
+        files that get installed as they are named in the build tree.
+        The files in this list correspond one-to-one to the output
+        filenames returned by 'get_outputs()'.
+        """
+        inputs = []
+
+        if self.distribution.has_pure_modules():
+            build_py = self.get_finalized_command('build_py')
+            inputs.extend(build_py.get_outputs())
+
+        if self.distribution.has_ext_modules():
+            build_ext = self.get_finalized_command('build_ext')
+            inputs.extend(build_ext.get_outputs())
+
+        return inputs
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_scripts.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_scripts.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb43387fb87761a070cd8899830a4bce6d384f9e
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/install_scripts.py
@@ -0,0 +1,61 @@
+"""distutils.command.install_scripts
+
+Implements the Distutils 'install_scripts' command, for installing
+Python scripts."""
+
+# contributed by Bastian Kleineidam
+
+import os
+from distutils._log import log
+from stat import ST_MODE
+
+from ..core import Command
+
+
+class install_scripts(Command):
+    description = "install scripts (Python or otherwise)"
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install scripts to"),
+        ('build-dir=', 'b', "build directory (where to install from)"),
+        ('force', 'f', "force installation (overwrite existing files)"),
+        ('skip-build', None, "skip the build steps"),
+    ]
+
+    boolean_options = ['force', 'skip-build']
+
+    def initialize_options(self):
+        self.install_dir = None
+        self.force = False
+        self.build_dir = None
+        self.skip_build = None
+
+    def finalize_options(self):
+        self.set_undefined_options('build', ('build_scripts', 'build_dir'))
+        self.set_undefined_options(
+            'install',
+            ('install_scripts', 'install_dir'),
+            ('force', 'force'),
+            ('skip_build', 'skip_build'),
+        )
+
+    def run(self):
+        if not self.skip_build:
+            self.run_command('build_scripts')
+        self.outfiles = self.copy_tree(self.build_dir, self.install_dir)
+        if os.name == 'posix':
+            # Set the executable bits (owner, group, and world) on
+            # all the scripts we just installed.
+            for file in self.get_outputs():
+                if self.dry_run:
+                    log.info("changing mode of %s", file)
+                else:
+                    mode = ((os.stat(file)[ST_MODE]) | 0o555) & 0o7777
+                    log.info("changing mode of %s to %o", file, mode)
+                    os.chmod(file, mode)
+
+    def get_inputs(self):
+        return self.distribution.scripts or []
+
+    def get_outputs(self):
+        return self.outfiles or []
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/register.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/register.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1acd27b54846e5ebc0b9ef292256c69eb9d94a7
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/register.py
@@ -0,0 +1,322 @@
+"""distutils.command.register
+
+Implements the Distutils 'register' command (register with the repository).
+"""
+
+# created 2002/10/21, Richard Jones
+
+import getpass
+import io
+import logging
+import urllib.parse
+import urllib.request
+from distutils._log import log
+from warnings import warn
+
+from .._itertools import always_iterable
+from ..core import PyPIRCCommand
+
+
+class register(PyPIRCCommand):
+    description = "register the distribution with the Python package index"
+    user_options = PyPIRCCommand.user_options + [
+        ('list-classifiers', None, 'list the valid Trove classifiers'),
+        (
+            'strict',
+            None,
+            'Will stop the registering if the meta-data are not fully compliant',
+        ),
+    ]
+    boolean_options = PyPIRCCommand.boolean_options + [
+        'verify',
+        'list-classifiers',
+        'strict',
+    ]
+
+    sub_commands = [('check', lambda self: True)]
+
+    def initialize_options(self):
+        PyPIRCCommand.initialize_options(self)
+        self.list_classifiers = False
+        self.strict = False
+
+    def finalize_options(self):
+        PyPIRCCommand.finalize_options(self)
+        # setting options for the `check` subcommand
+        check_options = {
+            'strict': ('register', self.strict),
+            'restructuredtext': ('register', 1),
+        }
+        self.distribution.command_options['check'] = check_options
+
+    def run(self):
+        self.finalize_options()
+        self._set_config()
+
+        # Run sub commands
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        if self.dry_run:
+            self.verify_metadata()
+        elif self.list_classifiers:
+            self.classifiers()
+        else:
+            self.send_metadata()
+
+    def check_metadata(self):
+        """Deprecated API."""
+        warn(
+            "distutils.command.register.check_metadata is deprecated; "
+            "use the check command instead",
+            DeprecationWarning,
+        )
+        check = self.distribution.get_command_obj('check')
+        check.ensure_finalized()
+        check.strict = self.strict
+        check.restructuredtext = True
+        check.run()
+
+    def _set_config(self):
+        """Reads the configuration file and set attributes."""
+        config = self._read_pypirc()
+        if config != {}:
+            self.username = config['username']
+            self.password = config['password']
+            self.repository = config['repository']
+            self.realm = config['realm']
+            self.has_config = True
+        else:
+            if self.repository not in ('pypi', self.DEFAULT_REPOSITORY):
+                raise ValueError(f'{self.repository} not found in .pypirc')
+            if self.repository == 'pypi':
+                self.repository = self.DEFAULT_REPOSITORY
+            self.has_config = False
+
+    def classifiers(self):
+        """Fetch the list of classifiers from the server."""
+        url = self.repository + '?:action=list_classifiers'
+        response = urllib.request.urlopen(url)
+        log.info(self._read_pypi_response(response))
+
+    def verify_metadata(self):
+        """Send the metadata to the package index server to be checked."""
+        # send the info to the server and report the result
+        (code, result) = self.post_to_server(self.build_post_data('verify'))
+        log.info('Server response (%s): %s', code, result)
+
+    def send_metadata(self):  # noqa: C901
+        """Send the metadata to the package index server.
+
+        Well, do the following:
+        1. figure who the user is, and then
+        2. send the data as a Basic auth'ed POST.
+
+        First we try to read the username/password from $HOME/.pypirc,
+        which is a ConfigParser-formatted file with a section
+        [distutils] containing username and password entries (both
+        in clear text). Eg:
+
+            [distutils]
+            index-servers =
+                pypi
+
+            [pypi]
+            username: fred
+            password: sekrit
+
+        Otherwise, to figure who the user is, we offer the user three
+        choices:
+
+         1. use existing login,
+         2. register as a new user, or
+         3. set the password to a random string and email the user.
+
+        """
+        # see if we can short-cut and get the username/password from the
+        # config
+        if self.has_config:
+            choice = '1'
+            username = self.username
+            password = self.password
+        else:
+            choice = 'x'
+            username = password = ''
+
+        # get the user's login info
+        choices = '1 2 3 4'.split()
+        while choice not in choices:
+            self.announce(
+                """\
+We need to know who you are, so please choose either:
+ 1. use your existing login,
+ 2. register as a new user,
+ 3. have the server generate a new password for you (and email it to you), or
+ 4. quit
+Your selection [default 1]: """,
+                logging.INFO,
+            )
+            choice = input()
+            if not choice:
+                choice = '1'
+            elif choice not in choices:
+                print('Please choose one of the four options!')
+
+        if choice == '1':
+            # get the username and password
+            while not username:
+                username = input('Username: ')
+            while not password:
+                password = getpass.getpass('Password: ')
+
+            # set up the authentication
+            auth = urllib.request.HTTPPasswordMgr()
+            host = urllib.parse.urlparse(self.repository)[1]
+            auth.add_password(self.realm, host, username, password)
+            # send the info to the server and report the result
+            code, result = self.post_to_server(self.build_post_data('submit'), auth)
+            self.announce(f'Server response ({code}): {result}', logging.INFO)
+
+            # possibly save the login
+            if code == 200:
+                if self.has_config:
+                    # sharing the password in the distribution instance
+                    # so the upload command can reuse it
+                    self.distribution.password = password
+                else:
+                    self.announce(
+                        (
+                            'I can store your PyPI login so future '
+                            'submissions will be faster.'
+                        ),
+                        logging.INFO,
+                    )
+                    self.announce(
+                        f'(the login will be stored in {self._get_rc_file()})',
+                        logging.INFO,
+                    )
+                    choice = 'X'
+                    while choice.lower() not in 'yn':
+                        choice = input('Save your login (y/N)?')
+                        if not choice:
+                            choice = 'n'
+                    if choice.lower() == 'y':
+                        self._store_pypirc(username, password)
+
+        elif choice == '2':
+            data = {':action': 'user'}
+            data['name'] = data['password'] = data['email'] = ''
+            data['confirm'] = None
+            while not data['name']:
+                data['name'] = input('Username: ')
+            while data['password'] != data['confirm']:
+                while not data['password']:
+                    data['password'] = getpass.getpass('Password: ')
+                while not data['confirm']:
+                    data['confirm'] = getpass.getpass(' Confirm: ')
+                if data['password'] != data['confirm']:
+                    data['password'] = ''
+                    data['confirm'] = None
+                    print("Password and confirm don't match!")
+            while not data['email']:
+                data['email'] = input('   EMail: ')
+            code, result = self.post_to_server(data)
+            if code != 200:
+                log.info('Server response (%s): %s', code, result)
+            else:
+                log.info('You will receive an email shortly.')
+                log.info('Follow the instructions in it to complete registration.')
+        elif choice == '3':
+            data = {':action': 'password_reset'}
+            data['email'] = ''
+            while not data['email']:
+                data['email'] = input('Your email address: ')
+            code, result = self.post_to_server(data)
+            log.info('Server response (%s): %s', code, result)
+
+    def build_post_data(self, action):
+        # figure the data to send - the metadata plus some additional
+        # information used by the package server
+        meta = self.distribution.metadata
+        data = {
+            ':action': action,
+            'metadata_version': '1.0',
+            'name': meta.get_name(),
+            'version': meta.get_version(),
+            'summary': meta.get_description(),
+            'home_page': meta.get_url(),
+            'author': meta.get_contact(),
+            'author_email': meta.get_contact_email(),
+            'license': meta.get_licence(),
+            'description': meta.get_long_description(),
+            'keywords': meta.get_keywords(),
+            'platform': meta.get_platforms(),
+            'classifiers': meta.get_classifiers(),
+            'download_url': meta.get_download_url(),
+            # PEP 314
+            'provides': meta.get_provides(),
+            'requires': meta.get_requires(),
+            'obsoletes': meta.get_obsoletes(),
+        }
+        if data['provides'] or data['requires'] or data['obsoletes']:
+            data['metadata_version'] = '1.1'
+        return data
+
+    def post_to_server(self, data, auth=None):  # noqa: C901
+        """Post a query to the server, and return a string response."""
+        if 'name' in data:
+            self.announce(
+                'Registering {} to {}'.format(data['name'], self.repository),
+                logging.INFO,
+            )
+        # Build up the MIME payload for the urllib2 POST data
+        boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
+        sep_boundary = '\n--' + boundary
+        end_boundary = sep_boundary + '--'
+        body = io.StringIO()
+        for key, values in data.items():
+            for value in map(str, make_iterable(values)):
+                body.write(sep_boundary)
+                body.write(f'\nContent-Disposition: form-data; name="{key}"')
+                body.write("\n\n")
+                body.write(value)
+                if value and value[-1] == '\r':
+                    body.write('\n')  # write an extra newline (lurve Macs)
+        body.write(end_boundary)
+        body.write("\n")
+        body = body.getvalue().encode("utf-8")
+
+        # build the Request
+        headers = {
+            'Content-type': f'multipart/form-data; boundary={boundary}; charset=utf-8',
+            'Content-length': str(len(body)),
+        }
+        req = urllib.request.Request(self.repository, body, headers)
+
+        # handle HTTP and include the Basic Auth handler
+        opener = urllib.request.build_opener(
+            urllib.request.HTTPBasicAuthHandler(password_mgr=auth)
+        )
+        data = ''
+        try:
+            result = opener.open(req)
+        except urllib.error.HTTPError as e:
+            if self.show_response:
+                data = e.fp.read()
+            result = e.code, e.msg
+        except urllib.error.URLError as e:
+            result = 500, str(e)
+        else:
+            if self.show_response:
+                data = self._read_pypi_response(result)
+            result = 200, 'OK'
+        if self.show_response:
+            msg = '\n'.join(('-' * 75, data, '-' * 75))
+            self.announce(msg, logging.INFO)
+        return result
+
+
+def make_iterable(values):
+    if values is None:
+        return [None]
+    return always_iterable(values)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/sdist.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/sdist.py
new file mode 100644
index 0000000000000000000000000000000000000000..04333dd2144c165e0c1da2eb37e161477bc2f46e
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/sdist.py
@@ -0,0 +1,527 @@
+"""distutils.command.sdist
+
+Implements the Distutils 'sdist' command (create a source distribution)."""
+
+import os
+import sys
+from distutils import archive_util, dir_util, file_util
+from distutils._log import log
+from glob import glob
+from itertools import filterfalse
+from warnings import warn
+
+from ..core import Command
+from ..errors import DistutilsOptionError, DistutilsTemplateError
+from ..filelist import FileList
+from ..text_file import TextFile
+from ..util import convert_path
+
+
+def show_formats():
+    """Print all possible values for the 'formats' option (used by
+    the "--help-formats" command-line option).
+    """
+    from ..archive_util import ARCHIVE_FORMATS
+    from ..fancy_getopt import FancyGetopt
+
+    formats = []
+    for format in ARCHIVE_FORMATS.keys():
+        formats.append(("formats=" + format, None, ARCHIVE_FORMATS[format][2]))
+    formats.sort()
+    FancyGetopt(formats).print_help("List of available source distribution formats:")
+
+
+class sdist(Command):
+    description = "create a source distribution (tarball, zip file, etc.)"
+
+    def checking_metadata(self):
+        """Callable used for the check sub-command.
+
+        Placed here so user_options can view it"""
+        return self.metadata_check
+
+    user_options = [
+        ('template=', 't', "name of manifest template file [default: MANIFEST.in]"),
+        ('manifest=', 'm', "name of manifest file [default: MANIFEST]"),
+        (
+            'use-defaults',
+            None,
+            "include the default file set in the manifest "
+            "[default; disable with --no-defaults]",
+        ),
+        ('no-defaults', None, "don't include the default file set"),
+        (
+            'prune',
+            None,
+            "specifically exclude files/directories that should not be "
+            "distributed (build tree, RCS/CVS dirs, etc.) "
+            "[default; disable with --no-prune]",
+        ),
+        ('no-prune', None, "don't automatically exclude anything"),
+        (
+            'manifest-only',
+            'o',
+            "just regenerate the manifest and then stop (implies --force-manifest)",
+        ),
+        (
+            'force-manifest',
+            'f',
+            "forcibly regenerate the manifest and carry on as usual. "
+            "Deprecated: now the manifest is always regenerated.",
+        ),
+        ('formats=', None, "formats for source distribution (comma-separated list)"),
+        (
+            'keep-temp',
+            'k',
+            "keep the distribution tree around after creating " + "archive file(s)",
+        ),
+        (
+            'dist-dir=',
+            'd',
+            "directory to put the source distribution archive(s) in [default: dist]",
+        ),
+        (
+            'metadata-check',
+            None,
+            "Ensure that all required elements of meta-data "
+            "are supplied. Warn if any missing. [default]",
+        ),
+        (
+            'owner=',
+            'u',
+            "Owner name used when creating a tar file [default: current user]",
+        ),
+        (
+            'group=',
+            'g',
+            "Group name used when creating a tar file [default: current group]",
+        ),
+    ]
+
+    boolean_options = [
+        'use-defaults',
+        'prune',
+        'manifest-only',
+        'force-manifest',
+        'keep-temp',
+        'metadata-check',
+    ]
+
+    help_options = [
+        ('help-formats', None, "list available distribution formats", show_formats),
+    ]
+
+    negative_opt = {'no-defaults': 'use-defaults', 'no-prune': 'prune'}
+
+    sub_commands = [('check', checking_metadata)]
+
+    READMES = ('README', 'README.txt', 'README.rst')
+
+    def initialize_options(self):
+        # 'template' and 'manifest' are, respectively, the names of
+        # the manifest template and manifest file.
+        self.template = None
+        self.manifest = None
+
+        # 'use_defaults': if true, we will include the default file set
+        # in the manifest
+        self.use_defaults = True
+        self.prune = True
+
+        self.manifest_only = False
+        self.force_manifest = False
+
+        self.formats = ['gztar']
+        self.keep_temp = False
+        self.dist_dir = None
+
+        self.archive_files = None
+        self.metadata_check = 1
+        self.owner = None
+        self.group = None
+
+    def finalize_options(self):
+        if self.manifest is None:
+            self.manifest = "MANIFEST"
+        if self.template is None:
+            self.template = "MANIFEST.in"
+
+        self.ensure_string_list('formats')
+
+        bad_format = archive_util.check_archive_formats(self.formats)
+        if bad_format:
+            raise DistutilsOptionError(f"unknown archive format '{bad_format}'")
+
+        if self.dist_dir is None:
+            self.dist_dir = "dist"
+
+    def run(self):
+        # 'filelist' contains the list of files that will make up the
+        # manifest
+        self.filelist = FileList()
+
+        # Run sub commands
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        # Do whatever it takes to get the list of files to process
+        # (process the manifest template, read an existing manifest,
+        # whatever).  File list is accumulated in 'self.filelist'.
+        self.get_file_list()
+
+        # If user just wanted us to regenerate the manifest, stop now.
+        if self.manifest_only:
+            return
+
+        # Otherwise, go ahead and create the source distribution tarball,
+        # or zipfile, or whatever.
+        self.make_distribution()
+
+    def check_metadata(self):
+        """Deprecated API."""
+        warn(
+            "distutils.command.sdist.check_metadata is deprecated, \
+              use the check command instead",
+            PendingDeprecationWarning,
+        )
+        check = self.distribution.get_command_obj('check')
+        check.ensure_finalized()
+        check.run()
+
+    def get_file_list(self):
+        """Figure out the list of files to include in the source
+        distribution, and put it in 'self.filelist'.  This might involve
+        reading the manifest template (and writing the manifest), or just
+        reading the manifest, or just using the default file set -- it all
+        depends on the user's options.
+        """
+        # new behavior when using a template:
+        # the file list is recalculated every time because
+        # even if MANIFEST.in or setup.py are not changed
+        # the user might have added some files in the tree that
+        # need to be included.
+        #
+        #  This makes --force the default and only behavior with templates.
+        template_exists = os.path.isfile(self.template)
+        if not template_exists and self._manifest_is_not_generated():
+            self.read_manifest()
+            self.filelist.sort()
+            self.filelist.remove_duplicates()
+            return
+
+        if not template_exists:
+            self.warn(
+                ("manifest template '%s' does not exist " + "(using default file list)")
+                % self.template
+            )
+        self.filelist.findall()
+
+        if self.use_defaults:
+            self.add_defaults()
+
+        if template_exists:
+            self.read_template()
+
+        if self.prune:
+            self.prune_file_list()
+
+        self.filelist.sort()
+        self.filelist.remove_duplicates()
+        self.write_manifest()
+
+    def add_defaults(self):
+        """Add all the default files to self.filelist:
+          - README or README.txt
+          - setup.py
+          - tests/test*.py and test/test*.py
+          - all pure Python modules mentioned in setup script
+          - all files pointed by package_data (build_py)
+          - all files defined in data_files.
+          - all files defined as scripts.
+          - all C sources listed as part of extensions or C libraries
+            in the setup script (doesn't catch C headers!)
+        Warns if (README or README.txt) or setup.py are missing; everything
+        else is optional.
+        """
+        self._add_defaults_standards()
+        self._add_defaults_optional()
+        self._add_defaults_python()
+        self._add_defaults_data_files()
+        self._add_defaults_ext()
+        self._add_defaults_c_libs()
+        self._add_defaults_scripts()
+
+    @staticmethod
+    def _cs_path_exists(fspath):
+        """
+        Case-sensitive path existence check
+
+        >>> sdist._cs_path_exists(__file__)
+        True
+        >>> sdist._cs_path_exists(__file__.upper())
+        False
+        """
+        if not os.path.exists(fspath):
+            return False
+        # make absolute so we always have a directory
+        abspath = os.path.abspath(fspath)
+        directory, filename = os.path.split(abspath)
+        return filename in os.listdir(directory)
+
+    def _add_defaults_standards(self):
+        standards = [self.READMES, self.distribution.script_name]
+        for fn in standards:
+            if isinstance(fn, tuple):
+                alts = fn
+                got_it = False
+                for fn in alts:
+                    if self._cs_path_exists(fn):
+                        got_it = True
+                        self.filelist.append(fn)
+                        break
+
+                if not got_it:
+                    self.warn(
+                        "standard file not found: should have one of " + ', '.join(alts)
+                    )
+            else:
+                if self._cs_path_exists(fn):
+                    self.filelist.append(fn)
+                else:
+                    self.warn(f"standard file '{fn}' not found")
+
+    def _add_defaults_optional(self):
+        optional = ['tests/test*.py', 'test/test*.py', 'setup.cfg']
+        for pattern in optional:
+            files = filter(os.path.isfile, glob(pattern))
+            self.filelist.extend(files)
+
+    def _add_defaults_python(self):
+        # build_py is used to get:
+        #  - python modules
+        #  - files defined in package_data
+        build_py = self.get_finalized_command('build_py')
+
+        # getting python files
+        if self.distribution.has_pure_modules():
+            self.filelist.extend(build_py.get_source_files())
+
+        # getting package_data files
+        # (computed in build_py.data_files by build_py.finalize_options)
+        for _pkg, src_dir, _build_dir, filenames in build_py.data_files:
+            for filename in filenames:
+                self.filelist.append(os.path.join(src_dir, filename))
+
+    def _add_defaults_data_files(self):
+        # getting distribution.data_files
+        if self.distribution.has_data_files():
+            for item in self.distribution.data_files:
+                if isinstance(item, str):
+                    # plain file
+                    item = convert_path(item)
+                    if os.path.isfile(item):
+                        self.filelist.append(item)
+                else:
+                    # a (dirname, filenames) tuple
+                    dirname, filenames = item
+                    for f in filenames:
+                        f = convert_path(f)
+                        if os.path.isfile(f):
+                            self.filelist.append(f)
+
+    def _add_defaults_ext(self):
+        if self.distribution.has_ext_modules():
+            build_ext = self.get_finalized_command('build_ext')
+            self.filelist.extend(build_ext.get_source_files())
+
+    def _add_defaults_c_libs(self):
+        if self.distribution.has_c_libraries():
+            build_clib = self.get_finalized_command('build_clib')
+            self.filelist.extend(build_clib.get_source_files())
+
+    def _add_defaults_scripts(self):
+        if self.distribution.has_scripts():
+            build_scripts = self.get_finalized_command('build_scripts')
+            self.filelist.extend(build_scripts.get_source_files())
+
+    def read_template(self):
+        """Read and parse manifest template file named by self.template.
+
+        (usually "MANIFEST.in") The parsing and processing is done by
+        'self.filelist', which updates itself accordingly.
+        """
+        log.info("reading manifest template '%s'", self.template)
+        template = TextFile(
+            self.template,
+            strip_comments=True,
+            skip_blanks=True,
+            join_lines=True,
+            lstrip_ws=True,
+            rstrip_ws=True,
+            collapse_join=True,
+        )
+
+        try:
+            while True:
+                line = template.readline()
+                if line is None:  # end of file
+                    break
+
+                try:
+                    self.filelist.process_template_line(line)
+                # the call above can raise a DistutilsTemplateError for
+                # malformed lines, or a ValueError from the lower-level
+                # convert_path function
+                except (DistutilsTemplateError, ValueError) as msg:
+                    self.warn(
+                        "%s, line %d: %s"
+                        % (template.filename, template.current_line, msg)
+                    )
+        finally:
+            template.close()
+
+    def prune_file_list(self):
+        """Prune off branches that might slip into the file list as created
+        by 'read_template()', but really don't belong there:
+          * the build tree (typically "build")
+          * the release tree itself (only an issue if we ran "sdist"
+            previously with --keep-temp, or it aborted)
+          * any RCS, CVS, .svn, .hg, .git, .bzr, _darcs directories
+        """
+        build = self.get_finalized_command('build')
+        base_dir = self.distribution.get_fullname()
+
+        self.filelist.exclude_pattern(None, prefix=build.build_base)
+        self.filelist.exclude_pattern(None, prefix=base_dir)
+
+        if sys.platform == 'win32':
+            seps = r'/|\\'
+        else:
+            seps = '/'
+
+        vcs_dirs = ['RCS', 'CVS', r'\.svn', r'\.hg', r'\.git', r'\.bzr', '_darcs']
+        vcs_ptrn = r'(^|{})({})({}).*'.format(seps, '|'.join(vcs_dirs), seps)
+        self.filelist.exclude_pattern(vcs_ptrn, is_regex=True)
+
+    def write_manifest(self):
+        """Write the file list in 'self.filelist' (presumably as filled in
+        by 'add_defaults()' and 'read_template()') to the manifest file
+        named by 'self.manifest'.
+        """
+        if self._manifest_is_not_generated():
+            log.info(
+                f"not writing to manually maintained manifest file '{self.manifest}'"
+            )
+            return
+
+        content = self.filelist.files[:]
+        content.insert(0, '# file GENERATED by distutils, do NOT edit')
+        self.execute(
+            file_util.write_file,
+            (self.manifest, content),
+            f"writing manifest file '{self.manifest}'",
+        )
+
+    def _manifest_is_not_generated(self):
+        # check for special comment used in 3.1.3 and higher
+        if not os.path.isfile(self.manifest):
+            return False
+
+        with open(self.manifest, encoding='utf-8') as fp:
+            first_line = next(fp)
+        return first_line != '# file GENERATED by distutils, do NOT edit\n'
+
+    def read_manifest(self):
+        """Read the manifest file (named by 'self.manifest') and use it to
+        fill in 'self.filelist', the list of files to include in the source
+        distribution.
+        """
+        log.info("reading manifest file '%s'", self.manifest)
+        with open(self.manifest, encoding='utf-8') as lines:
+            self.filelist.extend(
+                # ignore comments and blank lines
+                filter(None, filterfalse(is_comment, map(str.strip, lines)))
+            )
+
+    def make_release_tree(self, base_dir, files):
+        """Create the directory tree that will become the source
+        distribution archive.  All directories implied by the filenames in
+        'files' are created under 'base_dir', and then we hard link or copy
+        (if hard linking is unavailable) those files into place.
+        Essentially, this duplicates the developer's source tree, but in a
+        directory named after the distribution, containing only the files
+        to be distributed.
+        """
+        # Create all the directories under 'base_dir' necessary to
+        # put 'files' there; the 'mkpath()' is just so we don't die
+        # if the manifest happens to be empty.
+        self.mkpath(base_dir)
+        dir_util.create_tree(base_dir, files, dry_run=self.dry_run)
+
+        # And walk over the list of files, either making a hard link (if
+        # os.link exists) to each one that doesn't already exist in its
+        # corresponding location under 'base_dir', or copying each file
+        # that's out-of-date in 'base_dir'.  (Usually, all files will be
+        # out-of-date, because by default we blow away 'base_dir' when
+        # we're done making the distribution archives.)
+
+        if hasattr(os, 'link'):  # can make hard links on this system
+            link = 'hard'
+            msg = f"making hard links in {base_dir}..."
+        else:  # nope, have to copy
+            link = None
+            msg = f"copying files to {base_dir}..."
+
+        if not files:
+            log.warning("no files to distribute -- empty manifest?")
+        else:
+            log.info(msg)
+        for file in files:
+            if not os.path.isfile(file):
+                log.warning("'%s' not a regular file -- skipping", file)
+            else:
+                dest = os.path.join(base_dir, file)
+                self.copy_file(file, dest, link=link)
+
+        self.distribution.metadata.write_pkg_info(base_dir)
+
+    def make_distribution(self):
+        """Create the source distribution(s).  First, we create the release
+        tree with 'make_release_tree()'; then, we create all required
+        archive files (according to 'self.formats') from the release tree.
+        Finally, we clean up by blowing away the release tree (unless
+        'self.keep_temp' is true).  The list of archive files created is
+        stored so it can be retrieved later by 'get_archive_files()'.
+        """
+        # Don't warn about missing meta-data here -- should be (and is!)
+        # done elsewhere.
+        base_dir = self.distribution.get_fullname()
+        base_name = os.path.join(self.dist_dir, base_dir)
+
+        self.make_release_tree(base_dir, self.filelist.files)
+        archive_files = []  # remember names of files we create
+        # tar archive must be created last to avoid overwrite and remove
+        if 'tar' in self.formats:
+            self.formats.append(self.formats.pop(self.formats.index('tar')))
+
+        for fmt in self.formats:
+            file = self.make_archive(
+                base_name, fmt, base_dir=base_dir, owner=self.owner, group=self.group
+            )
+            archive_files.append(file)
+            self.distribution.dist_files.append(('sdist', '', file))
+
+        self.archive_files = archive_files
+
+        if not self.keep_temp:
+            dir_util.remove_tree(base_dir, dry_run=self.dry_run)
+
+    def get_archive_files(self):
+        """Return the list of archive files created when the command
+        was run, or None if the command hasn't run yet.
+        """
+        return self.archive_files
+
+
+def is_comment(line):
+    return line.startswith('#')
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/upload.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/upload.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2461e089f5848d699c9f0caf5c8bf047b4526c7
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/command/upload.py
@@ -0,0 +1,208 @@
+"""
+distutils.command.upload
+
+Implements the Distutils 'upload' subcommand (upload package to a package
+index).
+"""
+
+import hashlib
+import io
+import logging
+import os
+from base64 import standard_b64encode
+from urllib.parse import urlparse
+from urllib.request import HTTPError, Request, urlopen
+
+from .._itertools import always_iterable
+from ..core import PyPIRCCommand
+from ..errors import DistutilsError, DistutilsOptionError
+from ..spawn import spawn
+
+# PyPI Warehouse supports MD5, SHA256, and Blake2 (blake2-256)
+# https://bugs.python.org/issue40698
+_FILE_CONTENT_DIGESTS = {
+    "md5_digest": getattr(hashlib, "md5", None),
+    "sha256_digest": getattr(hashlib, "sha256", None),
+    "blake2_256_digest": getattr(hashlib, "blake2b", None),
+}
+
+
+class upload(PyPIRCCommand):
+    description = "upload binary package to PyPI"
+
+    user_options = PyPIRCCommand.user_options + [
+        ('sign', 's', 'sign files to upload using gpg'),
+        ('identity=', 'i', 'GPG identity used to sign files'),
+    ]
+
+    boolean_options = PyPIRCCommand.boolean_options + ['sign']
+
+    def initialize_options(self):
+        PyPIRCCommand.initialize_options(self)
+        self.username = ''
+        self.password = ''
+        self.show_response = False
+        self.sign = False
+        self.identity = None
+
+    def finalize_options(self):
+        PyPIRCCommand.finalize_options(self)
+        if self.identity and not self.sign:
+            raise DistutilsOptionError("Must use --sign for --identity to have meaning")
+        config = self._read_pypirc()
+        if config != {}:
+            self.username = config['username']
+            self.password = config['password']
+            self.repository = config['repository']
+            self.realm = config['realm']
+
+        # getting the password from the distribution
+        # if previously set by the register command
+        if not self.password and self.distribution.password:
+            self.password = self.distribution.password
+
+    def run(self):
+        if not self.distribution.dist_files:
+            msg = (
+                "Must create and upload files in one command "
+                "(e.g. setup.py sdist upload)"
+            )
+            raise DistutilsOptionError(msg)
+        for command, pyversion, filename in self.distribution.dist_files:
+            self.upload_file(command, pyversion, filename)
+
+    def upload_file(self, command, pyversion, filename):  # noqa: C901
+        # Makes sure the repository URL is compliant
+        schema, netloc, url, params, query, fragments = urlparse(self.repository)
+        if params or query or fragments:
+            raise AssertionError(f"Incompatible url {self.repository}")
+
+        if schema not in ('http', 'https'):
+            raise AssertionError("unsupported schema " + schema)
+
+        # Sign if requested
+        if self.sign:
+            gpg_args = ["gpg", "--detach-sign", "-a", filename]
+            if self.identity:
+                gpg_args[2:2] = ["--local-user", self.identity]
+            spawn(gpg_args, dry_run=self.dry_run)
+
+        # Fill in the data - send all the meta-data in case we need to
+        # register a new release
+        f = open(filename, 'rb')
+        try:
+            content = f.read()
+        finally:
+            f.close()
+
+        meta = self.distribution.metadata
+        data = {
+            # action
+            ':action': 'file_upload',
+            'protocol_version': '1',
+            # identify release
+            'name': meta.get_name(),
+            'version': meta.get_version(),
+            # file content
+            'content': (os.path.basename(filename), content),
+            'filetype': command,
+            'pyversion': pyversion,
+            # additional meta-data
+            'metadata_version': '1.0',
+            'summary': meta.get_description(),
+            'home_page': meta.get_url(),
+            'author': meta.get_contact(),
+            'author_email': meta.get_contact_email(),
+            'license': meta.get_licence(),
+            'description': meta.get_long_description(),
+            'keywords': meta.get_keywords(),
+            'platform': meta.get_platforms(),
+            'classifiers': meta.get_classifiers(),
+            'download_url': meta.get_download_url(),
+            # PEP 314
+            'provides': meta.get_provides(),
+            'requires': meta.get_requires(),
+            'obsoletes': meta.get_obsoletes(),
+        }
+
+        data['comment'] = ''
+
+        # file content digests
+        for digest_name, digest_cons in _FILE_CONTENT_DIGESTS.items():
+            if digest_cons is None:
+                continue
+            try:
+                data[digest_name] = digest_cons(content).hexdigest()
+            except ValueError:
+                # hash digest not available or blocked by security policy
+                pass
+
+        if self.sign:
+            with open(filename + ".asc", "rb") as f:
+                data['gpg_signature'] = (os.path.basename(filename) + ".asc", f.read())
+
+        # set up the authentication
+        user_pass = (self.username + ":" + self.password).encode('ascii')
+        # The exact encoding of the authentication string is debated.
+        # Anyway PyPI only accepts ascii for both username or password.
+        auth = "Basic " + standard_b64encode(user_pass).decode('ascii')
+
+        # Build up the MIME payload for the POST data
+        boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
+        sep_boundary = b'\r\n--' + boundary.encode('ascii')
+        end_boundary = sep_boundary + b'--\r\n'
+        body = io.BytesIO()
+        for key, values in data.items():
+            title = f'\r\nContent-Disposition: form-data; name="{key}"'
+            for value in make_iterable(values):
+                if type(value) is tuple:
+                    title += f'; filename="{value[0]}"'
+                    value = value[1]
+                else:
+                    value = str(value).encode('utf-8')
+                body.write(sep_boundary)
+                body.write(title.encode('utf-8'))
+                body.write(b"\r\n\r\n")
+                body.write(value)
+        body.write(end_boundary)
+        body = body.getvalue()
+
+        msg = f"Submitting {filename} to {self.repository}"
+        self.announce(msg, logging.INFO)
+
+        # build the Request
+        headers = {
+            'Content-type': f'multipart/form-data; boundary={boundary}',
+            'Content-length': str(len(body)),
+            'Authorization': auth,
+        }
+
+        request = Request(self.repository, data=body, headers=headers)
+        # send the data
+        try:
+            result = urlopen(request)
+            status = result.getcode()
+            reason = result.msg
+        except HTTPError as e:
+            status = e.code
+            reason = e.msg
+        except OSError as e:
+            self.announce(str(e), logging.ERROR)
+            raise
+
+        if status == 200:
+            self.announce(f'Server response ({status}): {reason}', logging.INFO)
+            if self.show_response:
+                text = self._read_pypi_response(result)
+                msg = '\n'.join(('-' * 75, text, '-' * 75))
+                self.announce(msg, logging.INFO)
+        else:
+            msg = f'Upload failed ({status}): {reason}'
+            self.announce(msg, logging.ERROR)
+            raise DistutilsError(msg)
+
+
+def make_iterable(values):
+    if values is None:
+        return [None]
+    return always_iterable(values, base_type=(bytes, str, tuple))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e12534a32cf1d21634ac66ebbe7c9e2804d2f513
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__init__.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from .py38 import removeprefix
+
+
+def consolidate_linker_args(args: list[str]) -> list[str] | str:
+    """
+    Ensure the return value is a string for backward compatibility.
+
+    Retain until at least 2025-04-31. See pypa/distutils#246
+    """
+
+    if not all(arg.startswith('-Wl,') for arg in args):
+        return args
+    return '-Wl,' + ','.join(removeprefix(arg, '-Wl,') for arg in args)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7218a70f142f8dabb642127503835705984f6a9b
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/py38.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/py38.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72adc5078e2fc547e3e08802d9bf3b82419fcbab
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/py38.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/py39.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/py39.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..097f432187b843e09c1640a7a3136ccf802a5e94
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/__pycache__/py39.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/py38.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/py38.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d4421114793d61ba8587d5adb54eb93d490b7ab
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/py38.py
@@ -0,0 +1,33 @@
+import sys
+
+if sys.version_info < (3, 9):
+
+    def removesuffix(self, suffix):
+        # suffix='' should not call self[:-0].
+        if suffix and self.endswith(suffix):
+            return self[: -len(suffix)]
+        else:
+            return self[:]
+
+    def removeprefix(self, prefix):
+        if self.startswith(prefix):
+            return self[len(prefix) :]
+        else:
+            return self[:]
+else:
+
+    def removesuffix(self, suffix):
+        return self.removesuffix(suffix)
+
+    def removeprefix(self, prefix):
+        return self.removeprefix(prefix)
+
+
+def aix_platform(osname, version, release):
+    try:
+        import _aix_support  # type: ignore
+
+        return _aix_support.aix_platform()
+    except ImportError:
+        pass
+    return f"{osname}-{version}.{release}"
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/py39.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/py39.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b436d76586cbcf38164548f3fc849ce1a3ff600
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/compat/py39.py
@@ -0,0 +1,66 @@
+import functools
+import itertools
+import platform
+import sys
+
+
+def add_ext_suffix_39(vars):
+    """
+    Ensure vars contains 'EXT_SUFFIX'. pypa/distutils#130
+    """
+    import _imp
+
+    ext_suffix = _imp.extension_suffixes()[0]
+    vars.update(
+        EXT_SUFFIX=ext_suffix,
+        # sysconfig sets SO to match EXT_SUFFIX, so maintain
+        # that expectation.
+        # https://github.com/python/cpython/blob/785cc6770588de087d09e89a69110af2542be208/Lib/sysconfig.py#L671-L673
+        SO=ext_suffix,
+    )
+
+
+needs_ext_suffix = sys.version_info < (3, 10) and platform.system() == 'Windows'
+add_ext_suffix = add_ext_suffix_39 if needs_ext_suffix else lambda vars: None
+
+
+# from more_itertools
+class UnequalIterablesError(ValueError):
+    def __init__(self, details=None):
+        msg = 'Iterables have different lengths'
+        if details is not None:
+            msg += (': index 0 has length {}; index {} has length {}').format(*details)
+
+        super().__init__(msg)
+
+
+# from more_itertools
+def _zip_equal_generator(iterables):
+    _marker = object()
+    for combo in itertools.zip_longest(*iterables, fillvalue=_marker):
+        for val in combo:
+            if val is _marker:
+                raise UnequalIterablesError()
+        yield combo
+
+
+# from more_itertools
+def _zip_equal(*iterables):
+    # Check whether the iterables are all the same size.
+    try:
+        first_size = len(iterables[0])
+        for i, it in enumerate(iterables[1:], 1):
+            size = len(it)
+            if size != first_size:
+                raise UnequalIterablesError(details=(first_size, i, size))
+        # All sizes are equal, we can use the built-in zip.
+        return zip(*iterables)
+    # If any one of the iterables didn't have a length, start reading
+    # them until one runs out.
+    except TypeError:
+        return _zip_equal_generator(iterables)
+
+
+zip_strict = (
+    _zip_equal if sys.version_info < (3, 10) else functools.partial(zip, strict=True)
+)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/config.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebd2e11da3336578bb7065367d9f067c11202978
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/config.py
@@ -0,0 +1,151 @@
+"""distutils.pypirc
+
+Provides the PyPIRCCommand class, the base class for the command classes
+that uses .pypirc in the distutils.command package.
+"""
+
+import email.message
+import os
+from configparser import RawConfigParser
+
+from .cmd import Command
+
+DEFAULT_PYPIRC = """\
+[distutils]
+index-servers =
+    pypi
+
+[pypi]
+username:%s
+password:%s
+"""
+
+
+class PyPIRCCommand(Command):
+    """Base command that knows how to handle the .pypirc file"""
+
+    DEFAULT_REPOSITORY = 'https://upload.pypi.org/legacy/'
+    DEFAULT_REALM = 'pypi'
+    repository = None
+    realm = None
+
+    user_options = [
+        ('repository=', 'r', f"url of repository [default: {DEFAULT_REPOSITORY}]"),
+        ('show-response', None, 'display full response text from server'),
+    ]
+
+    boolean_options = ['show-response']
+
+    def _get_rc_file(self):
+        """Returns rc file path."""
+        return os.path.join(os.path.expanduser('~'), '.pypirc')
+
+    def _store_pypirc(self, username, password):
+        """Creates a default .pypirc file."""
+        rc = self._get_rc_file()
+        raw = os.open(rc, os.O_CREAT | os.O_WRONLY, 0o600)
+        with os.fdopen(raw, 'w', encoding='utf-8') as f:
+            f.write(DEFAULT_PYPIRC % (username, password))
+
+    def _read_pypirc(self):  # noqa: C901
+        """Reads the .pypirc file."""
+        rc = self._get_rc_file()
+        if os.path.exists(rc):
+            self.announce(f'Using PyPI login from {rc}')
+            repository = self.repository or self.DEFAULT_REPOSITORY
+
+            config = RawConfigParser()
+            config.read(rc, encoding='utf-8')
+            sections = config.sections()
+            if 'distutils' in sections:
+                # let's get the list of servers
+                index_servers = config.get('distutils', 'index-servers')
+                _servers = [
+                    server.strip()
+                    for server in index_servers.split('\n')
+                    if server.strip() != ''
+                ]
+                if _servers == []:
+                    # nothing set, let's try to get the default pypi
+                    if 'pypi' in sections:
+                        _servers = ['pypi']
+                    else:
+                        # the file is not properly defined, returning
+                        # an empty dict
+                        return {}
+                for server in _servers:
+                    current = {'server': server}
+                    current['username'] = config.get(server, 'username')
+
+                    # optional params
+                    for key, default in (
+                        ('repository', self.DEFAULT_REPOSITORY),
+                        ('realm', self.DEFAULT_REALM),
+                        ('password', None),
+                    ):
+                        if config.has_option(server, key):
+                            current[key] = config.get(server, key)
+                        else:
+                            current[key] = default
+
+                    # work around people having "repository" for the "pypi"
+                    # section of their config set to the HTTP (rather than
+                    # HTTPS) URL
+                    if server == 'pypi' and repository in (
+                        self.DEFAULT_REPOSITORY,
+                        'pypi',
+                    ):
+                        current['repository'] = self.DEFAULT_REPOSITORY
+                        return current
+
+                    if (
+                        current['server'] == repository
+                        or current['repository'] == repository
+                    ):
+                        return current
+            elif 'server-login' in sections:
+                # old format
+                server = 'server-login'
+                if config.has_option(server, 'repository'):
+                    repository = config.get(server, 'repository')
+                else:
+                    repository = self.DEFAULT_REPOSITORY
+                return {
+                    'username': config.get(server, 'username'),
+                    'password': config.get(server, 'password'),
+                    'repository': repository,
+                    'server': server,
+                    'realm': self.DEFAULT_REALM,
+                }
+
+        return {}
+
+    def _read_pypi_response(self, response):
+        """Read and decode a PyPI HTTP response."""
+        content_type = response.getheader('content-type', 'text/plain')
+        return response.read().decode(_extract_encoding(content_type))
+
+    def initialize_options(self):
+        """Initialize options."""
+        self.repository = None
+        self.realm = None
+        self.show_response = False
+
+    def finalize_options(self):
+        """Finalizes options."""
+        if self.repository is None:
+            self.repository = self.DEFAULT_REPOSITORY
+        if self.realm is None:
+            self.realm = self.DEFAULT_REALM
+
+
+def _extract_encoding(content_type):
+    """
+    >>> _extract_encoding('text/plain')
+    'ascii'
+    >>> _extract_encoding('text/html; charset="utf8"')
+    'utf8'
+    """
+    msg = email.message.EmailMessage()
+    msg['content-type'] = content_type
+    return msg['content-type'].params.get('charset', 'ascii')
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/core.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/core.py
new file mode 100644
index 0000000000000000000000000000000000000000..82113c47c191e98929509b5fba963bb15a6f8474
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/core.py
@@ -0,0 +1,287 @@
+"""distutils.core
+
+The only module that needs to be imported to use the Distutils; provides
+the 'setup' function (which is to be called from the setup script).  Also
+indirectly provides the Distribution and Command classes, although they are
+really defined in distutils.dist and distutils.cmd.
+"""
+
+import os
+import sys
+import tokenize
+
+from .cmd import Command
+from .config import PyPIRCCommand
+from .debug import DEBUG
+
+# Mainly import these so setup scripts can "from distutils.core import" them.
+from .dist import Distribution
+from .errors import (
+    CCompilerError,
+    DistutilsArgError,
+    DistutilsError,
+    DistutilsSetupError,
+)
+from .extension import Extension
+
+__all__ = ['Distribution', 'Command', 'PyPIRCCommand', 'Extension', 'setup']
+
+# This is a barebones help message generated displayed when the user
+# runs the setup script with no arguments at all.  More useful help
+# is generated with various --help options: global help, list commands,
+# and per-command help.
+USAGE = """\
+usage: %(script)s [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...]
+   or: %(script)s --help [cmd1 cmd2 ...]
+   or: %(script)s --help-commands
+   or: %(script)s cmd --help
+"""
+
+
+def gen_usage(script_name):
+    script = os.path.basename(script_name)
+    return USAGE % locals()
+
+
+# Some mild magic to control the behaviour of 'setup()' from 'run_setup()'.
+_setup_stop_after = None
+_setup_distribution = None
+
+# Legal keyword arguments for the setup() function
+setup_keywords = (
+    'distclass',
+    'script_name',
+    'script_args',
+    'options',
+    'name',
+    'version',
+    'author',
+    'author_email',
+    'maintainer',
+    'maintainer_email',
+    'url',
+    'license',
+    'description',
+    'long_description',
+    'keywords',
+    'platforms',
+    'classifiers',
+    'download_url',
+    'requires',
+    'provides',
+    'obsoletes',
+)
+
+# Legal keyword arguments for the Extension constructor
+extension_keywords = (
+    'name',
+    'sources',
+    'include_dirs',
+    'define_macros',
+    'undef_macros',
+    'library_dirs',
+    'libraries',
+    'runtime_library_dirs',
+    'extra_objects',
+    'extra_compile_args',
+    'extra_link_args',
+    'swig_opts',
+    'export_symbols',
+    'depends',
+    'language',
+)
+
+
+def setup(**attrs):  # noqa: C901
+    """The gateway to the Distutils: do everything your setup script needs
+    to do, in a highly flexible and user-driven way.  Briefly: create a
+    Distribution instance; find and parse config files; parse the command
+    line; run each Distutils command found there, customized by the options
+    supplied to 'setup()' (as keyword arguments), in config files, and on
+    the command line.
+
+    The Distribution instance might be an instance of a class supplied via
+    the 'distclass' keyword argument to 'setup'; if no such class is
+    supplied, then the Distribution class (in dist.py) is instantiated.
+    All other arguments to 'setup' (except for 'cmdclass') are used to set
+    attributes of the Distribution instance.
+
+    The 'cmdclass' argument, if supplied, is a dictionary mapping command
+    names to command classes.  Each command encountered on the command line
+    will be turned into a command class, which is in turn instantiated; any
+    class found in 'cmdclass' is used in place of the default, which is
+    (for command 'foo_bar') class 'foo_bar' in module
+    'distutils.command.foo_bar'.  The command class must provide a
+    'user_options' attribute which is a list of option specifiers for
+    'distutils.fancy_getopt'.  Any command-line options between the current
+    and the next command are used to set attributes of the current command
+    object.
+
+    When the entire command-line has been successfully parsed, calls the
+    'run()' method on each command object in turn.  This method will be
+    driven entirely by the Distribution object (which each command object
+    has a reference to, thanks to its constructor), and the
+    command-specific options that became attributes of each command
+    object.
+    """
+
+    global _setup_stop_after, _setup_distribution
+
+    # Determine the distribution class -- either caller-supplied or
+    # our Distribution (see below).
+    klass = attrs.get('distclass')
+    if klass:
+        attrs.pop('distclass')
+    else:
+        klass = Distribution
+
+    if 'script_name' not in attrs:
+        attrs['script_name'] = os.path.basename(sys.argv[0])
+    if 'script_args' not in attrs:
+        attrs['script_args'] = sys.argv[1:]
+
+    # Create the Distribution instance, using the remaining arguments
+    # (ie. everything except distclass) to initialize it
+    try:
+        _setup_distribution = dist = klass(attrs)
+    except DistutilsSetupError as msg:
+        if 'name' not in attrs:
+            raise SystemExit(f"error in setup command: {msg}")
+        else:
+            raise SystemExit("error in {} setup command: {}".format(attrs['name'], msg))
+
+    if _setup_stop_after == "init":
+        return dist
+
+    # Find and parse the config file(s): they will override options from
+    # the setup script, but be overridden by the command line.
+    dist.parse_config_files()
+
+    if DEBUG:
+        print("options (after parsing config files):")
+        dist.dump_option_dicts()
+
+    if _setup_stop_after == "config":
+        return dist
+
+    # Parse the command line and override config files; any
+    # command-line errors are the end user's fault, so turn them into
+    # SystemExit to suppress tracebacks.
+    try:
+        ok = dist.parse_command_line()
+    except DistutilsArgError as msg:
+        raise SystemExit(gen_usage(dist.script_name) + f"\nerror: {msg}")
+
+    if DEBUG:
+        print("options (after parsing command line):")
+        dist.dump_option_dicts()
+
+    if _setup_stop_after == "commandline":
+        return dist
+
+    # And finally, run all the commands found on the command line.
+    if ok:
+        return run_commands(dist)
+
+    return dist
+
+
+# setup ()
+
+
+def run_commands(dist):
+    """Given a Distribution object run all the commands,
+    raising ``SystemExit`` errors in the case of failure.
+
+    This function assumes that either ``sys.argv`` or ``dist.script_args``
+    is already set accordingly.
+    """
+    try:
+        dist.run_commands()
+    except KeyboardInterrupt:
+        raise SystemExit("interrupted")
+    except OSError as exc:
+        if DEBUG:
+            sys.stderr.write(f"error: {exc}\n")
+            raise
+        else:
+            raise SystemExit(f"error: {exc}")
+
+    except (DistutilsError, CCompilerError) as msg:
+        if DEBUG:
+            raise
+        else:
+            raise SystemExit("error: " + str(msg))
+
+    return dist
+
+
+def run_setup(script_name, script_args=None, stop_after="run"):
+    """Run a setup script in a somewhat controlled environment, and
+    return the Distribution instance that drives things.  This is useful
+    if you need to find out the distribution meta-data (passed as
+    keyword args from 'script' to 'setup()', or the contents of the
+    config files or command-line.
+
+    'script_name' is a file that will be read and run with 'exec()';
+    'sys.argv[0]' will be replaced with 'script' for the duration of the
+    call.  'script_args' is a list of strings; if supplied,
+    'sys.argv[1:]' will be replaced by 'script_args' for the duration of
+    the call.
+
+    'stop_after' tells 'setup()' when to stop processing; possible
+    values:
+      init
+        stop after the Distribution instance has been created and
+        populated with the keyword arguments to 'setup()'
+      config
+        stop after config files have been parsed (and their data
+        stored in the Distribution instance)
+      commandline
+        stop after the command-line ('sys.argv[1:]' or 'script_args')
+        have been parsed (and the data stored in the Distribution)
+      run [default]
+        stop after all commands have been run (the same as if 'setup()'
+        had been called in the usual way
+
+    Returns the Distribution instance, which provides all information
+    used to drive the Distutils.
+    """
+    if stop_after not in ('init', 'config', 'commandline', 'run'):
+        raise ValueError(f"invalid value for 'stop_after': {stop_after!r}")
+
+    global _setup_stop_after, _setup_distribution
+    _setup_stop_after = stop_after
+
+    save_argv = sys.argv.copy()
+    g = {'__file__': script_name, '__name__': '__main__'}
+    try:
+        try:
+            sys.argv[0] = script_name
+            if script_args is not None:
+                sys.argv[1:] = script_args
+            # tokenize.open supports automatic encoding detection
+            with tokenize.open(script_name) as f:
+                code = f.read().replace(r'\r\n', r'\n')
+                exec(code, g)
+        finally:
+            sys.argv = save_argv
+            _setup_stop_after = None
+    except SystemExit:
+        # Hmm, should we do something if exiting with a non-zero code
+        # (ie. error)?
+        pass
+
+    if _setup_distribution is None:
+        raise RuntimeError(
+            "'distutils.core.setup()' was never called -- "
+            f"perhaps '{script_name}' is not a Distutils setup script?"
+        )
+
+    # I wonder if the setup script's namespace -- g and l -- would be of
+    # any interest to callers?
+    # print "_setup_distribution:", _setup_distribution
+    return _setup_distribution
+
+
+# run_setup ()
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/cygwinccompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/cygwinccompiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b812fd0555e077b1f09cdc9ca2f4c7e2cdb8007
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/cygwinccompiler.py
@@ -0,0 +1,355 @@
+"""distutils.cygwinccompiler
+
+Provides the CygwinCCompiler class, a subclass of UnixCCompiler that
+handles the Cygwin port of the GNU C compiler to Windows.  It also contains
+the Mingw32CCompiler class which handles the mingw32 port of GCC (same as
+cygwin in no-cygwin mode).
+"""
+
+import copy
+import os
+import pathlib
+import re
+import shlex
+import sys
+import warnings
+from subprocess import check_output
+
+from ._collections import RangeMap
+from .errors import (
+    CCompilerError,
+    CompileError,
+    DistutilsExecError,
+    DistutilsPlatformError,
+)
+from .file_util import write_file
+from .unixccompiler import UnixCCompiler
+from .version import LooseVersion, suppress_known_deprecation
+
+_msvcr_lookup = RangeMap.left(
+    {
+        # MSVC 7.0
+        1300: ['msvcr70'],
+        # MSVC 7.1
+        1310: ['msvcr71'],
+        # VS2005 / MSVC 8.0
+        1400: ['msvcr80'],
+        # VS2008 / MSVC 9.0
+        1500: ['msvcr90'],
+        # VS2010 / MSVC 10.0
+        1600: ['msvcr100'],
+        # VS2012 / MSVC 11.0
+        1700: ['msvcr110'],
+        # VS2013 / MSVC 12.0
+        1800: ['msvcr120'],
+        # VS2015 / MSVC 14.0
+        1900: ['vcruntime140'],
+        2000: RangeMap.undefined_value,
+    },
+)
+
+
+def get_msvcr():
+    """Include the appropriate MSVC runtime library if Python was built
+    with MSVC 7.0 or later.
+    """
+    match = re.search(r'MSC v\.(\d{4})', sys.version)
+    try:
+        msc_ver = int(match.group(1))
+    except AttributeError:
+        return []
+    try:
+        return _msvcr_lookup[msc_ver]
+    except KeyError:
+        raise ValueError(f"Unknown MS Compiler version {msc_ver} ")
+
+
+_runtime_library_dirs_msg = (
+    "Unable to set runtime library search path on Windows, "
+    "usually indicated by `runtime_library_dirs` parameter to Extension"
+)
+
+
+class CygwinCCompiler(UnixCCompiler):
+    """Handles the Cygwin port of the GNU C compiler to Windows."""
+
+    compiler_type = 'cygwin'
+    obj_extension = ".o"
+    static_lib_extension = ".a"
+    shared_lib_extension = ".dll.a"
+    dylib_lib_extension = ".dll"
+    static_lib_format = "lib%s%s"
+    shared_lib_format = "lib%s%s"
+    dylib_lib_format = "cyg%s%s"
+    exe_extension = ".exe"
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        super().__init__(verbose, dry_run, force)
+
+        status, details = check_config_h()
+        self.debug_print(f"Python's GCC status: {status} (details: {details})")
+        if status is not CONFIG_H_OK:
+            self.warn(
+                "Python's pyconfig.h doesn't seem to support your compiler. "
+                f"Reason: {details}. "
+                "Compiling may fail because of undefined preprocessor macros."
+            )
+
+        self.cc = os.environ.get('CC', 'gcc')
+        self.cxx = os.environ.get('CXX', 'g++')
+
+        self.linker_dll = self.cc
+        shared_option = "-shared"
+
+        self.set_executables(
+            compiler=f'{self.cc} -mcygwin -O -Wall',
+            compiler_so=f'{self.cc} -mcygwin -mdll -O -Wall',
+            compiler_cxx=f'{self.cxx} -mcygwin -O -Wall',
+            linker_exe=f'{self.cc} -mcygwin',
+            linker_so=(f'{self.linker_dll} -mcygwin {shared_option}'),
+        )
+
+        # Include the appropriate MSVC runtime library if Python was built
+        # with MSVC 7.0 or later.
+        self.dll_libraries = get_msvcr()
+
+    @property
+    def gcc_version(self):
+        # Older numpy depended on this existing to check for ancient
+        # gcc versions. This doesn't make much sense with clang etc so
+        # just hardcode to something recent.
+        # https://github.com/numpy/numpy/pull/20333
+        warnings.warn(
+            "gcc_version attribute of CygwinCCompiler is deprecated. "
+            "Instead of returning actual gcc version a fixed value 11.2.0 is returned.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        with suppress_known_deprecation():
+            return LooseVersion("11.2.0")
+
+    def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+        """Compiles the source by spawning GCC and windres if needed."""
+        if ext in ('.rc', '.res'):
+            # gcc needs '.res' and '.rc' compiled to object files !!!
+            try:
+                self.spawn(["windres", "-i", src, "-o", obj])
+            except DistutilsExecError as msg:
+                raise CompileError(msg)
+        else:  # for other files use the C-compiler
+            try:
+                self.spawn(
+                    self.compiler_so + cc_args + [src, '-o', obj] + extra_postargs
+                )
+            except DistutilsExecError as msg:
+                raise CompileError(msg)
+
+    def link(
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        """Link the objects."""
+        # use separate copies, so we can modify the lists
+        extra_preargs = copy.copy(extra_preargs or [])
+        libraries = copy.copy(libraries or [])
+        objects = copy.copy(objects or [])
+
+        if runtime_library_dirs:
+            self.warn(_runtime_library_dirs_msg)
+
+        # Additional libraries
+        libraries.extend(self.dll_libraries)
+
+        # handle export symbols by creating a def-file
+        # with executables this only works with gcc/ld as linker
+        if (export_symbols is not None) and (
+            target_desc != self.EXECUTABLE or self.linker_dll == "gcc"
+        ):
+            # (The linker doesn't do anything if output is up-to-date.
+            # So it would probably better to check if we really need this,
+            # but for this we had to insert some unchanged parts of
+            # UnixCCompiler, and this is not what we want.)
+
+            # we want to put some files in the same directory as the
+            # object files are, build_temp doesn't help much
+            # where are the object files
+            temp_dir = os.path.dirname(objects[0])
+            # name of dll to give the helper files the same base name
+            (dll_name, dll_extension) = os.path.splitext(
+                os.path.basename(output_filename)
+            )
+
+            # generate the filenames for these files
+            def_file = os.path.join(temp_dir, dll_name + ".def")
+
+            # Generate .def file
+            contents = [f"LIBRARY {os.path.basename(output_filename)}", "EXPORTS"]
+            for sym in export_symbols:
+                contents.append(sym)
+            self.execute(write_file, (def_file, contents), f"writing {def_file}")
+
+            # next add options for def-file
+
+            # for gcc/ld the def-file is specified as any object files
+            objects.append(def_file)
+
+        # end: if ((export_symbols is not None) and
+        #        (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")):
+
+        # who wants symbols and a many times larger output file
+        # should explicitly switch the debug mode on
+        # otherwise we let ld strip the output file
+        # (On my machine: 10KiB < stripped_file < ??100KiB
+        #   unstripped_file = stripped_file + XXX KiB
+        #  ( XXX=254 for a typical python extension))
+        if not debug:
+            extra_preargs.append("-s")
+
+        UnixCCompiler.link(
+            self,
+            target_desc,
+            objects,
+            output_filename,
+            output_dir,
+            libraries,
+            library_dirs,
+            runtime_library_dirs,
+            None,  # export_symbols, we do this in our def-file
+            debug,
+            extra_preargs,
+            extra_postargs,
+            build_temp,
+            target_lang,
+        )
+
+    def runtime_library_dir_option(self, dir):
+        # cygwin doesn't support rpath. While in theory we could error
+        # out like MSVC does, code might expect it to work like on Unix, so
+        # just warn and hope for the best.
+        self.warn(_runtime_library_dirs_msg)
+        return []
+
+    # -- Miscellaneous methods -----------------------------------------
+
+    def _make_out_path(self, output_dir, strip_dir, src_name):
+        # use normcase to make sure '.rc' is really '.rc' and not '.RC'
+        norm_src_name = os.path.normcase(src_name)
+        return super()._make_out_path(output_dir, strip_dir, norm_src_name)
+
+    @property
+    def out_extensions(self):
+        """
+        Add support for rc and res files.
+        """
+        return {
+            **super().out_extensions,
+            **{ext: ext + self.obj_extension for ext in ('.res', '.rc')},
+        }
+
+
+# the same as cygwin plus some additional parameters
+class Mingw32CCompiler(CygwinCCompiler):
+    """Handles the Mingw32 port of the GNU C compiler to Windows."""
+
+    compiler_type = 'mingw32'
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        super().__init__(verbose, dry_run, force)
+
+        shared_option = "-shared"
+
+        if is_cygwincc(self.cc):
+            raise CCompilerError('Cygwin gcc cannot be used with --compiler=mingw32')
+
+        self.set_executables(
+            compiler=f'{self.cc} -O -Wall',
+            compiler_so=f'{self.cc} -shared -O -Wall',
+            compiler_cxx=f'{self.cxx} -O -Wall',
+            linker_exe=f'{self.cc}',
+            linker_so=f'{self.linker_dll} {shared_option}',
+        )
+
+    def runtime_library_dir_option(self, dir):
+        raise DistutilsPlatformError(_runtime_library_dirs_msg)
+
+
+# Because these compilers aren't configured in Python's pyconfig.h file by
+# default, we should at least warn the user if he is using an unmodified
+# version.
+
+CONFIG_H_OK = "ok"
+CONFIG_H_NOTOK = "not ok"
+CONFIG_H_UNCERTAIN = "uncertain"
+
+
+def check_config_h():
+    """Check if the current Python installation appears amenable to building
+    extensions with GCC.
+
+    Returns a tuple (status, details), where 'status' is one of the following
+    constants:
+
+    - CONFIG_H_OK: all is well, go ahead and compile
+    - CONFIG_H_NOTOK: doesn't look good
+    - CONFIG_H_UNCERTAIN: not sure -- unable to read pyconfig.h
+
+    'details' is a human-readable string explaining the situation.
+
+    Note there are two ways to conclude "OK": either 'sys.version' contains
+    the string "GCC" (implying that this Python was built with GCC), or the
+    installed "pyconfig.h" contains the string "__GNUC__".
+    """
+
+    # XXX since this function also checks sys.version, it's not strictly a
+    # "pyconfig.h" check -- should probably be renamed...
+
+    from distutils import sysconfig
+
+    # if sys.version contains GCC then python was compiled with GCC, and the
+    # pyconfig.h file should be OK
+    if "GCC" in sys.version:
+        return CONFIG_H_OK, "sys.version mentions 'GCC'"
+
+    # Clang would also work
+    if "Clang" in sys.version:
+        return CONFIG_H_OK, "sys.version mentions 'Clang'"
+
+    # let's see if __GNUC__ is mentioned in python.h
+    fn = sysconfig.get_config_h_filename()
+    try:
+        config_h = pathlib.Path(fn).read_text(encoding='utf-8')
+        substring = '__GNUC__'
+        if substring in config_h:
+            code = CONFIG_H_OK
+            mention_inflected = 'mentions'
+        else:
+            code = CONFIG_H_NOTOK
+            mention_inflected = 'does not mention'
+        return code, f"{fn!r} {mention_inflected} {substring!r}"
+    except OSError as exc:
+        return (CONFIG_H_UNCERTAIN, f"couldn't read '{fn}': {exc.strerror}")
+
+
+def is_cygwincc(cc):
+    """Try to determine if the compiler that would be used is from cygwin."""
+    out_string = check_output(shlex.split(cc) + ['-dumpmachine'])
+    return out_string.strip().endswith(b'cygwin')
+
+
+get_versions = None
+"""
+A stand-in for the previous get_versions() function to prevent failures
+when monkeypatched. See pypa/setuptools#2969.
+"""
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/debug.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/debug.py
new file mode 100644
index 0000000000000000000000000000000000000000..daf1660f0d821143e388d37532a39ddfd2ca0347
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/debug.py
@@ -0,0 +1,5 @@
+import os
+
+# If DISTUTILS_DEBUG is anything other than the empty string, we run in
+# debug mode.
+DEBUG = os.environ.get('DISTUTILS_DEBUG')
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dep_util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dep_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..09a8a2e126c8bb009dbbe61c5c4bd5e358744996
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dep_util.py
@@ -0,0 +1,14 @@
+import warnings
+
+from . import _modified
+
+
+def __getattr__(name):
+    if name not in ['newer', 'newer_group', 'newer_pairwise']:
+        raise AttributeError(name)
+    warnings.warn(
+        "dep_util is Deprecated. Use functions from setuptools instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return getattr(_modified, name)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dir_util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dir_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..724afeff6f2d0206420c7e68ef1be6a6b5c490f7
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dir_util.py
@@ -0,0 +1,238 @@
+"""distutils.dir_util
+
+Utility functions for manipulating directories and directory trees."""
+
+import errno
+import os
+
+from ._log import log
+from .errors import DistutilsFileError, DistutilsInternalError
+
+# cache for by mkpath() -- in addition to cheapening redundant calls,
+# eliminates redundant "creating /foo/bar/baz" messages in dry-run mode
+_path_created = set()
+
+
+def mkpath(name, mode=0o777, verbose=True, dry_run=False):  # noqa: C901
+    """Create a directory and any missing ancestor directories.
+
+    If the directory already exists (or if 'name' is the empty string, which
+    means the current directory, which of course exists), then do nothing.
+    Raise DistutilsFileError if unable to create some directory along the way
+    (eg. some sub-path exists, but is a file rather than a directory).
+    If 'verbose' is true, print a one-line summary of each mkdir to stdout.
+    Return the list of directories actually created.
+
+    os.makedirs is not used because:
+
+    a) It's new to Python 1.5.2, and
+    b) it blows up if the directory already exists (in which case it should
+       silently succeed).
+    """
+
+    global _path_created
+
+    # Detect a common bug -- name is None
+    if not isinstance(name, str):
+        raise DistutilsInternalError(f"mkpath: 'name' must be a string (got {name!r})")
+
+    # XXX what's the better way to handle verbosity? print as we create
+    # each directory in the path (the current behaviour), or only announce
+    # the creation of the whole path? (quite easy to do the latter since
+    # we're not using a recursive algorithm)
+
+    name = os.path.normpath(name)
+    created_dirs = []
+    if os.path.isdir(name) or name == '':
+        return created_dirs
+    if os.path.abspath(name) in _path_created:
+        return created_dirs
+
+    (head, tail) = os.path.split(name)
+    tails = [tail]  # stack of lone dirs to create
+
+    while head and tail and not os.path.isdir(head):
+        (head, tail) = os.path.split(head)
+        tails.insert(0, tail)  # push next higher dir onto stack
+
+    # now 'head' contains the deepest directory that already exists
+    # (that is, the child of 'head' in 'name' is the highest directory
+    # that does *not* exist)
+    for d in tails:
+        # print "head = %s, d = %s: " % (head, d),
+        head = os.path.join(head, d)
+        abs_head = os.path.abspath(head)
+
+        if abs_head in _path_created:
+            continue
+
+        if verbose >= 1:
+            log.info("creating %s", head)
+
+        if not dry_run:
+            try:
+                os.mkdir(head, mode)
+            except OSError as exc:
+                if not (exc.errno == errno.EEXIST and os.path.isdir(head)):
+                    raise DistutilsFileError(
+                        f"could not create '{head}': {exc.args[-1]}"
+                    )
+            created_dirs.append(head)
+
+        _path_created.add(abs_head)
+    return created_dirs
+
+
+def create_tree(base_dir, files, mode=0o777, verbose=True, dry_run=False):
+    """Create all the empty directories under 'base_dir' needed to put 'files'
+    there.
+
+    'base_dir' is just the name of a directory which doesn't necessarily
+    exist yet; 'files' is a list of filenames to be interpreted relative to
+    'base_dir'.  'base_dir' + the directory portion of every file in 'files'
+    will be created if it doesn't already exist.  'mode', 'verbose' and
+    'dry_run' flags are as for 'mkpath()'.
+    """
+    # First get the list of directories to create
+    need_dir = set(os.path.join(base_dir, os.path.dirname(file)) for file in files)
+
+    # Now create them
+    for dir in sorted(need_dir):
+        mkpath(dir, mode, verbose=verbose, dry_run=dry_run)
+
+
+def copy_tree(  # noqa: C901
+    src,
+    dst,
+    preserve_mode=True,
+    preserve_times=True,
+    preserve_symlinks=False,
+    update=False,
+    verbose=True,
+    dry_run=False,
+):
+    """Copy an entire directory tree 'src' to a new location 'dst'.
+
+    Both 'src' and 'dst' must be directory names.  If 'src' is not a
+    directory, raise DistutilsFileError.  If 'dst' does not exist, it is
+    created with 'mkpath()'.  The end result of the copy is that every
+    file in 'src' is copied to 'dst', and directories under 'src' are
+    recursively copied to 'dst'.  Return the list of files that were
+    copied or might have been copied, using their output name.  The
+    return value is unaffected by 'update' or 'dry_run': it is simply
+    the list of all files under 'src', with the names changed to be
+    under 'dst'.
+
+    'preserve_mode' and 'preserve_times' are the same as for
+    'copy_file'; note that they only apply to regular files, not to
+    directories.  If 'preserve_symlinks' is true, symlinks will be
+    copied as symlinks (on platforms that support them!); otherwise
+    (the default), the destination of the symlink will be copied.
+    'update' and 'verbose' are the same as for 'copy_file'.
+    """
+    from distutils.file_util import copy_file
+
+    if not dry_run and not os.path.isdir(src):
+        raise DistutilsFileError(f"cannot copy tree '{src}': not a directory")
+    try:
+        names = os.listdir(src)
+    except OSError as e:
+        if dry_run:
+            names = []
+        else:
+            raise DistutilsFileError(f"error listing files in '{src}': {e.strerror}")
+
+    if not dry_run:
+        mkpath(dst, verbose=verbose)
+
+    outputs = []
+
+    for n in names:
+        src_name = os.path.join(src, n)
+        dst_name = os.path.join(dst, n)
+
+        if n.startswith('.nfs'):
+            # skip NFS rename files
+            continue
+
+        if preserve_symlinks and os.path.islink(src_name):
+            link_dest = os.readlink(src_name)
+            if verbose >= 1:
+                log.info("linking %s -> %s", dst_name, link_dest)
+            if not dry_run:
+                os.symlink(link_dest, dst_name)
+            outputs.append(dst_name)
+
+        elif os.path.isdir(src_name):
+            outputs.extend(
+                copy_tree(
+                    src_name,
+                    dst_name,
+                    preserve_mode,
+                    preserve_times,
+                    preserve_symlinks,
+                    update,
+                    verbose=verbose,
+                    dry_run=dry_run,
+                )
+            )
+        else:
+            copy_file(
+                src_name,
+                dst_name,
+                preserve_mode,
+                preserve_times,
+                update,
+                verbose=verbose,
+                dry_run=dry_run,
+            )
+            outputs.append(dst_name)
+
+    return outputs
+
+
+def _build_cmdtuple(path, cmdtuples):
+    """Helper for remove_tree()."""
+    for f in os.listdir(path):
+        real_f = os.path.join(path, f)
+        if os.path.isdir(real_f) and not os.path.islink(real_f):
+            _build_cmdtuple(real_f, cmdtuples)
+        else:
+            cmdtuples.append((os.remove, real_f))
+    cmdtuples.append((os.rmdir, path))
+
+
+def remove_tree(directory, verbose=True, dry_run=False):
+    """Recursively remove an entire directory tree.
+
+    Any errors are ignored (apart from being reported to stdout if 'verbose'
+    is true).
+    """
+    global _path_created
+
+    if verbose >= 1:
+        log.info("removing '%s' (and everything under it)", directory)
+    if dry_run:
+        return
+    cmdtuples = []
+    _build_cmdtuple(directory, cmdtuples)
+    for cmd in cmdtuples:
+        try:
+            cmd[0](cmd[1])
+            # remove dir from cache if it's already there
+            abspath = os.path.abspath(cmd[1])
+            if abspath in _path_created:
+                _path_created.remove(abspath)
+        except OSError as exc:
+            log.warning("error removing %s: %s", directory, exc)
+
+
+def ensure_relative(path):
+    """Take the full path 'path', and make it a relative path.
+
+    This is useful to make 'path' the second argument to os.path.join().
+    """
+    drive, path = os.path.splitdrive(path)
+    if path[0:1] == os.sep:
+        path = drive + path[1:]
+    return path
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dist.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dist.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7d4ca8fc8b9fb7c0bfbc35e7fcefa2b28bd0b2f
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/dist.py
@@ -0,0 +1,1307 @@
+"""distutils.dist
+
+Provides the Distribution class, which represents the module distribution
+being built/installed/distributed.
+"""
+
+import contextlib
+import logging
+import os
+import pathlib
+import re
+import sys
+from collections.abc import Iterable
+from email import message_from_file
+
+from ._vendor.packaging.utils import canonicalize_name, canonicalize_version
+
+try:
+    import warnings
+except ImportError:
+    warnings = None
+
+from ._log import log
+from .debug import DEBUG
+from .errors import (
+    DistutilsArgError,
+    DistutilsClassError,
+    DistutilsModuleError,
+    DistutilsOptionError,
+)
+from .fancy_getopt import FancyGetopt, translate_longopt
+from .util import check_environ, rfc822_escape, strtobool
+
+# Regex to define acceptable Distutils command names.  This is not *quite*
+# the same as a Python NAME -- I don't allow leading underscores.  The fact
+# that they're very similar is no coincidence; the default naming scheme is
+# to look for a Python module named after the command.
+command_re = re.compile(r'^[a-zA-Z]([a-zA-Z0-9_]*)$')
+
+
+def _ensure_list(value, fieldname):
+    if isinstance(value, str):
+        # a string containing comma separated values is okay.  It will
+        # be converted to a list by Distribution.finalize_options().
+        pass
+    elif not isinstance(value, list):
+        # passing a tuple or an iterator perhaps, warn and convert
+        typename = type(value).__name__
+        msg = "Warning: '{fieldname}' should be a list, got type '{typename}'"
+        msg = msg.format(**locals())
+        log.warning(msg)
+        value = list(value)
+    return value
+
+
+class Distribution:
+    """The core of the Distutils.  Most of the work hiding behind 'setup'
+    is really done within a Distribution instance, which farms the work out
+    to the Distutils commands specified on the command line.
+
+    Setup scripts will almost never instantiate Distribution directly,
+    unless the 'setup()' function is totally inadequate to their needs.
+    However, it is conceivable that a setup script might wish to subclass
+    Distribution for some specialized purpose, and then pass the subclass
+    to 'setup()' as the 'distclass' keyword argument.  If so, it is
+    necessary to respect the expectations that 'setup' has of Distribution.
+    See the code for 'setup()', in core.py, for details.
+    """
+
+    # 'global_options' describes the command-line options that may be
+    # supplied to the setup script prior to any actual commands.
+    # Eg. "./setup.py -n" or "./setup.py --quiet" both take advantage of
+    # these global options.  This list should be kept to a bare minimum,
+    # since every global option is also valid as a command option -- and we
+    # don't want to pollute the commands with too many options that they
+    # have minimal control over.
+    # The fourth entry for verbose means that it can be repeated.
+    global_options = [
+        ('verbose', 'v', "run verbosely (default)", 1),
+        ('quiet', 'q', "run quietly (turns verbosity off)"),
+        ('dry-run', 'n', "don't actually do anything"),
+        ('help', 'h', "show detailed help message"),
+        ('no-user-cfg', None, 'ignore pydistutils.cfg in your home directory'),
+    ]
+
+    # 'common_usage' is a short (2-3 line) string describing the common
+    # usage of the setup script.
+    common_usage = """\
+Common commands: (see '--help-commands' for more)
+
+  setup.py build      will build the package underneath 'build/'
+  setup.py install    will install the package
+"""
+
+    # options that are not propagated to the commands
+    display_options = [
+        ('help-commands', None, "list all available commands"),
+        ('name', None, "print package name"),
+        ('version', 'V', "print package version"),
+        ('fullname', None, "print -"),
+        ('author', None, "print the author's name"),
+        ('author-email', None, "print the author's email address"),
+        ('maintainer', None, "print the maintainer's name"),
+        ('maintainer-email', None, "print the maintainer's email address"),
+        ('contact', None, "print the maintainer's name if known, else the author's"),
+        (
+            'contact-email',
+            None,
+            "print the maintainer's email address if known, else the author's",
+        ),
+        ('url', None, "print the URL for this package"),
+        ('license', None, "print the license of the package"),
+        ('licence', None, "alias for --license"),
+        ('description', None, "print the package description"),
+        ('long-description', None, "print the long package description"),
+        ('platforms', None, "print the list of platforms"),
+        ('classifiers', None, "print the list of classifiers"),
+        ('keywords', None, "print the list of keywords"),
+        ('provides', None, "print the list of packages/modules provided"),
+        ('requires', None, "print the list of packages/modules required"),
+        ('obsoletes', None, "print the list of packages/modules made obsolete"),
+    ]
+    display_option_names = [translate_longopt(x[0]) for x in display_options]
+
+    # negative options are options that exclude other options
+    negative_opt = {'quiet': 'verbose'}
+
+    # -- Creation/initialization methods -------------------------------
+
+    def __init__(self, attrs=None):  # noqa: C901
+        """Construct a new Distribution instance: initialize all the
+        attributes of a Distribution, and then use 'attrs' (a dictionary
+        mapping attribute names to values) to assign some of those
+        attributes their "real" values.  (Any attributes not mentioned in
+        'attrs' will be assigned to some null value: 0, None, an empty list
+        or dictionary, etc.)  Most importantly, initialize the
+        'command_obj' attribute to the empty dictionary; this will be
+        filled in with real command objects by 'parse_command_line()'.
+        """
+
+        # Default values for our command-line options
+        self.verbose = True
+        self.dry_run = False
+        self.help = False
+        for attr in self.display_option_names:
+            setattr(self, attr, 0)
+
+        # Store the distribution meta-data (name, version, author, and so
+        # forth) in a separate object -- we're getting to have enough
+        # information here (and enough command-line options) that it's
+        # worth it.  Also delegate 'get_XXX()' methods to the 'metadata'
+        # object in a sneaky and underhanded (but efficient!) way.
+        self.metadata = DistributionMetadata()
+        for basename in self.metadata._METHOD_BASENAMES:
+            method_name = "get_" + basename
+            setattr(self, method_name, getattr(self.metadata, method_name))
+
+        # 'cmdclass' maps command names to class objects, so we
+        # can 1) quickly figure out which class to instantiate when
+        # we need to create a new command object, and 2) have a way
+        # for the setup script to override command classes
+        self.cmdclass = {}
+
+        # 'command_packages' is a list of packages in which commands
+        # are searched for.  The factory for command 'foo' is expected
+        # to be named 'foo' in the module 'foo' in one of the packages
+        # named here.  This list is searched from the left; an error
+        # is raised if no named package provides the command being
+        # searched for.  (Always access using get_command_packages().)
+        self.command_packages = None
+
+        # 'script_name' and 'script_args' are usually set to sys.argv[0]
+        # and sys.argv[1:], but they can be overridden when the caller is
+        # not necessarily a setup script run from the command-line.
+        self.script_name = None
+        self.script_args = None
+
+        # 'command_options' is where we store command options between
+        # parsing them (from config files, the command-line, etc.) and when
+        # they are actually needed -- ie. when the command in question is
+        # instantiated.  It is a dictionary of dictionaries of 2-tuples:
+        #   command_options = { command_name : { option : (source, value) } }
+        self.command_options = {}
+
+        # 'dist_files' is the list of (command, pyversion, file) that
+        # have been created by any dist commands run so far. This is
+        # filled regardless of whether the run is dry or not. pyversion
+        # gives sysconfig.get_python_version() if the dist file is
+        # specific to a Python version, 'any' if it is good for all
+        # Python versions on the target platform, and '' for a source
+        # file. pyversion should not be used to specify minimum or
+        # maximum required Python versions; use the metainfo for that
+        # instead.
+        self.dist_files = []
+
+        # These options are really the business of various commands, rather
+        # than of the Distribution itself.  We provide aliases for them in
+        # Distribution as a convenience to the developer.
+        self.packages = None
+        self.package_data = {}
+        self.package_dir = None
+        self.py_modules = None
+        self.libraries = None
+        self.headers = None
+        self.ext_modules = None
+        self.ext_package = None
+        self.include_dirs = None
+        self.extra_path = None
+        self.scripts = None
+        self.data_files = None
+        self.password = ''
+
+        # And now initialize bookkeeping stuff that can't be supplied by
+        # the caller at all.  'command_obj' maps command names to
+        # Command instances -- that's how we enforce that every command
+        # class is a singleton.
+        self.command_obj = {}
+
+        # 'have_run' maps command names to boolean values; it keeps track
+        # of whether we have actually run a particular command, to make it
+        # cheap to "run" a command whenever we think we might need to -- if
+        # it's already been done, no need for expensive filesystem
+        # operations, we just check the 'have_run' dictionary and carry on.
+        # It's only safe to query 'have_run' for a command class that has
+        # been instantiated -- a false value will be inserted when the
+        # command object is created, and replaced with a true value when
+        # the command is successfully run.  Thus it's probably best to use
+        # '.get()' rather than a straight lookup.
+        self.have_run = {}
+
+        # Now we'll use the attrs dictionary (ultimately, keyword args from
+        # the setup script) to possibly override any or all of these
+        # distribution options.
+
+        if attrs:
+            # Pull out the set of command options and work on them
+            # specifically.  Note that this order guarantees that aliased
+            # command options will override any supplied redundantly
+            # through the general options dictionary.
+            options = attrs.get('options')
+            if options is not None:
+                del attrs['options']
+                for command, cmd_options in options.items():
+                    opt_dict = self.get_option_dict(command)
+                    for opt, val in cmd_options.items():
+                        opt_dict[opt] = ("setup script", val)
+
+            if 'licence' in attrs:
+                attrs['license'] = attrs['licence']
+                del attrs['licence']
+                msg = "'licence' distribution option is deprecated; use 'license'"
+                if warnings is not None:
+                    warnings.warn(msg)
+                else:
+                    sys.stderr.write(msg + "\n")
+
+            # Now work on the rest of the attributes.  Any attribute that's
+            # not already defined is invalid!
+            for key, val in attrs.items():
+                if hasattr(self.metadata, "set_" + key):
+                    getattr(self.metadata, "set_" + key)(val)
+                elif hasattr(self.metadata, key):
+                    setattr(self.metadata, key, val)
+                elif hasattr(self, key):
+                    setattr(self, key, val)
+                else:
+                    msg = f"Unknown distribution option: {key!r}"
+                    warnings.warn(msg)
+
+        # no-user-cfg is handled before other command line args
+        # because other args override the config files, and this
+        # one is needed before we can load the config files.
+        # If attrs['script_args'] wasn't passed, assume false.
+        #
+        # This also make sure we just look at the global options
+        self.want_user_cfg = True
+
+        if self.script_args is not None:
+            for arg in self.script_args:
+                if not arg.startswith('-'):
+                    break
+                if arg == '--no-user-cfg':
+                    self.want_user_cfg = False
+                    break
+
+        self.finalize_options()
+
+    def get_option_dict(self, command):
+        """Get the option dictionary for a given command.  If that
+        command's option dictionary hasn't been created yet, then create it
+        and return the new dictionary; otherwise, return the existing
+        option dictionary.
+        """
+        dict = self.command_options.get(command)
+        if dict is None:
+            dict = self.command_options[command] = {}
+        return dict
+
+    def dump_option_dicts(self, header=None, commands=None, indent=""):
+        from pprint import pformat
+
+        if commands is None:  # dump all command option dicts
+            commands = sorted(self.command_options.keys())
+
+        if header is not None:
+            self.announce(indent + header)
+            indent = indent + "  "
+
+        if not commands:
+            self.announce(indent + "no commands known yet")
+            return
+
+        for cmd_name in commands:
+            opt_dict = self.command_options.get(cmd_name)
+            if opt_dict is None:
+                self.announce(indent + f"no option dict for '{cmd_name}' command")
+            else:
+                self.announce(indent + f"option dict for '{cmd_name}' command:")
+                out = pformat(opt_dict)
+                for line in out.split('\n'):
+                    self.announce(indent + "  " + line)
+
+    # -- Config file finding/parsing methods ---------------------------
+
+    def find_config_files(self):
+        """Find as many configuration files as should be processed for this
+        platform, and return a list of filenames in the order in which they
+        should be parsed.  The filenames returned are guaranteed to exist
+        (modulo nasty race conditions).
+
+        There are multiple possible config files:
+        - distutils.cfg in the Distutils installation directory (i.e.
+          where the top-level Distutils __inst__.py file lives)
+        - a file in the user's home directory named .pydistutils.cfg
+          on Unix and pydistutils.cfg on Windows/Mac; may be disabled
+          with the ``--no-user-cfg`` option
+        - setup.cfg in the current directory
+        - a file named by an environment variable
+        """
+        check_environ()
+        files = [str(path) for path in self._gen_paths() if os.path.isfile(path)]
+
+        if DEBUG:
+            self.announce("using config files: {}".format(', '.join(files)))
+
+        return files
+
+    def _gen_paths(self):
+        # The system-wide Distutils config file
+        sys_dir = pathlib.Path(sys.modules['distutils'].__file__).parent
+        yield sys_dir / "distutils.cfg"
+
+        # The per-user config file
+        prefix = '.' * (os.name == 'posix')
+        filename = prefix + 'pydistutils.cfg'
+        if self.want_user_cfg:
+            yield pathlib.Path('~').expanduser() / filename
+
+        # All platforms support local setup.cfg
+        yield pathlib.Path('setup.cfg')
+
+        # Additional config indicated in the environment
+        with contextlib.suppress(TypeError):
+            yield pathlib.Path(os.getenv("DIST_EXTRA_CONFIG"))
+
+    def parse_config_files(self, filenames=None):  # noqa: C901
+        from configparser import ConfigParser
+
+        # Ignore install directory options if we have a venv
+        if sys.prefix != sys.base_prefix:
+            ignore_options = [
+                'install-base',
+                'install-platbase',
+                'install-lib',
+                'install-platlib',
+                'install-purelib',
+                'install-headers',
+                'install-scripts',
+                'install-data',
+                'prefix',
+                'exec-prefix',
+                'home',
+                'user',
+                'root',
+            ]
+        else:
+            ignore_options = []
+
+        ignore_options = frozenset(ignore_options)
+
+        if filenames is None:
+            filenames = self.find_config_files()
+
+        if DEBUG:
+            self.announce("Distribution.parse_config_files():")
+
+        parser = ConfigParser()
+        for filename in filenames:
+            if DEBUG:
+                self.announce(f"  reading {filename}")
+            parser.read(filename, encoding='utf-8')
+            for section in parser.sections():
+                options = parser.options(section)
+                opt_dict = self.get_option_dict(section)
+
+                for opt in options:
+                    if opt != '__name__' and opt not in ignore_options:
+                        val = parser.get(section, opt)
+                        opt = opt.replace('-', '_')
+                        opt_dict[opt] = (filename, val)
+
+            # Make the ConfigParser forget everything (so we retain
+            # the original filenames that options come from)
+            parser.__init__()
+
+        # If there was a "global" section in the config file, use it
+        # to set Distribution options.
+
+        if 'global' in self.command_options:
+            for opt, (_src, val) in self.command_options['global'].items():
+                alias = self.negative_opt.get(opt)
+                try:
+                    if alias:
+                        setattr(self, alias, not strtobool(val))
+                    elif opt in ('verbose', 'dry_run'):  # ugh!
+                        setattr(self, opt, strtobool(val))
+                    else:
+                        setattr(self, opt, val)
+                except ValueError as msg:
+                    raise DistutilsOptionError(msg)
+
+    # -- Command-line parsing methods ----------------------------------
+
+    def parse_command_line(self):
+        """Parse the setup script's command line, taken from the
+        'script_args' instance attribute (which defaults to 'sys.argv[1:]'
+        -- see 'setup()' in core.py).  This list is first processed for
+        "global options" -- options that set attributes of the Distribution
+        instance.  Then, it is alternately scanned for Distutils commands
+        and options for that command.  Each new command terminates the
+        options for the previous command.  The allowed options for a
+        command are determined by the 'user_options' attribute of the
+        command class -- thus, we have to be able to load command classes
+        in order to parse the command line.  Any error in that 'options'
+        attribute raises DistutilsGetoptError; any error on the
+        command-line raises DistutilsArgError.  If no Distutils commands
+        were found on the command line, raises DistutilsArgError.  Return
+        true if command-line was successfully parsed and we should carry
+        on with executing commands; false if no errors but we shouldn't
+        execute commands (currently, this only happens if user asks for
+        help).
+        """
+        #
+        # We now have enough information to show the Macintosh dialog
+        # that allows the user to interactively specify the "command line".
+        #
+        toplevel_options = self._get_toplevel_options()
+
+        # We have to parse the command line a bit at a time -- global
+        # options, then the first command, then its options, and so on --
+        # because each command will be handled by a different class, and
+        # the options that are valid for a particular class aren't known
+        # until we have loaded the command class, which doesn't happen
+        # until we know what the command is.
+
+        self.commands = []
+        parser = FancyGetopt(toplevel_options + self.display_options)
+        parser.set_negative_aliases(self.negative_opt)
+        parser.set_aliases({'licence': 'license'})
+        args = parser.getopt(args=self.script_args, object=self)
+        option_order = parser.get_option_order()
+        logging.getLogger().setLevel(logging.WARN - 10 * self.verbose)
+
+        # for display options we return immediately
+        if self.handle_display_options(option_order):
+            return
+        while args:
+            args = self._parse_command_opts(parser, args)
+            if args is None:  # user asked for help (and got it)
+                return
+
+        # Handle the cases of --help as a "global" option, ie.
+        # "setup.py --help" and "setup.py --help command ...".  For the
+        # former, we show global options (--verbose, --dry-run, etc.)
+        # and display-only options (--name, --version, etc.); for the
+        # latter, we omit the display-only options and show help for
+        # each command listed on the command line.
+        if self.help:
+            self._show_help(
+                parser, display_options=len(self.commands) == 0, commands=self.commands
+            )
+            return
+
+        # Oops, no commands found -- an end-user error
+        if not self.commands:
+            raise DistutilsArgError("no commands supplied")
+
+        # All is well: return true
+        return True
+
+    def _get_toplevel_options(self):
+        """Return the non-display options recognized at the top level.
+
+        This includes options that are recognized *only* at the top
+        level as well as options recognized for commands.
+        """
+        return self.global_options + [
+            (
+                "command-packages=",
+                None,
+                "list of packages that provide distutils commands",
+            ),
+        ]
+
+    def _parse_command_opts(self, parser, args):  # noqa: C901
+        """Parse the command-line options for a single command.
+        'parser' must be a FancyGetopt instance; 'args' must be the list
+        of arguments, starting with the current command (whose options
+        we are about to parse).  Returns a new version of 'args' with
+        the next command at the front of the list; will be the empty
+        list if there are no more commands on the command line.  Returns
+        None if the user asked for help on this command.
+        """
+        # late import because of mutual dependence between these modules
+        from distutils.cmd import Command
+
+        # Pull the current command from the head of the command line
+        command = args[0]
+        if not command_re.match(command):
+            raise SystemExit(f"invalid command name '{command}'")
+        self.commands.append(command)
+
+        # Dig up the command class that implements this command, so we
+        # 1) know that it's a valid command, and 2) know which options
+        # it takes.
+        try:
+            cmd_class = self.get_command_class(command)
+        except DistutilsModuleError as msg:
+            raise DistutilsArgError(msg)
+
+        # Require that the command class be derived from Command -- want
+        # to be sure that the basic "command" interface is implemented.
+        if not issubclass(cmd_class, Command):
+            raise DistutilsClassError(
+                f"command class {cmd_class} must subclass Command"
+            )
+
+        # Also make sure that the command object provides a list of its
+        # known options.
+        if not (
+            hasattr(cmd_class, 'user_options')
+            and isinstance(cmd_class.user_options, list)
+        ):
+            msg = (
+                "command class %s must provide "
+                "'user_options' attribute (a list of tuples)"
+            )
+            raise DistutilsClassError(msg % cmd_class)
+
+        # If the command class has a list of negative alias options,
+        # merge it in with the global negative aliases.
+        negative_opt = self.negative_opt
+        if hasattr(cmd_class, 'negative_opt'):
+            negative_opt = negative_opt.copy()
+            negative_opt.update(cmd_class.negative_opt)
+
+        # Check for help_options in command class.  They have a different
+        # format (tuple of four) so we need to preprocess them here.
+        if hasattr(cmd_class, 'help_options') and isinstance(
+            cmd_class.help_options, list
+        ):
+            help_options = fix_help_options(cmd_class.help_options)
+        else:
+            help_options = []
+
+        # All commands support the global options too, just by adding
+        # in 'global_options'.
+        parser.set_option_table(
+            self.global_options + cmd_class.user_options + help_options
+        )
+        parser.set_negative_aliases(negative_opt)
+        (args, opts) = parser.getopt(args[1:])
+        if hasattr(opts, 'help') and opts.help:
+            self._show_help(parser, display_options=False, commands=[cmd_class])
+            return
+
+        if hasattr(cmd_class, 'help_options') and isinstance(
+            cmd_class.help_options, list
+        ):
+            help_option_found = 0
+            for help_option, _short, _desc, func in cmd_class.help_options:
+                if hasattr(opts, parser.get_attr_name(help_option)):
+                    help_option_found = 1
+                    if callable(func):
+                        func()
+                    else:
+                        raise DistutilsClassError(
+                            f"invalid help function {func!r} for help option '{help_option}': "
+                            "must be a callable object (function, etc.)"
+                        )
+
+            if help_option_found:
+                return
+
+        # Put the options from the command-line into their official
+        # holding pen, the 'command_options' dictionary.
+        opt_dict = self.get_option_dict(command)
+        for name, value in vars(opts).items():
+            opt_dict[name] = ("command line", value)
+
+        return args
+
+    def finalize_options(self):
+        """Set final values for all the options on the Distribution
+        instance, analogous to the .finalize_options() method of Command
+        objects.
+        """
+        for attr in ('keywords', 'platforms'):
+            value = getattr(self.metadata, attr)
+            if value is None:
+                continue
+            if isinstance(value, str):
+                value = [elm.strip() for elm in value.split(',')]
+                setattr(self.metadata, attr, value)
+
+    def _show_help(
+        self, parser, global_options=True, display_options=True, commands: Iterable = ()
+    ):
+        """Show help for the setup script command-line in the form of
+        several lists of command-line options.  'parser' should be a
+        FancyGetopt instance; do not expect it to be returned in the
+        same state, as its option table will be reset to make it
+        generate the correct help text.
+
+        If 'global_options' is true, lists the global options:
+        --verbose, --dry-run, etc.  If 'display_options' is true, lists
+        the "display-only" options: --name, --version, etc.  Finally,
+        lists per-command help for every command name or command class
+        in 'commands'.
+        """
+        # late import because of mutual dependence between these modules
+        from distutils.cmd import Command
+        from distutils.core import gen_usage
+
+        if global_options:
+            if display_options:
+                options = self._get_toplevel_options()
+            else:
+                options = self.global_options
+            parser.set_option_table(options)
+            parser.print_help(self.common_usage + "\nGlobal options:")
+            print()
+
+        if display_options:
+            parser.set_option_table(self.display_options)
+            parser.print_help(
+                "Information display options (just display information, ignore any commands)"
+            )
+            print()
+
+        for command in self.commands:
+            if isinstance(command, type) and issubclass(command, Command):
+                klass = command
+            else:
+                klass = self.get_command_class(command)
+            if hasattr(klass, 'help_options') and isinstance(klass.help_options, list):
+                parser.set_option_table(
+                    klass.user_options + fix_help_options(klass.help_options)
+                )
+            else:
+                parser.set_option_table(klass.user_options)
+            parser.print_help(f"Options for '{klass.__name__}' command:")
+            print()
+
+        print(gen_usage(self.script_name))
+
+    def handle_display_options(self, option_order):
+        """If there were any non-global "display-only" options
+        (--help-commands or the metadata display options) on the command
+        line, display the requested info and return true; else return
+        false.
+        """
+        from distutils.core import gen_usage
+
+        # User just wants a list of commands -- we'll print it out and stop
+        # processing now (ie. if they ran "setup --help-commands foo bar",
+        # we ignore "foo bar").
+        if self.help_commands:
+            self.print_commands()
+            print()
+            print(gen_usage(self.script_name))
+            return 1
+
+        # If user supplied any of the "display metadata" options, then
+        # display that metadata in the order in which the user supplied the
+        # metadata options.
+        any_display_options = 0
+        is_display_option = set()
+        for option in self.display_options:
+            is_display_option.add(option[0])
+
+        for opt, val in option_order:
+            if val and opt in is_display_option:
+                opt = translate_longopt(opt)
+                value = getattr(self.metadata, "get_" + opt)()
+                if opt in ('keywords', 'platforms'):
+                    print(','.join(value))
+                elif opt in ('classifiers', 'provides', 'requires', 'obsoletes'):
+                    print('\n'.join(value))
+                else:
+                    print(value)
+                any_display_options = 1
+
+        return any_display_options
+
+    def print_command_list(self, commands, header, max_length):
+        """Print a subset of the list of all commands -- used by
+        'print_commands()'.
+        """
+        print(header + ":")
+
+        for cmd in commands:
+            klass = self.cmdclass.get(cmd)
+            if not klass:
+                klass = self.get_command_class(cmd)
+            try:
+                description = klass.description
+            except AttributeError:
+                description = "(no description available)"
+
+            print("  %-*s  %s" % (max_length, cmd, description))
+
+    def print_commands(self):
+        """Print out a help message listing all available commands with a
+        description of each.  The list is divided into "standard commands"
+        (listed in distutils.command.__all__) and "extra commands"
+        (mentioned in self.cmdclass, but not a standard command).  The
+        descriptions come from the command class attribute
+        'description'.
+        """
+        import distutils.command
+
+        std_commands = distutils.command.__all__
+        is_std = set()
+        for cmd in std_commands:
+            is_std.add(cmd)
+
+        extra_commands = []
+        for cmd in self.cmdclass.keys():
+            if cmd not in is_std:
+                extra_commands.append(cmd)
+
+        max_length = 0
+        for cmd in std_commands + extra_commands:
+            if len(cmd) > max_length:
+                max_length = len(cmd)
+
+        self.print_command_list(std_commands, "Standard commands", max_length)
+        if extra_commands:
+            print()
+            self.print_command_list(extra_commands, "Extra commands", max_length)
+
+    def get_command_list(self):
+        """Get a list of (command, description) tuples.
+        The list is divided into "standard commands" (listed in
+        distutils.command.__all__) and "extra commands" (mentioned in
+        self.cmdclass, but not a standard command).  The descriptions come
+        from the command class attribute 'description'.
+        """
+        # Currently this is only used on Mac OS, for the Mac-only GUI
+        # Distutils interface (by Jack Jansen)
+        import distutils.command
+
+        std_commands = distutils.command.__all__
+        is_std = set()
+        for cmd in std_commands:
+            is_std.add(cmd)
+
+        extra_commands = []
+        for cmd in self.cmdclass.keys():
+            if cmd not in is_std:
+                extra_commands.append(cmd)
+
+        rv = []
+        for cmd in std_commands + extra_commands:
+            klass = self.cmdclass.get(cmd)
+            if not klass:
+                klass = self.get_command_class(cmd)
+            try:
+                description = klass.description
+            except AttributeError:
+                description = "(no description available)"
+            rv.append((cmd, description))
+        return rv
+
+    # -- Command class/object methods ----------------------------------
+
+    def get_command_packages(self):
+        """Return a list of packages from which commands are loaded."""
+        pkgs = self.command_packages
+        if not isinstance(pkgs, list):
+            if pkgs is None:
+                pkgs = ''
+            pkgs = [pkg.strip() for pkg in pkgs.split(',') if pkg != '']
+            if "distutils.command" not in pkgs:
+                pkgs.insert(0, "distutils.command")
+            self.command_packages = pkgs
+        return pkgs
+
+    def get_command_class(self, command):
+        """Return the class that implements the Distutils command named by
+        'command'.  First we check the 'cmdclass' dictionary; if the
+        command is mentioned there, we fetch the class object from the
+        dictionary and return it.  Otherwise we load the command module
+        ("distutils.command." + command) and fetch the command class from
+        the module.  The loaded class is also stored in 'cmdclass'
+        to speed future calls to 'get_command_class()'.
+
+        Raises DistutilsModuleError if the expected module could not be
+        found, or if that module does not define the expected class.
+        """
+        klass = self.cmdclass.get(command)
+        if klass:
+            return klass
+
+        for pkgname in self.get_command_packages():
+            module_name = f"{pkgname}.{command}"
+            klass_name = command
+
+            try:
+                __import__(module_name)
+                module = sys.modules[module_name]
+            except ImportError:
+                continue
+
+            try:
+                klass = getattr(module, klass_name)
+            except AttributeError:
+                raise DistutilsModuleError(
+                    f"invalid command '{command}' (no class '{klass_name}' in module '{module_name}')"
+                )
+
+            self.cmdclass[command] = klass
+            return klass
+
+        raise DistutilsModuleError(f"invalid command '{command}'")
+
+    def get_command_obj(self, command, create=True):
+        """Return the command object for 'command'.  Normally this object
+        is cached on a previous call to 'get_command_obj()'; if no command
+        object for 'command' is in the cache, then we either create and
+        return it (if 'create' is true) or return None.
+        """
+        cmd_obj = self.command_obj.get(command)
+        if not cmd_obj and create:
+            if DEBUG:
+                self.announce(
+                    "Distribution.get_command_obj(): "
+                    f"creating '{command}' command object"
+                )
+
+            klass = self.get_command_class(command)
+            cmd_obj = self.command_obj[command] = klass(self)
+            self.have_run[command] = False
+
+            # Set any options that were supplied in config files
+            # or on the command line.  (NB. support for error
+            # reporting is lame here: any errors aren't reported
+            # until 'finalize_options()' is called, which means
+            # we won't report the source of the error.)
+            options = self.command_options.get(command)
+            if options:
+                self._set_command_options(cmd_obj, options)
+
+        return cmd_obj
+
+    def _set_command_options(self, command_obj, option_dict=None):  # noqa: C901
+        """Set the options for 'command_obj' from 'option_dict'.  Basically
+        this means copying elements of a dictionary ('option_dict') to
+        attributes of an instance ('command').
+
+        'command_obj' must be a Command instance.  If 'option_dict' is not
+        supplied, uses the standard option dictionary for this command
+        (from 'self.command_options').
+        """
+        command_name = command_obj.get_command_name()
+        if option_dict is None:
+            option_dict = self.get_option_dict(command_name)
+
+        if DEBUG:
+            self.announce(f"  setting options for '{command_name}' command:")
+        for option, (source, value) in option_dict.items():
+            if DEBUG:
+                self.announce(f"    {option} = {value} (from {source})")
+            try:
+                bool_opts = [translate_longopt(o) for o in command_obj.boolean_options]
+            except AttributeError:
+                bool_opts = []
+            try:
+                neg_opt = command_obj.negative_opt
+            except AttributeError:
+                neg_opt = {}
+
+            try:
+                is_string = isinstance(value, str)
+                if option in neg_opt and is_string:
+                    setattr(command_obj, neg_opt[option], not strtobool(value))
+                elif option in bool_opts and is_string:
+                    setattr(command_obj, option, strtobool(value))
+                elif hasattr(command_obj, option):
+                    setattr(command_obj, option, value)
+                else:
+                    raise DistutilsOptionError(
+                        f"error in {source}: command '{command_name}' has no such option '{option}'"
+                    )
+            except ValueError as msg:
+                raise DistutilsOptionError(msg)
+
+    def reinitialize_command(self, command, reinit_subcommands=False):
+        """Reinitializes a command to the state it was in when first
+        returned by 'get_command_obj()': ie., initialized but not yet
+        finalized.  This provides the opportunity to sneak option
+        values in programmatically, overriding or supplementing
+        user-supplied values from the config files and command line.
+        You'll have to re-finalize the command object (by calling
+        'finalize_options()' or 'ensure_finalized()') before using it for
+        real.
+
+        'command' should be a command name (string) or command object.  If
+        'reinit_subcommands' is true, also reinitializes the command's
+        sub-commands, as declared by the 'sub_commands' class attribute (if
+        it has one).  See the "install" command for an example.  Only
+        reinitializes the sub-commands that actually matter, ie. those
+        whose test predicates return true.
+
+        Returns the reinitialized command object.
+        """
+        from distutils.cmd import Command
+
+        if not isinstance(command, Command):
+            command_name = command
+            command = self.get_command_obj(command_name)
+        else:
+            command_name = command.get_command_name()
+
+        if not command.finalized:
+            return command
+        command.initialize_options()
+        command.finalized = False
+        self.have_run[command_name] = False
+        self._set_command_options(command)
+
+        if reinit_subcommands:
+            for sub in command.get_sub_commands():
+                self.reinitialize_command(sub, reinit_subcommands)
+
+        return command
+
+    # -- Methods that operate on the Distribution ----------------------
+
+    def announce(self, msg, level=logging.INFO):
+        log.log(level, msg)
+
+    def run_commands(self):
+        """Run each command that was seen on the setup script command line.
+        Uses the list of commands found and cache of command objects
+        created by 'get_command_obj()'.
+        """
+        for cmd in self.commands:
+            self.run_command(cmd)
+
+    # -- Methods that operate on its Commands --------------------------
+
+    def run_command(self, command):
+        """Do whatever it takes to run a command (including nothing at all,
+        if the command has already been run).  Specifically: if we have
+        already created and run the command named by 'command', return
+        silently without doing anything.  If the command named by 'command'
+        doesn't even have a command object yet, create one.  Then invoke
+        'run()' on that command object (or an existing one).
+        """
+        # Already been here, done that? then return silently.
+        if self.have_run.get(command):
+            return
+
+        log.info("running %s", command)
+        cmd_obj = self.get_command_obj(command)
+        cmd_obj.ensure_finalized()
+        cmd_obj.run()
+        self.have_run[command] = True
+
+    # -- Distribution query methods ------------------------------------
+
+    def has_pure_modules(self):
+        return len(self.packages or self.py_modules or []) > 0
+
+    def has_ext_modules(self):
+        return self.ext_modules and len(self.ext_modules) > 0
+
+    def has_c_libraries(self):
+        return self.libraries and len(self.libraries) > 0
+
+    def has_modules(self):
+        return self.has_pure_modules() or self.has_ext_modules()
+
+    def has_headers(self):
+        return self.headers and len(self.headers) > 0
+
+    def has_scripts(self):
+        return self.scripts and len(self.scripts) > 0
+
+    def has_data_files(self):
+        return self.data_files and len(self.data_files) > 0
+
+    def is_pure(self):
+        return (
+            self.has_pure_modules()
+            and not self.has_ext_modules()
+            and not self.has_c_libraries()
+        )
+
+    # -- Metadata query methods ----------------------------------------
+
+    # If you're looking for 'get_name()', 'get_version()', and so forth,
+    # they are defined in a sneaky way: the constructor binds self.get_XXX
+    # to self.metadata.get_XXX.  The actual code is in the
+    # DistributionMetadata class, below.
+
+
+class DistributionMetadata:
+    """Dummy class to hold the distribution meta-data: name, version,
+    author, and so forth.
+    """
+
+    _METHOD_BASENAMES = (
+        "name",
+        "version",
+        "author",
+        "author_email",
+        "maintainer",
+        "maintainer_email",
+        "url",
+        "license",
+        "description",
+        "long_description",
+        "keywords",
+        "platforms",
+        "fullname",
+        "contact",
+        "contact_email",
+        "classifiers",
+        "download_url",
+        # PEP 314
+        "provides",
+        "requires",
+        "obsoletes",
+    )
+
+    def __init__(self, path=None):
+        if path is not None:
+            self.read_pkg_file(open(path))
+        else:
+            self.name = None
+            self.version = None
+            self.author = None
+            self.author_email = None
+            self.maintainer = None
+            self.maintainer_email = None
+            self.url = None
+            self.license = None
+            self.description = None
+            self.long_description = None
+            self.keywords = None
+            self.platforms = None
+            self.classifiers = None
+            self.download_url = None
+            # PEP 314
+            self.provides = None
+            self.requires = None
+            self.obsoletes = None
+
+    def read_pkg_file(self, file):
+        """Reads the metadata values from a file object."""
+        msg = message_from_file(file)
+
+        def _read_field(name):
+            value = msg[name]
+            if value and value != "UNKNOWN":
+                return value
+
+        def _read_list(name):
+            values = msg.get_all(name, None)
+            if values == []:
+                return None
+            return values
+
+        metadata_version = msg['metadata-version']
+        self.name = _read_field('name')
+        self.version = _read_field('version')
+        self.description = _read_field('summary')
+        # we are filling author only.
+        self.author = _read_field('author')
+        self.maintainer = None
+        self.author_email = _read_field('author-email')
+        self.maintainer_email = None
+        self.url = _read_field('home-page')
+        self.license = _read_field('license')
+
+        if 'download-url' in msg:
+            self.download_url = _read_field('download-url')
+        else:
+            self.download_url = None
+
+        self.long_description = _read_field('description')
+        self.description = _read_field('summary')
+
+        if 'keywords' in msg:
+            self.keywords = _read_field('keywords').split(',')
+
+        self.platforms = _read_list('platform')
+        self.classifiers = _read_list('classifier')
+
+        # PEP 314 - these fields only exist in 1.1
+        if metadata_version == '1.1':
+            self.requires = _read_list('requires')
+            self.provides = _read_list('provides')
+            self.obsoletes = _read_list('obsoletes')
+        else:
+            self.requires = None
+            self.provides = None
+            self.obsoletes = None
+
+    def write_pkg_info(self, base_dir):
+        """Write the PKG-INFO file into the release tree."""
+        with open(
+            os.path.join(base_dir, 'PKG-INFO'), 'w', encoding='UTF-8'
+        ) as pkg_info:
+            self.write_pkg_file(pkg_info)
+
+    def write_pkg_file(self, file):
+        """Write the PKG-INFO format data to a file object."""
+        version = '1.0'
+        if (
+            self.provides
+            or self.requires
+            or self.obsoletes
+            or self.classifiers
+            or self.download_url
+        ):
+            version = '1.1'
+
+        # required fields
+        file.write(f'Metadata-Version: {version}\n')
+        file.write(f'Name: {self.get_name()}\n')
+        file.write(f'Version: {self.get_version()}\n')
+
+        def maybe_write(header, val):
+            if val:
+                file.write(f"{header}: {val}\n")
+
+        # optional fields
+        maybe_write("Summary", self.get_description())
+        maybe_write("Home-page", self.get_url())
+        maybe_write("Author", self.get_contact())
+        maybe_write("Author-email", self.get_contact_email())
+        maybe_write("License", self.get_license())
+        maybe_write("Download-URL", self.download_url)
+        maybe_write("Description", rfc822_escape(self.get_long_description() or ""))
+        maybe_write("Keywords", ",".join(self.get_keywords()))
+
+        self._write_list(file, 'Platform', self.get_platforms())
+        self._write_list(file, 'Classifier', self.get_classifiers())
+
+        # PEP 314
+        self._write_list(file, 'Requires', self.get_requires())
+        self._write_list(file, 'Provides', self.get_provides())
+        self._write_list(file, 'Obsoletes', self.get_obsoletes())
+
+    def _write_list(self, file, name, values):
+        values = values or []
+        for value in values:
+            file.write(f'{name}: {value}\n')
+
+    # -- Metadata query methods ----------------------------------------
+
+    def get_name(self):
+        return self.name or "UNKNOWN"
+
+    def get_version(self):
+        return self.version or "0.0.0"
+
+    def get_fullname(self):
+        return self._fullname(self.get_name(), self.get_version())
+
+    @staticmethod
+    def _fullname(name: str, version: str) -> str:
+        """
+        >>> DistributionMetadata._fullname('setup.tools', '1.0-2')
+        'setup_tools-1.0.post2'
+        >>> DistributionMetadata._fullname('setup-tools', '1.2post2')
+        'setup_tools-1.2.post2'
+        >>> DistributionMetadata._fullname('setup-tools', '1.0-r2')
+        'setup_tools-1.0.post2'
+        >>> DistributionMetadata._fullname('setup.tools', '1.0.post')
+        'setup_tools-1.0.post0'
+        >>> DistributionMetadata._fullname('setup.tools', '1.0+ubuntu-1')
+        'setup_tools-1.0+ubuntu.1'
+        """
+        return "{}-{}".format(
+            canonicalize_name(name).replace('-', '_'),
+            canonicalize_version(version, strip_trailing_zero=False),
+        )
+
+    def get_author(self):
+        return self.author
+
+    def get_author_email(self):
+        return self.author_email
+
+    def get_maintainer(self):
+        return self.maintainer
+
+    def get_maintainer_email(self):
+        return self.maintainer_email
+
+    def get_contact(self):
+        return self.maintainer or self.author
+
+    def get_contact_email(self):
+        return self.maintainer_email or self.author_email
+
+    def get_url(self):
+        return self.url
+
+    def get_license(self):
+        return self.license
+
+    get_licence = get_license
+
+    def get_description(self):
+        return self.description
+
+    def get_long_description(self):
+        return self.long_description
+
+    def get_keywords(self):
+        return self.keywords or []
+
+    def set_keywords(self, value):
+        self.keywords = _ensure_list(value, 'keywords')
+
+    def get_platforms(self):
+        return self.platforms
+
+    def set_platforms(self, value):
+        self.platforms = _ensure_list(value, 'platforms')
+
+    def get_classifiers(self):
+        return self.classifiers or []
+
+    def set_classifiers(self, value):
+        self.classifiers = _ensure_list(value, 'classifiers')
+
+    def get_download_url(self):
+        return self.download_url
+
+    # PEP 314
+    def get_requires(self):
+        return self.requires or []
+
+    def set_requires(self, value):
+        import distutils.versionpredicate
+
+        for v in value:
+            distutils.versionpredicate.VersionPredicate(v)
+        self.requires = list(value)
+
+    def get_provides(self):
+        return self.provides or []
+
+    def set_provides(self, value):
+        value = [v.strip() for v in value]
+        for v in value:
+            import distutils.versionpredicate
+
+            distutils.versionpredicate.split_provision(v)
+        self.provides = value
+
+    def get_obsoletes(self):
+        return self.obsoletes or []
+
+    def set_obsoletes(self, value):
+        import distutils.versionpredicate
+
+        for v in value:
+            distutils.versionpredicate.VersionPredicate(v)
+        self.obsoletes = list(value)
+
+
+def fix_help_options(options):
+    """Convert a 4-tuple 'help_options' list as found in various command
+    classes to the 3-tuple form required by FancyGetopt.
+    """
+    new_options = []
+    for help_tuple in options:
+        new_options.append(help_tuple[0:3])
+    return new_options
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/errors.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/errors.py
new file mode 100644
index 0000000000000000000000000000000000000000..626254c321fb31033c54fed7ff57a0df5eaaa608
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/errors.py
@@ -0,0 +1,127 @@
+"""distutils.errors
+
+Provides exceptions used by the Distutils modules.  Note that Distutils
+modules may raise standard exceptions; in particular, SystemExit is
+usually raised for errors that are obviously the end-user's fault
+(eg. bad command-line arguments).
+
+This module is safe to use in "from ... import *" mode; it only exports
+symbols whose names start with "Distutils" and end with "Error"."""
+
+
+class DistutilsError(Exception):
+    """The root of all Distutils evil."""
+
+    pass
+
+
+class DistutilsModuleError(DistutilsError):
+    """Unable to load an expected module, or to find an expected class
+    within some module (in particular, command modules and classes)."""
+
+    pass
+
+
+class DistutilsClassError(DistutilsError):
+    """Some command class (or possibly distribution class, if anyone
+    feels a need to subclass Distribution) is found not to be holding
+    up its end of the bargain, ie. implementing some part of the
+    "command "interface."""
+
+    pass
+
+
+class DistutilsGetoptError(DistutilsError):
+    """The option table provided to 'fancy_getopt()' is bogus."""
+
+    pass
+
+
+class DistutilsArgError(DistutilsError):
+    """Raised by fancy_getopt in response to getopt.error -- ie. an
+    error in the command line usage."""
+
+    pass
+
+
+class DistutilsFileError(DistutilsError):
+    """Any problems in the filesystem: expected file not found, etc.
+    Typically this is for problems that we detect before OSError
+    could be raised."""
+
+    pass
+
+
+class DistutilsOptionError(DistutilsError):
+    """Syntactic/semantic errors in command options, such as use of
+    mutually conflicting options, or inconsistent options,
+    badly-spelled values, etc.  No distinction is made between option
+    values originating in the setup script, the command line, config
+    files, or what-have-you -- but if we *know* something originated in
+    the setup script, we'll raise DistutilsSetupError instead."""
+
+    pass
+
+
+class DistutilsSetupError(DistutilsError):
+    """For errors that can be definitely blamed on the setup script,
+    such as invalid keyword arguments to 'setup()'."""
+
+    pass
+
+
+class DistutilsPlatformError(DistutilsError):
+    """We don't know how to do something on the current platform (but
+    we do know how to do it on some platform) -- eg. trying to compile
+    C files on a platform not supported by a CCompiler subclass."""
+
+    pass
+
+
+class DistutilsExecError(DistutilsError):
+    """Any problems executing an external program (such as the C
+    compiler, when compiling C files)."""
+
+    pass
+
+
+class DistutilsInternalError(DistutilsError):
+    """Internal inconsistencies or impossibilities (obviously, this
+    should never be seen if the code is working!)."""
+
+    pass
+
+
+class DistutilsTemplateError(DistutilsError):
+    """Syntax error in a file list template."""
+
+
+class DistutilsByteCompileError(DistutilsError):
+    """Byte compile error."""
+
+
+# Exception classes used by the CCompiler implementation classes
+class CCompilerError(Exception):
+    """Some compile/link operation failed."""
+
+
+class PreprocessError(CCompilerError):
+    """Failure to preprocess one or more C/C++ files."""
+
+
+class CompileError(CCompilerError):
+    """Failure to compile one or more C/C++ source files."""
+
+
+class LibError(CCompilerError):
+    """Failure to create a static library from one or more C/C++ object
+    files."""
+
+
+class LinkError(CCompilerError):
+    """Failure to link one or more C/C++ object files into an executable
+    or shared library file."""
+
+
+class UnknownFileError(CCompilerError):
+    """Attempt to process an unknown file type."""
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/extension.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/extension.py
new file mode 100644
index 0000000000000000000000000000000000000000..04e871bcd659042791f88d2c3edd9dee25f19adf
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/extension.py
@@ -0,0 +1,242 @@
+"""distutils.extension
+
+Provides the Extension class, used to describe C/C++ extension
+modules in setup scripts."""
+
+import os
+import warnings
+
+# This class is really only used by the "build_ext" command, so it might
+# make sense to put it in distutils.command.build_ext.  However, that
+# module is already big enough, and I want to make this class a bit more
+# complex to simplify some common cases ("foo" module in "foo.c") and do
+# better error-checking ("foo.c" actually exists).
+#
+# Also, putting this in build_ext.py means every setup script would have to
+# import that large-ish module (indirectly, through distutils.core) in
+# order to do anything.
+
+
+class Extension:
+    """Just a collection of attributes that describes an extension
+    module and everything needed to build it (hopefully in a portable
+    way, but there are hooks that let you be as unportable as you need).
+
+    Instance attributes:
+      name : string
+        the full name of the extension, including any packages -- ie.
+        *not* a filename or pathname, but Python dotted name
+      sources : [string]
+        list of source filenames, relative to the distribution root
+        (where the setup script lives), in Unix form (slash-separated)
+        for portability.  Source files may be C, C++, SWIG (.i),
+        platform-specific resource files, or whatever else is recognized
+        by the "build_ext" command as source for a Python extension.
+      include_dirs : [string]
+        list of directories to search for C/C++ header files (in Unix
+        form for portability)
+      define_macros : [(name : string, value : string|None)]
+        list of macros to define; each macro is defined using a 2-tuple,
+        where 'value' is either the string to define it to or None to
+        define it without a particular value (equivalent of "#define
+        FOO" in source or -DFOO on Unix C compiler command line)
+      undef_macros : [string]
+        list of macros to undefine explicitly
+      library_dirs : [string]
+        list of directories to search for C/C++ libraries at link time
+      libraries : [string]
+        list of library names (not filenames or paths) to link against
+      runtime_library_dirs : [string]
+        list of directories to search for C/C++ libraries at run time
+        (for shared extensions, this is when the extension is loaded)
+      extra_objects : [string]
+        list of extra files to link with (eg. object files not implied
+        by 'sources', static library that must be explicitly specified,
+        binary resource files, etc.)
+      extra_compile_args : [string]
+        any extra platform- and compiler-specific information to use
+        when compiling the source files in 'sources'.  For platforms and
+        compilers where "command line" makes sense, this is typically a
+        list of command-line arguments, but for other platforms it could
+        be anything.
+      extra_link_args : [string]
+        any extra platform- and compiler-specific information to use
+        when linking object files together to create the extension (or
+        to create a new static Python interpreter).  Similar
+        interpretation as for 'extra_compile_args'.
+      export_symbols : [string]
+        list of symbols to be exported from a shared extension.  Not
+        used on all platforms, and not generally necessary for Python
+        extensions, which typically export exactly one symbol: "init" +
+        extension_name.
+      swig_opts : [string]
+        any extra options to pass to SWIG if a source file has the .i
+        extension.
+      depends : [string]
+        list of files that the extension depends on
+      language : string
+        extension language (i.e. "c", "c++", "objc"). Will be detected
+        from the source extensions if not provided.
+      optional : boolean
+        specifies that a build failure in the extension should not abort the
+        build process, but simply not install the failing extension.
+    """
+
+    # When adding arguments to this constructor, be sure to update
+    # setup_keywords in core.py.
+    def __init__(
+        self,
+        name,
+        sources,
+        include_dirs=None,
+        define_macros=None,
+        undef_macros=None,
+        library_dirs=None,
+        libraries=None,
+        runtime_library_dirs=None,
+        extra_objects=None,
+        extra_compile_args=None,
+        extra_link_args=None,
+        export_symbols=None,
+        swig_opts=None,
+        depends=None,
+        language=None,
+        optional=None,
+        **kw,  # To catch unknown keywords
+    ):
+        if not isinstance(name, str):
+            raise AssertionError("'name' must be a string")
+        if not (isinstance(sources, list) and all(isinstance(v, str) for v in sources)):
+            raise AssertionError("'sources' must be a list of strings")
+
+        self.name = name
+        self.sources = sources
+        self.include_dirs = include_dirs or []
+        self.define_macros = define_macros or []
+        self.undef_macros = undef_macros or []
+        self.library_dirs = library_dirs or []
+        self.libraries = libraries or []
+        self.runtime_library_dirs = runtime_library_dirs or []
+        self.extra_objects = extra_objects or []
+        self.extra_compile_args = extra_compile_args or []
+        self.extra_link_args = extra_link_args or []
+        self.export_symbols = export_symbols or []
+        self.swig_opts = swig_opts or []
+        self.depends = depends or []
+        self.language = language
+        self.optional = optional
+
+        # If there are unknown keyword options, warn about them
+        if len(kw) > 0:
+            options = [repr(option) for option in kw]
+            options = ', '.join(sorted(options))
+            msg = f"Unknown Extension options: {options}"
+            warnings.warn(msg)
+
+    def __repr__(self):
+        return f'<{self.__class__.__module__}.{self.__class__.__qualname__}({self.name!r}) at {id(self):#x}>'
+
+
+def read_setup_file(filename):  # noqa: C901
+    """Reads a Setup file and returns Extension instances."""
+    from distutils.sysconfig import _variable_rx, expand_makefile_vars, parse_makefile
+    from distutils.text_file import TextFile
+    from distutils.util import split_quoted
+
+    # First pass over the file to gather "VAR = VALUE" assignments.
+    vars = parse_makefile(filename)
+
+    # Second pass to gobble up the real content: lines of the form
+    #    ... [ ...] [ ...] [ ...]
+    file = TextFile(
+        filename,
+        strip_comments=True,
+        skip_blanks=True,
+        join_lines=True,
+        lstrip_ws=True,
+        rstrip_ws=True,
+    )
+    try:
+        extensions = []
+
+        while True:
+            line = file.readline()
+            if line is None:  # eof
+                break
+            if _variable_rx.match(line):  # VAR=VALUE, handled in first pass
+                continue
+
+            if line[0] == line[-1] == "*":
+                file.warn(f"'{line}' lines not handled yet")
+                continue
+
+            line = expand_makefile_vars(line, vars)
+            words = split_quoted(line)
+
+            # NB. this parses a slightly different syntax than the old
+            # makesetup script: here, there must be exactly one extension per
+            # line, and it must be the first word of the line.  I have no idea
+            # why the old syntax supported multiple extensions per line, as
+            # they all wind up being the same.
+
+            module = words[0]
+            ext = Extension(module, [])
+            append_next_word = None
+
+            for word in words[1:]:
+                if append_next_word is not None:
+                    append_next_word.append(word)
+                    append_next_word = None
+                    continue
+
+                suffix = os.path.splitext(word)[1]
+                switch = word[0:2]
+                value = word[2:]
+
+                if suffix in (".c", ".cc", ".cpp", ".cxx", ".c++", ".m", ".mm"):
+                    # hmm, should we do something about C vs. C++ sources?
+                    # or leave it up to the CCompiler implementation to
+                    # worry about?
+                    ext.sources.append(word)
+                elif switch == "-I":
+                    ext.include_dirs.append(value)
+                elif switch == "-D":
+                    equals = value.find("=")
+                    if equals == -1:  # bare "-DFOO" -- no value
+                        ext.define_macros.append((value, None))
+                    else:  # "-DFOO=blah"
+                        ext.define_macros.append((value[0:equals], value[equals + 2 :]))
+                elif switch == "-U":
+                    ext.undef_macros.append(value)
+                elif switch == "-C":  # only here 'cause makesetup has it!
+                    ext.extra_compile_args.append(word)
+                elif switch == "-l":
+                    ext.libraries.append(value)
+                elif switch == "-L":
+                    ext.library_dirs.append(value)
+                elif switch == "-R":
+                    ext.runtime_library_dirs.append(value)
+                elif word == "-rpath":
+                    append_next_word = ext.runtime_library_dirs
+                elif word == "-Xlinker":
+                    append_next_word = ext.extra_link_args
+                elif word == "-Xcompiler":
+                    append_next_word = ext.extra_compile_args
+                elif switch == "-u":
+                    ext.extra_link_args.append(word)
+                    if not value:
+                        append_next_word = ext.extra_link_args
+                elif suffix in (".a", ".so", ".sl", ".o", ".dylib"):
+                    # NB. a really faithful emulation of makesetup would
+                    # append a .o file to extra_objects only if it
+                    # had a slash in it; otherwise, it would s/.o/.c/
+                    # and append it to sources.  Hmmmm.
+                    ext.extra_objects.append(word)
+                else:
+                    file.warn(f"unrecognized argument '{word}'")
+
+            extensions.append(ext)
+    finally:
+        file.close()
+
+    return extensions
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/fancy_getopt.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/fancy_getopt.py
new file mode 100644
index 0000000000000000000000000000000000000000..907cc2b73c6cbec7a4cf0ebabd8d0a669f8540e5
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/fancy_getopt.py
@@ -0,0 +1,469 @@
+"""distutils.fancy_getopt
+
+Wrapper around the standard getopt module that provides the following
+additional features:
+  * short and long options are tied together
+  * options have help strings, so fancy_getopt could potentially
+    create a complete usage summary
+  * options set attributes of a passed-in object
+"""
+
+import getopt
+import re
+import string
+import sys
+from typing import Any, Sequence
+
+from .errors import DistutilsArgError, DistutilsGetoptError
+
+# Much like command_re in distutils.core, this is close to but not quite
+# the same as a Python NAME -- except, in the spirit of most GNU
+# utilities, we use '-' in place of '_'.  (The spirit of LISP lives on!)
+# The similarities to NAME are again not a coincidence...
+longopt_pat = r'[a-zA-Z](?:[a-zA-Z0-9-]*)'
+longopt_re = re.compile(rf'^{longopt_pat}$')
+
+# For recognizing "negative alias" options, eg. "quiet=!verbose"
+neg_alias_re = re.compile(f"^({longopt_pat})=!({longopt_pat})$")
+
+# This is used to translate long options to legitimate Python identifiers
+# (for use as attributes of some object).
+longopt_xlate = str.maketrans('-', '_')
+
+
+class FancyGetopt:
+    """Wrapper around the standard 'getopt()' module that provides some
+    handy extra functionality:
+      * short and long options are tied together
+      * options have help strings, and help text can be assembled
+        from them
+      * options set attributes of a passed-in object
+      * boolean options can have "negative aliases" -- eg. if
+        --quiet is the "negative alias" of --verbose, then "--quiet"
+        on the command line sets 'verbose' to false
+    """
+
+    def __init__(self, option_table=None):
+        # The option table is (currently) a list of tuples.  The
+        # tuples may have 3 or four values:
+        #   (long_option, short_option, help_string [, repeatable])
+        # if an option takes an argument, its long_option should have '='
+        # appended; short_option should just be a single character, no ':'
+        # in any case.  If a long_option doesn't have a corresponding
+        # short_option, short_option should be None.  All option tuples
+        # must have long options.
+        self.option_table = option_table
+
+        # 'option_index' maps long option names to entries in the option
+        # table (ie. those 3-tuples).
+        self.option_index = {}
+        if self.option_table:
+            self._build_index()
+
+        # 'alias' records (duh) alias options; {'foo': 'bar'} means
+        # --foo is an alias for --bar
+        self.alias = {}
+
+        # 'negative_alias' keeps track of options that are the boolean
+        # opposite of some other option
+        self.negative_alias = {}
+
+        # These keep track of the information in the option table.  We
+        # don't actually populate these structures until we're ready to
+        # parse the command-line, since the 'option_table' passed in here
+        # isn't necessarily the final word.
+        self.short_opts = []
+        self.long_opts = []
+        self.short2long = {}
+        self.attr_name = {}
+        self.takes_arg = {}
+
+        # And 'option_order' is filled up in 'getopt()'; it records the
+        # original order of options (and their values) on the command-line,
+        # but expands short options, converts aliases, etc.
+        self.option_order = []
+
+    def _build_index(self):
+        self.option_index.clear()
+        for option in self.option_table:
+            self.option_index[option[0]] = option
+
+    def set_option_table(self, option_table):
+        self.option_table = option_table
+        self._build_index()
+
+    def add_option(self, long_option, short_option=None, help_string=None):
+        if long_option in self.option_index:
+            raise DistutilsGetoptError(
+                f"option conflict: already an option '{long_option}'"
+            )
+        else:
+            option = (long_option, short_option, help_string)
+            self.option_table.append(option)
+            self.option_index[long_option] = option
+
+    def has_option(self, long_option):
+        """Return true if the option table for this parser has an
+        option with long name 'long_option'."""
+        return long_option in self.option_index
+
+    def get_attr_name(self, long_option):
+        """Translate long option name 'long_option' to the form it
+        has as an attribute of some object: ie., translate hyphens
+        to underscores."""
+        return long_option.translate(longopt_xlate)
+
+    def _check_alias_dict(self, aliases, what):
+        assert isinstance(aliases, dict)
+        for alias, opt in aliases.items():
+            if alias not in self.option_index:
+                raise DistutilsGetoptError(
+                    f"invalid {what} '{alias}': option '{alias}' not defined"
+                )
+            if opt not in self.option_index:
+                raise DistutilsGetoptError(
+                    f"invalid {what} '{alias}': aliased option '{opt}' not defined"
+                )
+
+    def set_aliases(self, alias):
+        """Set the aliases for this option parser."""
+        self._check_alias_dict(alias, "alias")
+        self.alias = alias
+
+    def set_negative_aliases(self, negative_alias):
+        """Set the negative aliases for this option parser.
+        'negative_alias' should be a dictionary mapping option names to
+        option names, both the key and value must already be defined
+        in the option table."""
+        self._check_alias_dict(negative_alias, "negative alias")
+        self.negative_alias = negative_alias
+
+    def _grok_option_table(self):  # noqa: C901
+        """Populate the various data structures that keep tabs on the
+        option table.  Called by 'getopt()' before it can do anything
+        worthwhile.
+        """
+        self.long_opts = []
+        self.short_opts = []
+        self.short2long.clear()
+        self.repeat = {}
+
+        for option in self.option_table:
+            if len(option) == 3:
+                long, short, help = option
+                repeat = 0
+            elif len(option) == 4:
+                long, short, help, repeat = option
+            else:
+                # the option table is part of the code, so simply
+                # assert that it is correct
+                raise ValueError(f"invalid option tuple: {option!r}")
+
+            # Type- and value-check the option names
+            if not isinstance(long, str) or len(long) < 2:
+                raise DistutilsGetoptError(
+                    f"invalid long option '{long}': must be a string of length >= 2"
+                )
+
+            if not ((short is None) or (isinstance(short, str) and len(short) == 1)):
+                raise DistutilsGetoptError(
+                    f"invalid short option '{short}': "
+                    "must a single character or None"
+                )
+
+            self.repeat[long] = repeat
+            self.long_opts.append(long)
+
+            if long[-1] == '=':  # option takes an argument?
+                if short:
+                    short = short + ':'
+                long = long[0:-1]
+                self.takes_arg[long] = True
+            else:
+                # Is option is a "negative alias" for some other option (eg.
+                # "quiet" == "!verbose")?
+                alias_to = self.negative_alias.get(long)
+                if alias_to is not None:
+                    if self.takes_arg[alias_to]:
+                        raise DistutilsGetoptError(
+                            f"invalid negative alias '{long}': "
+                            f"aliased option '{alias_to}' takes a value"
+                        )
+
+                    self.long_opts[-1] = long  # XXX redundant?!
+                self.takes_arg[long] = False
+
+            # If this is an alias option, make sure its "takes arg" flag is
+            # the same as the option it's aliased to.
+            alias_to = self.alias.get(long)
+            if alias_to is not None:
+                if self.takes_arg[long] != self.takes_arg[alias_to]:
+                    raise DistutilsGetoptError(
+                        f"invalid alias '{long}': inconsistent with "
+                        f"aliased option '{alias_to}' (one of them takes a value, "
+                        "the other doesn't"
+                    )
+
+            # Now enforce some bondage on the long option name, so we can
+            # later translate it to an attribute name on some object.  Have
+            # to do this a bit late to make sure we've removed any trailing
+            # '='.
+            if not longopt_re.match(long):
+                raise DistutilsGetoptError(
+                    f"invalid long option name '{long}' "
+                    "(must be letters, numbers, hyphens only"
+                )
+
+            self.attr_name[long] = self.get_attr_name(long)
+            if short:
+                self.short_opts.append(short)
+                self.short2long[short[0]] = long
+
+    def getopt(self, args=None, object=None):  # noqa: C901
+        """Parse command-line options in args. Store as attributes on object.
+
+        If 'args' is None or not supplied, uses 'sys.argv[1:]'.  If
+        'object' is None or not supplied, creates a new OptionDummy
+        object, stores option values there, and returns a tuple (args,
+        object).  If 'object' is supplied, it is modified in place and
+        'getopt()' just returns 'args'; in both cases, the returned
+        'args' is a modified copy of the passed-in 'args' list, which
+        is left untouched.
+        """
+        if args is None:
+            args = sys.argv[1:]
+        if object is None:
+            object = OptionDummy()
+            created_object = True
+        else:
+            created_object = False
+
+        self._grok_option_table()
+
+        short_opts = ' '.join(self.short_opts)
+        try:
+            opts, args = getopt.getopt(args, short_opts, self.long_opts)
+        except getopt.error as msg:
+            raise DistutilsArgError(msg)
+
+        for opt, val in opts:
+            if len(opt) == 2 and opt[0] == '-':  # it's a short option
+                opt = self.short2long[opt[1]]
+            else:
+                assert len(opt) > 2 and opt[:2] == '--'
+                opt = opt[2:]
+
+            alias = self.alias.get(opt)
+            if alias:
+                opt = alias
+
+            if not self.takes_arg[opt]:  # boolean option?
+                assert val == '', "boolean option can't have value"
+                alias = self.negative_alias.get(opt)
+                if alias:
+                    opt = alias
+                    val = 0
+                else:
+                    val = 1
+
+            attr = self.attr_name[opt]
+            # The only repeating option at the moment is 'verbose'.
+            # It has a negative option -q quiet, which should set verbose = False.
+            if val and self.repeat.get(attr) is not None:
+                val = getattr(object, attr, 0) + 1
+            setattr(object, attr, val)
+            self.option_order.append((opt, val))
+
+        # for opts
+        if created_object:
+            return args, object
+        else:
+            return args
+
+    def get_option_order(self):
+        """Returns the list of (option, value) tuples processed by the
+        previous run of 'getopt()'.  Raises RuntimeError if
+        'getopt()' hasn't been called yet.
+        """
+        if self.option_order is None:
+            raise RuntimeError("'getopt()' hasn't been called yet")
+        else:
+            return self.option_order
+
+    def generate_help(self, header=None):  # noqa: C901
+        """Generate help text (a list of strings, one per suggested line of
+        output) from the option table for this FancyGetopt object.
+        """
+        # Blithely assume the option table is good: probably wouldn't call
+        # 'generate_help()' unless you've already called 'getopt()'.
+
+        # First pass: determine maximum length of long option names
+        max_opt = 0
+        for option in self.option_table:
+            long = option[0]
+            short = option[1]
+            ell = len(long)
+            if long[-1] == '=':
+                ell = ell - 1
+            if short is not None:
+                ell = ell + 5  # " (-x)" where short == 'x'
+            if ell > max_opt:
+                max_opt = ell
+
+        opt_width = max_opt + 2 + 2 + 2  # room for indent + dashes + gutter
+
+        # Typical help block looks like this:
+        #   --foo       controls foonabulation
+        # Help block for longest option looks like this:
+        #   --flimflam  set the flim-flam level
+        # and with wrapped text:
+        #   --flimflam  set the flim-flam level (must be between
+        #               0 and 100, except on Tuesdays)
+        # Options with short names will have the short name shown (but
+        # it doesn't contribute to max_opt):
+        #   --foo (-f)  controls foonabulation
+        # If adding the short option would make the left column too wide,
+        # we push the explanation off to the next line
+        #   --flimflam (-l)
+        #               set the flim-flam level
+        # Important parameters:
+        #   - 2 spaces before option block start lines
+        #   - 2 dashes for each long option name
+        #   - min. 2 spaces between option and explanation (gutter)
+        #   - 5 characters (incl. space) for short option name
+
+        # Now generate lines of help text.  (If 80 columns were good enough
+        # for Jesus, then 78 columns are good enough for me!)
+        line_width = 78
+        text_width = line_width - opt_width
+        big_indent = ' ' * opt_width
+        if header:
+            lines = [header]
+        else:
+            lines = ['Option summary:']
+
+        for option in self.option_table:
+            long, short, help = option[:3]
+            text = wrap_text(help, text_width)
+            if long[-1] == '=':
+                long = long[0:-1]
+
+            # Case 1: no short option at all (makes life easy)
+            if short is None:
+                if text:
+                    lines.append("  --%-*s  %s" % (max_opt, long, text[0]))
+                else:
+                    lines.append("  --%-*s  " % (max_opt, long))
+
+            # Case 2: we have a short option, so we have to include it
+            # just after the long option
+            else:
+                opt_names = f"{long} (-{short})"
+                if text:
+                    lines.append("  --%-*s  %s" % (max_opt, opt_names, text[0]))
+                else:
+                    lines.append("  --%-*s" % opt_names)
+
+            for ell in text[1:]:
+                lines.append(big_indent + ell)
+        return lines
+
+    def print_help(self, header=None, file=None):
+        if file is None:
+            file = sys.stdout
+        for line in self.generate_help(header):
+            file.write(line + "\n")
+
+
+def fancy_getopt(options, negative_opt, object, args):
+    parser = FancyGetopt(options)
+    parser.set_negative_aliases(negative_opt)
+    return parser.getopt(args, object)
+
+
+WS_TRANS = {ord(_wschar): ' ' for _wschar in string.whitespace}
+
+
+def wrap_text(text, width):
+    """wrap_text(text : string, width : int) -> [string]
+
+    Split 'text' into multiple lines of no more than 'width' characters
+    each, and return the list of strings that results.
+    """
+    if text is None:
+        return []
+    if len(text) <= width:
+        return [text]
+
+    text = text.expandtabs()
+    text = text.translate(WS_TRANS)
+    chunks = re.split(r'( +|-+)', text)
+    chunks = [ch for ch in chunks if ch]  # ' - ' results in empty strings
+    lines = []
+
+    while chunks:
+        cur_line = []  # list of chunks (to-be-joined)
+        cur_len = 0  # length of current line
+
+        while chunks:
+            ell = len(chunks[0])
+            if cur_len + ell <= width:  # can squeeze (at least) this chunk in
+                cur_line.append(chunks[0])
+                del chunks[0]
+                cur_len = cur_len + ell
+            else:  # this line is full
+                # drop last chunk if all space
+                if cur_line and cur_line[-1][0] == ' ':
+                    del cur_line[-1]
+                break
+
+        if chunks:  # any chunks left to process?
+            # if the current line is still empty, then we had a single
+            # chunk that's too big too fit on a line -- so we break
+            # down and break it up at the line width
+            if cur_len == 0:
+                cur_line.append(chunks[0][0:width])
+                chunks[0] = chunks[0][width:]
+
+            # all-whitespace chunks at the end of a line can be discarded
+            # (and we know from the re.split above that if a chunk has
+            # *any* whitespace, it is *all* whitespace)
+            if chunks[0][0] == ' ':
+                del chunks[0]
+
+        # and store this line in the list-of-all-lines -- as a single
+        # string, of course!
+        lines.append(''.join(cur_line))
+
+    return lines
+
+
+def translate_longopt(opt):
+    """Convert a long option name to a valid Python identifier by
+    changing "-" to "_".
+    """
+    return opt.translate(longopt_xlate)
+
+
+class OptionDummy:
+    """Dummy class just used as a place to hold command-line option
+    values as instance attributes."""
+
+    def __init__(self, options: Sequence[Any] = []):
+        """Create a new OptionDummy instance.  The attributes listed in
+        'options' will be initialized to None."""
+        for opt in options:
+            setattr(self, opt, None)
+
+
+if __name__ == "__main__":
+    text = """\
+Tra-la-la, supercalifragilisticexpialidocious.
+How *do* you spell that odd word, anyways?
+(Someone ask Mary -- she'll know [or she'll
+say, "How should I know?"].)"""
+
+    for w in (10, 20, 30, 40):
+        print("width: %d" % w)
+        print("\n".join(wrap_text(text, w)))
+        print()
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/file_util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/file_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..b19a5dcfa40a8da6a072f543acee5234c4c0c90f
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/file_util.py
@@ -0,0 +1,235 @@
+"""distutils.file_util
+
+Utility functions for operating on single files.
+"""
+
+import os
+
+from ._log import log
+from .errors import DistutilsFileError
+
+# for generating verbose output in 'copy_file()'
+_copy_action = {None: 'copying', 'hard': 'hard linking', 'sym': 'symbolically linking'}
+
+
+def _copy_file_contents(src, dst, buffer_size=16 * 1024):  # noqa: C901
+    """Copy the file 'src' to 'dst'; both must be filenames.  Any error
+    opening either file, reading from 'src', or writing to 'dst', raises
+    DistutilsFileError.  Data is read/written in chunks of 'buffer_size'
+    bytes (default 16k).  No attempt is made to handle anything apart from
+    regular files.
+    """
+    # Stolen from shutil module in the standard library, but with
+    # custom error-handling added.
+    fsrc = None
+    fdst = None
+    try:
+        try:
+            fsrc = open(src, 'rb')
+        except OSError as e:
+            raise DistutilsFileError(f"could not open '{src}': {e.strerror}")
+
+        if os.path.exists(dst):
+            try:
+                os.unlink(dst)
+            except OSError as e:
+                raise DistutilsFileError(f"could not delete '{dst}': {e.strerror}")
+
+        try:
+            fdst = open(dst, 'wb')
+        except OSError as e:
+            raise DistutilsFileError(f"could not create '{dst}': {e.strerror}")
+
+        while True:
+            try:
+                buf = fsrc.read(buffer_size)
+            except OSError as e:
+                raise DistutilsFileError(f"could not read from '{src}': {e.strerror}")
+
+            if not buf:
+                break
+
+            try:
+                fdst.write(buf)
+            except OSError as e:
+                raise DistutilsFileError(f"could not write to '{dst}': {e.strerror}")
+    finally:
+        if fdst:
+            fdst.close()
+        if fsrc:
+            fsrc.close()
+
+
+def copy_file(  # noqa: C901
+    src,
+    dst,
+    preserve_mode=True,
+    preserve_times=True,
+    update=False,
+    link=None,
+    verbose=True,
+    dry_run=False,
+):
+    """Copy a file 'src' to 'dst'.  If 'dst' is a directory, then 'src' is
+    copied there with the same name; otherwise, it must be a filename.  (If
+    the file exists, it will be ruthlessly clobbered.)  If 'preserve_mode'
+    is true (the default), the file's mode (type and permission bits, or
+    whatever is analogous on the current platform) is copied.  If
+    'preserve_times' is true (the default), the last-modified and
+    last-access times are copied as well.  If 'update' is true, 'src' will
+    only be copied if 'dst' does not exist, or if 'dst' does exist but is
+    older than 'src'.
+
+    'link' allows you to make hard links (os.link) or symbolic links
+    (os.symlink) instead of copying: set it to "hard" or "sym"; if it is
+    None (the default), files are copied.  Don't set 'link' on systems that
+    don't support it: 'copy_file()' doesn't check if hard or symbolic
+    linking is available. If hardlink fails, falls back to
+    _copy_file_contents().
+
+    Under Mac OS, uses the native file copy function in macostools; on
+    other systems, uses '_copy_file_contents()' to copy file contents.
+
+    Return a tuple (dest_name, copied): 'dest_name' is the actual name of
+    the output file, and 'copied' is true if the file was copied (or would
+    have been copied, if 'dry_run' true).
+    """
+    # XXX if the destination file already exists, we clobber it if
+    # copying, but blow up if linking.  Hmmm.  And I don't know what
+    # macostools.copyfile() does.  Should definitely be consistent, and
+    # should probably blow up if destination exists and we would be
+    # changing it (ie. it's not already a hard/soft link to src OR
+    # (not update) and (src newer than dst).
+
+    from distutils._modified import newer
+    from stat import S_IMODE, ST_ATIME, ST_MODE, ST_MTIME
+
+    if not os.path.isfile(src):
+        raise DistutilsFileError(
+            f"can't copy '{src}': doesn't exist or not a regular file"
+        )
+
+    if os.path.isdir(dst):
+        dir = dst
+        dst = os.path.join(dst, os.path.basename(src))
+    else:
+        dir = os.path.dirname(dst)
+
+    if update and not newer(src, dst):
+        if verbose >= 1:
+            log.debug("not copying %s (output up-to-date)", src)
+        return (dst, 0)
+
+    try:
+        action = _copy_action[link]
+    except KeyError:
+        raise ValueError(f"invalid value '{link}' for 'link' argument")
+
+    if verbose >= 1:
+        if os.path.basename(dst) == os.path.basename(src):
+            log.info("%s %s -> %s", action, src, dir)
+        else:
+            log.info("%s %s -> %s", action, src, dst)
+
+    if dry_run:
+        return (dst, 1)
+
+    # If linking (hard or symbolic), use the appropriate system call
+    # (Unix only, of course, but that's the caller's responsibility)
+    elif link == 'hard':
+        if not (os.path.exists(dst) and os.path.samefile(src, dst)):
+            try:
+                os.link(src, dst)
+                return (dst, 1)
+            except OSError:
+                # If hard linking fails, fall back on copying file
+                # (some special filesystems don't support hard linking
+                #  even under Unix, see issue #8876).
+                pass
+    elif link == 'sym':
+        if not (os.path.exists(dst) and os.path.samefile(src, dst)):
+            os.symlink(src, dst)
+            return (dst, 1)
+
+    # Otherwise (non-Mac, not linking), copy the file contents and
+    # (optionally) copy the times and mode.
+    _copy_file_contents(src, dst)
+    if preserve_mode or preserve_times:
+        st = os.stat(src)
+
+        # According to David Ascher , utime() should be done
+        # before chmod() (at least under NT).
+        if preserve_times:
+            os.utime(dst, (st[ST_ATIME], st[ST_MTIME]))
+        if preserve_mode:
+            os.chmod(dst, S_IMODE(st[ST_MODE]))
+
+    return (dst, 1)
+
+
+# XXX I suspect this is Unix-specific -- need porting help!
+def move_file(src, dst, verbose=True, dry_run=False):  # noqa: C901
+    """Move a file 'src' to 'dst'.  If 'dst' is a directory, the file will
+    be moved into it with the same name; otherwise, 'src' is just renamed
+    to 'dst'.  Return the new full name of the file.
+
+    Handles cross-device moves on Unix using 'copy_file()'.  What about
+    other systems???
+    """
+    import errno
+    from os.path import basename, dirname, exists, isdir, isfile
+
+    if verbose >= 1:
+        log.info("moving %s -> %s", src, dst)
+
+    if dry_run:
+        return dst
+
+    if not isfile(src):
+        raise DistutilsFileError(f"can't move '{src}': not a regular file")
+
+    if isdir(dst):
+        dst = os.path.join(dst, basename(src))
+    elif exists(dst):
+        raise DistutilsFileError(
+            f"can't move '{src}': destination '{dst}' already exists"
+        )
+
+    if not isdir(dirname(dst)):
+        raise DistutilsFileError(
+            f"can't move '{src}': destination '{dst}' not a valid path"
+        )
+
+    copy_it = False
+    try:
+        os.rename(src, dst)
+    except OSError as e:
+        (num, msg) = e.args
+        if num == errno.EXDEV:
+            copy_it = True
+        else:
+            raise DistutilsFileError(f"couldn't move '{src}' to '{dst}': {msg}")
+
+    if copy_it:
+        copy_file(src, dst, verbose=verbose)
+        try:
+            os.unlink(src)
+        except OSError as e:
+            (num, msg) = e.args
+            try:
+                os.unlink(dst)
+            except OSError:
+                pass
+            raise DistutilsFileError(
+                f"couldn't move '{src}' to '{dst}' by copy/delete: "
+                f"delete '{src}' failed: {msg}"
+            )
+    return dst
+
+
+def write_file(filename, contents):
+    """Create a file with the specified name and write 'contents' (a
+    sequence of strings without line terminators) to it.
+    """
+    with open(filename, 'w', encoding='utf-8') as f:
+        f.writelines(line + '\n' for line in contents)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/filelist.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/filelist.py
new file mode 100644
index 0000000000000000000000000000000000000000..44ae9e67ef686242745a02cdf054a90cb6921129
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/filelist.py
@@ -0,0 +1,369 @@
+"""distutils.filelist
+
+Provides the FileList class, used for poking about the filesystem
+and building lists of files.
+"""
+
+import fnmatch
+import functools
+import os
+import re
+
+from ._log import log
+from .errors import DistutilsInternalError, DistutilsTemplateError
+from .util import convert_path
+
+
+class FileList:
+    """A list of files built by on exploring the filesystem and filtered by
+    applying various patterns to what we find there.
+
+    Instance attributes:
+      dir
+        directory from which files will be taken -- only used if
+        'allfiles' not supplied to constructor
+      files
+        list of filenames currently being built/filtered/manipulated
+      allfiles
+        complete list of files under consideration (ie. without any
+        filtering applied)
+    """
+
+    def __init__(self, warn=None, debug_print=None):
+        # ignore argument to FileList, but keep them for backwards
+        # compatibility
+        self.allfiles = None
+        self.files = []
+
+    def set_allfiles(self, allfiles):
+        self.allfiles = allfiles
+
+    def findall(self, dir=os.curdir):
+        self.allfiles = findall(dir)
+
+    def debug_print(self, msg):
+        """Print 'msg' to stdout if the global DEBUG (taken from the
+        DISTUTILS_DEBUG environment variable) flag is true.
+        """
+        from distutils.debug import DEBUG
+
+        if DEBUG:
+            print(msg)
+
+    # Collection methods
+
+    def append(self, item):
+        self.files.append(item)
+
+    def extend(self, items):
+        self.files.extend(items)
+
+    def sort(self):
+        # Not a strict lexical sort!
+        sortable_files = sorted(map(os.path.split, self.files))
+        self.files = []
+        for sort_tuple in sortable_files:
+            self.files.append(os.path.join(*sort_tuple))
+
+    # Other miscellaneous utility methods
+
+    def remove_duplicates(self):
+        # Assumes list has been sorted!
+        for i in range(len(self.files) - 1, 0, -1):
+            if self.files[i] == self.files[i - 1]:
+                del self.files[i]
+
+    # "File template" methods
+
+    def _parse_template_line(self, line):
+        words = line.split()
+        action = words[0]
+
+        patterns = dir = dir_pattern = None
+
+        if action in ('include', 'exclude', 'global-include', 'global-exclude'):
+            if len(words) < 2:
+                raise DistutilsTemplateError(
+                    f"'{action}' expects   ..."
+                )
+            patterns = [convert_path(w) for w in words[1:]]
+        elif action in ('recursive-include', 'recursive-exclude'):
+            if len(words) < 3:
+                raise DistutilsTemplateError(
+                    f"'{action}' expects    ..."
+                )
+            dir = convert_path(words[1])
+            patterns = [convert_path(w) for w in words[2:]]
+        elif action in ('graft', 'prune'):
+            if len(words) != 2:
+                raise DistutilsTemplateError(
+                    f"'{action}' expects a single "
+                )
+            dir_pattern = convert_path(words[1])
+        else:
+            raise DistutilsTemplateError(f"unknown action '{action}'")
+
+        return (action, patterns, dir, dir_pattern)
+
+    def process_template_line(self, line):  # noqa: C901
+        # Parse the line: split it up, make sure the right number of words
+        # is there, and return the relevant words.  'action' is always
+        # defined: it's the first word of the line.  Which of the other
+        # three are defined depends on the action; it'll be either
+        # patterns, (dir and patterns), or (dir_pattern).
+        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)
+
+        # OK, now we know that the action is valid and we have the
+        # right number of words on the line for that action -- so we
+        # can proceed with minimal error-checking.
+        if action == 'include':
+            self.debug_print("include " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.include_pattern(pattern, anchor=True):
+                    log.warning("warning: no files found matching '%s'", pattern)
+
+        elif action == 'exclude':
+            self.debug_print("exclude " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.exclude_pattern(pattern, anchor=True):
+                    log.warning(
+                        (
+                            "warning: no previously-included files "
+                            "found matching '%s'"
+                        ),
+                        pattern,
+                    )
+
+        elif action == 'global-include':
+            self.debug_print("global-include " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.include_pattern(pattern, anchor=False):
+                    log.warning(
+                        (
+                            "warning: no files found matching '%s' "
+                            "anywhere in distribution"
+                        ),
+                        pattern,
+                    )
+
+        elif action == 'global-exclude':
+            self.debug_print("global-exclude " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.exclude_pattern(pattern, anchor=False):
+                    log.warning(
+                        (
+                            "warning: no previously-included files matching "
+                            "'%s' found anywhere in distribution"
+                        ),
+                        pattern,
+                    )
+
+        elif action == 'recursive-include':
+            self.debug_print("recursive-include {} {}".format(dir, ' '.join(patterns)))
+            for pattern in patterns:
+                if not self.include_pattern(pattern, prefix=dir):
+                    msg = "warning: no files found matching '%s' under directory '%s'"
+                    log.warning(msg, pattern, dir)
+
+        elif action == 'recursive-exclude':
+            self.debug_print("recursive-exclude {} {}".format(dir, ' '.join(patterns)))
+            for pattern in patterns:
+                if not self.exclude_pattern(pattern, prefix=dir):
+                    log.warning(
+                        (
+                            "warning: no previously-included files matching "
+                            "'%s' found under directory '%s'"
+                        ),
+                        pattern,
+                        dir,
+                    )
+
+        elif action == 'graft':
+            self.debug_print("graft " + dir_pattern)
+            if not self.include_pattern(None, prefix=dir_pattern):
+                log.warning("warning: no directories found matching '%s'", dir_pattern)
+
+        elif action == 'prune':
+            self.debug_print("prune " + dir_pattern)
+            if not self.exclude_pattern(None, prefix=dir_pattern):
+                log.warning(
+                    ("no previously-included directories found matching '%s'"),
+                    dir_pattern,
+                )
+        else:
+            raise DistutilsInternalError(
+                f"this cannot happen: invalid action '{action}'"
+            )
+
+    # Filtering/selection methods
+
+    def include_pattern(self, pattern, anchor=True, prefix=None, is_regex=False):
+        """Select strings (presumably filenames) from 'self.files' that
+        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
+        are not quite the same as implemented by the 'fnmatch' module: '*'
+        and '?'  match non-special characters, where "special" is platform-
+        dependent: slash on Unix; colon, slash, and backslash on
+        DOS/Windows; and colon on Mac OS.
+
+        If 'anchor' is true (the default), then the pattern match is more
+        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
+        'anchor' is false, both of these will match.
+
+        If 'prefix' is supplied, then only filenames starting with 'prefix'
+        (itself a pattern) and ending with 'pattern', with anything in between
+        them, will match.  'anchor' is ignored in this case.
+
+        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
+        'pattern' is assumed to be either a string containing a regex or a
+        regex object -- no translation is done, the regex is just compiled
+        and used as-is.
+
+        Selected strings will be added to self.files.
+
+        Return True if files are found, False otherwise.
+        """
+        # XXX docstring lying about what the special chars are?
+        files_found = False
+        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
+        self.debug_print(f"include_pattern: applying regex r'{pattern_re.pattern}'")
+
+        # delayed loading of allfiles list
+        if self.allfiles is None:
+            self.findall()
+
+        for name in self.allfiles:
+            if pattern_re.search(name):
+                self.debug_print(" adding " + name)
+                self.files.append(name)
+                files_found = True
+        return files_found
+
+    def exclude_pattern(self, pattern, anchor=True, prefix=None, is_regex=False):
+        """Remove strings (presumably filenames) from 'files' that match
+        'pattern'.  Other parameters are the same as for
+        'include_pattern()', above.
+        The list 'self.files' is modified in place.
+        Return True if files are found, False otherwise.
+        """
+        files_found = False
+        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
+        self.debug_print(f"exclude_pattern: applying regex r'{pattern_re.pattern}'")
+        for i in range(len(self.files) - 1, -1, -1):
+            if pattern_re.search(self.files[i]):
+                self.debug_print(" removing " + self.files[i])
+                del self.files[i]
+                files_found = True
+        return files_found
+
+
+# Utility functions
+
+
+def _find_all_simple(path):
+    """
+    Find all files under 'path'
+    """
+    all_unique = _UniqueDirs.filter(os.walk(path, followlinks=True))
+    results = (
+        os.path.join(base, file) for base, dirs, files in all_unique for file in files
+    )
+    return filter(os.path.isfile, results)
+
+
+class _UniqueDirs(set):
+    """
+    Exclude previously-seen dirs from walk results,
+    avoiding infinite recursion.
+    Ref https://bugs.python.org/issue44497.
+    """
+
+    def __call__(self, walk_item):
+        """
+        Given an item from an os.walk result, determine
+        if the item represents a unique dir for this instance
+        and if not, prevent further traversal.
+        """
+        base, dirs, files = walk_item
+        stat = os.stat(base)
+        candidate = stat.st_dev, stat.st_ino
+        found = candidate in self
+        if found:
+            del dirs[:]
+        self.add(candidate)
+        return not found
+
+    @classmethod
+    def filter(cls, items):
+        return filter(cls(), items)
+
+
+def findall(dir=os.curdir):
+    """
+    Find all files under 'dir' and return the list of full filenames.
+    Unless dir is '.', return full filenames with dir prepended.
+    """
+    files = _find_all_simple(dir)
+    if dir == os.curdir:
+        make_rel = functools.partial(os.path.relpath, start=dir)
+        files = map(make_rel, files)
+    return list(files)
+
+
+def glob_to_re(pattern):
+    """Translate a shell-like glob pattern to a regular expression; return
+    a string containing the regex.  Differs from 'fnmatch.translate()' in
+    that '*' does not match "special characters" (which are
+    platform-specific).
+    """
+    pattern_re = fnmatch.translate(pattern)
+
+    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
+    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
+    # and by extension they shouldn't match such "special characters" under
+    # any OS.  So change all non-escaped dots in the RE to match any
+    # character except the special characters (currently: just os.sep).
+    sep = os.sep
+    if os.sep == '\\':
+        # we're using a regex to manipulate a regex, so we need
+        # to escape the backslash twice
+        sep = r'\\\\'
+    escaped = rf'\1[^{sep}]'
+    pattern_re = re.sub(r'((?= 2:
+        set_threshold(logging.DEBUG)
+
+
+class Log(logging.Logger):
+    """distutils.log.Log is deprecated, please use an alternative from `logging`."""
+
+    def __init__(self, threshold=WARN):
+        warnings.warn(Log.__doc__)  # avoid DeprecationWarning to ensure warn is shown
+        super().__init__(__name__, level=threshold)
+
+    @property
+    def threshold(self):
+        return self.level
+
+    @threshold.setter
+    def threshold(self, level):
+        self.setLevel(level)
+
+    warn = logging.Logger.warning
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/msvc9compiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/msvc9compiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..f860a8d383068c283363733925f2f88ba7ece200
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/msvc9compiler.py
@@ -0,0 +1,824 @@
+"""distutils.msvc9compiler
+
+Contains MSVCCompiler, an implementation of the abstract CCompiler class
+for the Microsoft Visual Studio 2008.
+
+The module is compatible with VS 2005 and VS 2008. You can find legacy support
+for older versions of VS in distutils.msvccompiler.
+"""
+
+# Written by Perry Stoll
+# hacked by Robin Becker and Thomas Heller to do a better job of
+#   finding DevStudio (through the registry)
+# ported to VS2005 and VS 2008 by Christian Heimes
+
+import os
+import re
+import subprocess
+import sys
+import warnings
+import winreg
+
+from ._log import log
+from .ccompiler import CCompiler, gen_lib_options
+from .errors import (
+    CompileError,
+    DistutilsExecError,
+    DistutilsPlatformError,
+    LibError,
+    LinkError,
+)
+from .util import get_platform
+
+warnings.warn(
+    "msvc9compiler is deprecated and slated to be removed "
+    "in the future. Please discontinue use or file an issue "
+    "with pypa/distutils describing your use case.",
+    DeprecationWarning,
+)
+
+RegOpenKeyEx = winreg.OpenKeyEx
+RegEnumKey = winreg.EnumKey
+RegEnumValue = winreg.EnumValue
+RegError = winreg.error
+
+HKEYS = (
+    winreg.HKEY_USERS,
+    winreg.HKEY_CURRENT_USER,
+    winreg.HKEY_LOCAL_MACHINE,
+    winreg.HKEY_CLASSES_ROOT,
+)
+
+NATIVE_WIN64 = sys.platform == 'win32' and sys.maxsize > 2**32
+if NATIVE_WIN64:
+    # Visual C++ is a 32-bit application, so we need to look in
+    # the corresponding registry branch, if we're running a
+    # 64-bit Python on Win64
+    VS_BASE = r"Software\Wow6432Node\Microsoft\VisualStudio\%0.1f"
+    WINSDK_BASE = r"Software\Wow6432Node\Microsoft\Microsoft SDKs\Windows"
+    NET_BASE = r"Software\Wow6432Node\Microsoft\.NETFramework"
+else:
+    VS_BASE = r"Software\Microsoft\VisualStudio\%0.1f"
+    WINSDK_BASE = r"Software\Microsoft\Microsoft SDKs\Windows"
+    NET_BASE = r"Software\Microsoft\.NETFramework"
+
+# A map keyed by get_platform() return values to values accepted by
+# 'vcvarsall.bat'.  Note a cross-compile may combine these (eg, 'x86_amd64' is
+# the param to cross-compile on x86 targeting amd64.)
+PLAT_TO_VCVARS = {
+    'win32': 'x86',
+    'win-amd64': 'amd64',
+}
+
+
+class Reg:
+    """Helper class to read values from the registry"""
+
+    def get_value(cls, path, key):
+        for base in HKEYS:
+            d = cls.read_values(base, path)
+            if d and key in d:
+                return d[key]
+        raise KeyError(key)
+
+    get_value = classmethod(get_value)
+
+    def read_keys(cls, base, key):
+        """Return list of registry keys."""
+        try:
+            handle = RegOpenKeyEx(base, key)
+        except RegError:
+            return None
+        L = []
+        i = 0
+        while True:
+            try:
+                k = RegEnumKey(handle, i)
+            except RegError:
+                break
+            L.append(k)
+            i += 1
+        return L
+
+    read_keys = classmethod(read_keys)
+
+    def read_values(cls, base, key):
+        """Return dict of registry keys and values.
+
+        All names are converted to lowercase.
+        """
+        try:
+            handle = RegOpenKeyEx(base, key)
+        except RegError:
+            return None
+        d = {}
+        i = 0
+        while True:
+            try:
+                name, value, type = RegEnumValue(handle, i)
+            except RegError:
+                break
+            name = name.lower()
+            d[cls.convert_mbcs(name)] = cls.convert_mbcs(value)
+            i += 1
+        return d
+
+    read_values = classmethod(read_values)
+
+    def convert_mbcs(s):
+        dec = getattr(s, "decode", None)
+        if dec is not None:
+            try:
+                s = dec("mbcs")
+            except UnicodeError:
+                pass
+        return s
+
+    convert_mbcs = staticmethod(convert_mbcs)
+
+
+class MacroExpander:
+    def __init__(self, version):
+        self.macros = {}
+        self.vsbase = VS_BASE % version
+        self.load_macros(version)
+
+    def set_macro(self, macro, path, key):
+        self.macros[f"$({macro})"] = Reg.get_value(path, key)
+
+    def load_macros(self, version):
+        self.set_macro("VCInstallDir", self.vsbase + r"\Setup\VC", "productdir")
+        self.set_macro("VSInstallDir", self.vsbase + r"\Setup\VS", "productdir")
+        self.set_macro("FrameworkDir", NET_BASE, "installroot")
+        try:
+            if version >= 8.0:
+                self.set_macro("FrameworkSDKDir", NET_BASE, "sdkinstallrootv2.0")
+            else:
+                raise KeyError("sdkinstallrootv2.0")
+        except KeyError:
+            raise DistutilsPlatformError(
+                """Python was built with Visual Studio 2008;
+extensions must be built with a compiler than can generate compatible binaries.
+Visual Studio 2008 was not found on this system. If you have Cygwin installed,
+you can try compiling with MingW32, by passing "-c mingw32" to setup.py."""
+            )
+
+        if version >= 9.0:
+            self.set_macro("FrameworkVersion", self.vsbase, "clr version")
+            self.set_macro("WindowsSdkDir", WINSDK_BASE, "currentinstallfolder")
+        else:
+            p = r"Software\Microsoft\NET Framework Setup\Product"
+            for base in HKEYS:
+                try:
+                    h = RegOpenKeyEx(base, p)
+                except RegError:
+                    continue
+                key = RegEnumKey(h, 0)
+                d = Reg.get_value(base, rf"{p}\{key}")
+                self.macros["$(FrameworkVersion)"] = d["version"]
+
+    def sub(self, s):
+        for k, v in self.macros.items():
+            s = s.replace(k, v)
+        return s
+
+
+def get_build_version():
+    """Return the version of MSVC that was used to build Python.
+
+    For Python 2.3 and up, the version number is included in
+    sys.version.  For earlier versions, assume the compiler is MSVC 6.
+    """
+    prefix = "MSC v."
+    i = sys.version.find(prefix)
+    if i == -1:
+        return 6
+    i = i + len(prefix)
+    s, rest = sys.version[i:].split(" ", 1)
+    majorVersion = int(s[:-2]) - 6
+    if majorVersion >= 13:
+        # v13 was skipped and should be v14
+        majorVersion += 1
+    minorVersion = int(s[2:3]) / 10.0
+    # I don't think paths are affected by minor version in version 6
+    if majorVersion == 6:
+        minorVersion = 0
+    if majorVersion >= 6:
+        return majorVersion + minorVersion
+    # else we don't know what version of the compiler this is
+    return None
+
+
+def normalize_and_reduce_paths(paths):
+    """Return a list of normalized paths with duplicates removed.
+
+    The current order of paths is maintained.
+    """
+    # Paths are normalized so things like:  /a and /a/ aren't both preserved.
+    reduced_paths = []
+    for p in paths:
+        np = os.path.normpath(p)
+        # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set.
+        if np not in reduced_paths:
+            reduced_paths.append(np)
+    return reduced_paths
+
+
+def removeDuplicates(variable):
+    """Remove duplicate values of an environment variable."""
+    oldList = variable.split(os.pathsep)
+    newList = []
+    for i in oldList:
+        if i not in newList:
+            newList.append(i)
+    newVariable = os.pathsep.join(newList)
+    return newVariable
+
+
+def find_vcvarsall(version):
+    """Find the vcvarsall.bat file
+
+    At first it tries to find the productdir of VS 2008 in the registry. If
+    that fails it falls back to the VS90COMNTOOLS env var.
+    """
+    vsbase = VS_BASE % version
+    try:
+        productdir = Reg.get_value(rf"{vsbase}\Setup\VC", "productdir")
+    except KeyError:
+        log.debug("Unable to find productdir in registry")
+        productdir = None
+
+    if not productdir or not os.path.isdir(productdir):
+        toolskey = f"VS{version:0.0f}0COMNTOOLS"
+        toolsdir = os.environ.get(toolskey, None)
+
+        if toolsdir and os.path.isdir(toolsdir):
+            productdir = os.path.join(toolsdir, os.pardir, os.pardir, "VC")
+            productdir = os.path.abspath(productdir)
+            if not os.path.isdir(productdir):
+                log.debug(f"{productdir} is not a valid directory")
+                return None
+        else:
+            log.debug(f"Env var {toolskey} is not set or invalid")
+    if not productdir:
+        log.debug("No productdir found")
+        return None
+    vcvarsall = os.path.join(productdir, "vcvarsall.bat")
+    if os.path.isfile(vcvarsall):
+        return vcvarsall
+    log.debug("Unable to find vcvarsall.bat")
+    return None
+
+
+def query_vcvarsall(version, arch="x86"):
+    """Launch vcvarsall.bat and read the settings from its environment"""
+    vcvarsall = find_vcvarsall(version)
+    interesting = {"include", "lib", "libpath", "path"}
+    result = {}
+
+    if vcvarsall is None:
+        raise DistutilsPlatformError("Unable to find vcvarsall.bat")
+    log.debug("Calling 'vcvarsall.bat %s' (version=%s)", arch, version)
+    popen = subprocess.Popen(
+        f'"{vcvarsall}" {arch} & set',
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    try:
+        stdout, stderr = popen.communicate()
+        if popen.wait() != 0:
+            raise DistutilsPlatformError(stderr.decode("mbcs"))
+
+        stdout = stdout.decode("mbcs")
+        for line in stdout.split("\n"):
+            line = Reg.convert_mbcs(line)
+            if '=' not in line:
+                continue
+            line = line.strip()
+            key, value = line.split('=', 1)
+            key = key.lower()
+            if key in interesting:
+                if value.endswith(os.pathsep):
+                    value = value[:-1]
+                result[key] = removeDuplicates(value)
+
+    finally:
+        popen.stdout.close()
+        popen.stderr.close()
+
+    if len(result) != len(interesting):
+        raise ValueError(str(list(result.keys())))
+
+    return result
+
+
+# More globals
+VERSION = get_build_version()
+# MACROS = MacroExpander(VERSION)
+
+
+class MSVCCompiler(CCompiler):
+    """Concrete class that implements an interface to Microsoft Visual C++,
+    as defined by the CCompiler abstract class."""
+
+    compiler_type = 'msvc'
+
+    # Just set this so CCompiler's constructor doesn't barf.  We currently
+    # don't use the 'set_executables()' bureaucracy provided by CCompiler,
+    # as it really isn't necessary for this sort of single-compiler class.
+    # Would be nice to have a consistent interface with UnixCCompiler,
+    # though, so it's worth thinking about.
+    executables = {}
+
+    # Private class data (need to distinguish C from C++ source for compiler)
+    _c_extensions = ['.c']
+    _cpp_extensions = ['.cc', '.cpp', '.cxx']
+    _rc_extensions = ['.rc']
+    _mc_extensions = ['.mc']
+
+    # Needed for the filename generation methods provided by the
+    # base class, CCompiler.
+    src_extensions = _c_extensions + _cpp_extensions + _rc_extensions + _mc_extensions
+    res_extension = '.res'
+    obj_extension = '.obj'
+    static_lib_extension = '.lib'
+    shared_lib_extension = '.dll'
+    static_lib_format = shared_lib_format = '%s%s'
+    exe_extension = '.exe'
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        super().__init__(verbose, dry_run, force)
+        self.__version = VERSION
+        self.__root = r"Software\Microsoft\VisualStudio"
+        # self.__macros = MACROS
+        self.__paths = []
+        # target platform (.plat_name is consistent with 'bdist')
+        self.plat_name = None
+        self.__arch = None  # deprecated name
+        self.initialized = False
+
+    def initialize(self, plat_name=None):  # noqa: C901
+        # multi-init means we would need to check platform same each time...
+        assert not self.initialized, "don't init multiple times"
+        if self.__version < 8.0:
+            raise DistutilsPlatformError(
+                f"VC {self.__version:0.1f} is not supported by this module"
+            )
+        if plat_name is None:
+            plat_name = get_platform()
+        # sanity check for platforms to prevent obscure errors later.
+        ok_plats = 'win32', 'win-amd64'
+        if plat_name not in ok_plats:
+            raise DistutilsPlatformError(f"--plat-name must be one of {ok_plats}")
+
+        if (
+            "DISTUTILS_USE_SDK" in os.environ
+            and "MSSdk" in os.environ
+            and self.find_exe("cl.exe")
+        ):
+            # Assume that the SDK set up everything alright; don't try to be
+            # smarter
+            self.cc = "cl.exe"
+            self.linker = "link.exe"
+            self.lib = "lib.exe"
+            self.rc = "rc.exe"
+            self.mc = "mc.exe"
+        else:
+            # On x86, 'vcvars32.bat amd64' creates an env that doesn't work;
+            # to cross compile, you use 'x86_amd64'.
+            # On AMD64, 'vcvars32.bat amd64' is a native build env; to cross
+            # compile use 'x86' (ie, it runs the x86 compiler directly)
+            if plat_name in (get_platform(), 'win32'):
+                # native build or cross-compile to win32
+                plat_spec = PLAT_TO_VCVARS[plat_name]
+            else:
+                # cross compile from win32 -> some 64bit
+                plat_spec = (
+                    PLAT_TO_VCVARS[get_platform()] + '_' + PLAT_TO_VCVARS[plat_name]
+                )
+
+            vc_env = query_vcvarsall(VERSION, plat_spec)
+
+            self.__paths = vc_env['path'].split(os.pathsep)
+            os.environ['lib'] = vc_env['lib']
+            os.environ['include'] = vc_env['include']
+
+            if len(self.__paths) == 0:
+                raise DistutilsPlatformError(
+                    f"Python was built with {self.__product}, "
+                    "and extensions need to be built with the same "
+                    "version of the compiler, but it isn't installed."
+                )
+
+            self.cc = self.find_exe("cl.exe")
+            self.linker = self.find_exe("link.exe")
+            self.lib = self.find_exe("lib.exe")
+            self.rc = self.find_exe("rc.exe")  # resource compiler
+            self.mc = self.find_exe("mc.exe")  # message compiler
+            # self.set_path_env_var('lib')
+            # self.set_path_env_var('include')
+
+        # extend the MSVC path with the current path
+        try:
+            for p in os.environ['path'].split(';'):
+                self.__paths.append(p)
+        except KeyError:
+            pass
+        self.__paths = normalize_and_reduce_paths(self.__paths)
+        os.environ['path'] = ";".join(self.__paths)
+
+        self.preprocess_options = None
+        if self.__arch == "x86":
+            self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/DNDEBUG']
+            self.compile_options_debug = [
+                '/nologo',
+                '/Od',
+                '/MDd',
+                '/W3',
+                '/Z7',
+                '/D_DEBUG',
+            ]
+        else:
+            # Win64
+            self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/GS-', '/DNDEBUG']
+            self.compile_options_debug = [
+                '/nologo',
+                '/Od',
+                '/MDd',
+                '/W3',
+                '/GS-',
+                '/Z7',
+                '/D_DEBUG',
+            ]
+
+        self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO']
+        if self.__version >= 7:
+            self.ldflags_shared_debug = ['/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG']
+        self.ldflags_static = ['/nologo']
+
+        self.initialized = True
+
+    # -- Worker methods ------------------------------------------------
+
+    def object_filenames(self, source_filenames, strip_dir=False, output_dir=''):
+        # Copied from ccompiler.py, extended to return .res as 'object'-file
+        # for .rc input file
+        if output_dir is None:
+            output_dir = ''
+        obj_names = []
+        for src_name in source_filenames:
+            (base, ext) = os.path.splitext(src_name)
+            base = os.path.splitdrive(base)[1]  # Chop off the drive
+            base = base[os.path.isabs(base) :]  # If abs, chop off leading /
+            if ext not in self.src_extensions:
+                # Better to raise an exception instead of silently continuing
+                # and later complain about sources and targets having
+                # different lengths
+                raise CompileError(f"Don't know how to compile {src_name}")
+            if strip_dir:
+                base = os.path.basename(base)
+            if ext in self._rc_extensions:
+                obj_names.append(os.path.join(output_dir, base + self.res_extension))
+            elif ext in self._mc_extensions:
+                obj_names.append(os.path.join(output_dir, base + self.res_extension))
+            else:
+                obj_names.append(os.path.join(output_dir, base + self.obj_extension))
+        return obj_names
+
+    def compile(  # noqa: C901
+        self,
+        sources,
+        output_dir=None,
+        macros=None,
+        include_dirs=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        depends=None,
+    ):
+        if not self.initialized:
+            self.initialize()
+        compile_info = self._setup_compile(
+            output_dir, macros, include_dirs, sources, depends, extra_postargs
+        )
+        macros, objects, extra_postargs, pp_opts, build = compile_info
+
+        compile_opts = extra_preargs or []
+        compile_opts.append('/c')
+        if debug:
+            compile_opts.extend(self.compile_options_debug)
+        else:
+            compile_opts.extend(self.compile_options)
+
+        for obj in objects:
+            try:
+                src, ext = build[obj]
+            except KeyError:
+                continue
+            if debug:
+                # pass the full pathname to MSVC in debug mode,
+                # this allows the debugger to find the source file
+                # without asking the user to browse for it
+                src = os.path.abspath(src)
+
+            if ext in self._c_extensions:
+                input_opt = "/Tc" + src
+            elif ext in self._cpp_extensions:
+                input_opt = "/Tp" + src
+            elif ext in self._rc_extensions:
+                # compile .RC to .RES file
+                input_opt = src
+                output_opt = "/fo" + obj
+                try:
+                    self.spawn([self.rc] + pp_opts + [output_opt] + [input_opt])
+                except DistutilsExecError as msg:
+                    raise CompileError(msg)
+                continue
+            elif ext in self._mc_extensions:
+                # Compile .MC to .RC file to .RES file.
+                #   * '-h dir' specifies the directory for the
+                #     generated include file
+                #   * '-r dir' specifies the target directory of the
+                #     generated RC file and the binary message resource
+                #     it includes
+                #
+                # For now (since there are no options to change this),
+                # we use the source-directory for the include file and
+                # the build directory for the RC file and message
+                # resources. This works at least for win32all.
+                h_dir = os.path.dirname(src)
+                rc_dir = os.path.dirname(obj)
+                try:
+                    # first compile .MC to .RC and .H file
+                    self.spawn([self.mc] + ['-h', h_dir, '-r', rc_dir] + [src])
+                    base, _ = os.path.splitext(os.path.basename(src))
+                    rc_file = os.path.join(rc_dir, base + '.rc')
+                    # then compile .RC to .RES file
+                    self.spawn([self.rc] + ["/fo" + obj] + [rc_file])
+
+                except DistutilsExecError as msg:
+                    raise CompileError(msg)
+                continue
+            else:
+                # how to handle this file?
+                raise CompileError(f"Don't know how to compile {src} to {obj}")
+
+            output_opt = "/Fo" + obj
+            try:
+                self.spawn(
+                    [self.cc]
+                    + compile_opts
+                    + pp_opts
+                    + [input_opt, output_opt]
+                    + extra_postargs
+                )
+            except DistutilsExecError as msg:
+                raise CompileError(msg)
+
+        return objects
+
+    def create_static_lib(
+        self, objects, output_libname, output_dir=None, debug=False, target_lang=None
+    ):
+        if not self.initialized:
+            self.initialize()
+        (objects, output_dir) = self._fix_object_args(objects, output_dir)
+        output_filename = self.library_filename(output_libname, output_dir=output_dir)
+
+        if self._need_link(objects, output_filename):
+            lib_args = objects + ['/OUT:' + output_filename]
+            if debug:
+                pass  # XXX what goes here?
+            try:
+                self.spawn([self.lib] + lib_args)
+            except DistutilsExecError as msg:
+                raise LibError(msg)
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    def link(  # noqa: C901
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        if not self.initialized:
+            self.initialize()
+        (objects, output_dir) = self._fix_object_args(objects, output_dir)
+        fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs)
+        (libraries, library_dirs, runtime_library_dirs) = fixed_args
+
+        if runtime_library_dirs:
+            self.warn(
+                "I don't know what to do with 'runtime_library_dirs': "
+                + str(runtime_library_dirs)
+            )
+
+        lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries)
+        if output_dir is not None:
+            output_filename = os.path.join(output_dir, output_filename)
+
+        if self._need_link(objects, output_filename):
+            if target_desc == CCompiler.EXECUTABLE:
+                if debug:
+                    ldflags = self.ldflags_shared_debug[1:]
+                else:
+                    ldflags = self.ldflags_shared[1:]
+            else:
+                if debug:
+                    ldflags = self.ldflags_shared_debug
+                else:
+                    ldflags = self.ldflags_shared
+
+            export_opts = []
+            for sym in export_symbols or []:
+                export_opts.append("/EXPORT:" + sym)
+
+            ld_args = (
+                ldflags + lib_opts + export_opts + objects + ['/OUT:' + output_filename]
+            )
+
+            # The MSVC linker generates .lib and .exp files, which cannot be
+            # suppressed by any linker switches. The .lib files may even be
+            # needed! Make sure they are generated in the temporary build
+            # directory. Since they have different names for debug and release
+            # builds, they can go into the same directory.
+            build_temp = os.path.dirname(objects[0])
+            if export_symbols is not None:
+                (dll_name, dll_ext) = os.path.splitext(
+                    os.path.basename(output_filename)
+                )
+                implib_file = os.path.join(build_temp, self.library_filename(dll_name))
+                ld_args.append('/IMPLIB:' + implib_file)
+
+            self.manifest_setup_ldargs(output_filename, build_temp, ld_args)
+
+            if extra_preargs:
+                ld_args[:0] = extra_preargs
+            if extra_postargs:
+                ld_args.extend(extra_postargs)
+
+            self.mkpath(os.path.dirname(output_filename))
+            try:
+                self.spawn([self.linker] + ld_args)
+            except DistutilsExecError as msg:
+                raise LinkError(msg)
+
+            # embed the manifest
+            # XXX - this is somewhat fragile - if mt.exe fails, distutils
+            # will still consider the DLL up-to-date, but it will not have a
+            # manifest.  Maybe we should link to a temp file?  OTOH, that
+            # implies a build environment error that shouldn't go undetected.
+            mfinfo = self.manifest_get_embed_info(target_desc, ld_args)
+            if mfinfo is not None:
+                mffilename, mfid = mfinfo
+                out_arg = f'-outputresource:{output_filename};{mfid}'
+                try:
+                    self.spawn(['mt.exe', '-nologo', '-manifest', mffilename, out_arg])
+                except DistutilsExecError as msg:
+                    raise LinkError(msg)
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    def manifest_setup_ldargs(self, output_filename, build_temp, ld_args):
+        # If we need a manifest at all, an embedded manifest is recommended.
+        # See MSDN article titled
+        # "Understanding manifest generation for C/C++ programs"
+        # (currently at https://learn.microsoft.com/en-us/cpp/build/understanding-manifest-generation-for-c-cpp-programs)
+        # Ask the linker to generate the manifest in the temp dir, so
+        # we can check it, and possibly embed it, later.
+        temp_manifest = os.path.join(
+            build_temp, os.path.basename(output_filename) + ".manifest"
+        )
+        ld_args.append('/MANIFESTFILE:' + temp_manifest)
+
+    def manifest_get_embed_info(self, target_desc, ld_args):
+        # If a manifest should be embedded, return a tuple of
+        # (manifest_filename, resource_id).  Returns None if no manifest
+        # should be embedded.  See https://bugs.python.org/issue7833 for why
+        # we want to avoid any manifest for extension modules if we can)
+        for arg in ld_args:
+            if arg.startswith("/MANIFESTFILE:"):
+                temp_manifest = arg.split(":", 1)[1]
+                break
+        else:
+            # no /MANIFESTFILE so nothing to do.
+            return None
+        if target_desc == CCompiler.EXECUTABLE:
+            # by default, executables always get the manifest with the
+            # CRT referenced.
+            mfid = 1
+        else:
+            # Extension modules try and avoid any manifest if possible.
+            mfid = 2
+            temp_manifest = self._remove_visual_c_ref(temp_manifest)
+        if temp_manifest is None:
+            return None
+        return temp_manifest, mfid
+
+    def _remove_visual_c_ref(self, manifest_file):
+        try:
+            # Remove references to the Visual C runtime, so they will
+            # fall through to the Visual C dependency of Python.exe.
+            # This way, when installed for a restricted user (e.g.
+            # runtimes are not in WinSxS folder, but in Python's own
+            # folder), the runtimes do not need to be in every folder
+            # with .pyd's.
+            # Returns either the filename of the modified manifest or
+            # None if no manifest should be embedded.
+            manifest_f = open(manifest_file)
+            try:
+                manifest_buf = manifest_f.read()
+            finally:
+                manifest_f.close()
+            pattern = re.compile(
+                r"""|)""",
+                re.DOTALL,
+            )
+            manifest_buf = re.sub(pattern, "", manifest_buf)
+            pattern = r"\s*"
+            manifest_buf = re.sub(pattern, "", manifest_buf)
+            # Now see if any other assemblies are referenced - if not, we
+            # don't want a manifest embedded.
+            pattern = re.compile(
+                r"""|)""",
+                re.DOTALL,
+            )
+            if re.search(pattern, manifest_buf) is None:
+                return None
+
+            manifest_f = open(manifest_file, 'w')
+            try:
+                manifest_f.write(manifest_buf)
+                return manifest_file
+            finally:
+                manifest_f.close()
+        except OSError:
+            pass
+
+    # -- Miscellaneous methods -----------------------------------------
+    # These are all used by the 'gen_lib_options() function, in
+    # ccompiler.py.
+
+    def library_dir_option(self, dir):
+        return "/LIBPATH:" + dir
+
+    def runtime_library_dir_option(self, dir):
+        raise DistutilsPlatformError(
+            "don't know how to set runtime library search path for MSVC++"
+        )
+
+    def library_option(self, lib):
+        return self.library_filename(lib)
+
+    def find_library_file(self, dirs, lib, debug=False):
+        # Prefer a debugging library if found (and requested), but deal
+        # with it if we don't have one.
+        if debug:
+            try_names = [lib + "_d", lib]
+        else:
+            try_names = [lib]
+        for dir in dirs:
+            for name in try_names:
+                libfile = os.path.join(dir, self.library_filename(name))
+                if os.path.exists(libfile):
+                    return libfile
+        else:
+            # Oops, didn't find it in *any* of 'dirs'
+            return None
+
+    # Helper methods for using the MSVC registry settings
+
+    def find_exe(self, exe):
+        """Return path to an MSVC executable program.
+
+        Tries to find the program in several places: first, one of the
+        MSVC program search paths from the registry; next, the directories
+        in the PATH environment variable.  If any of those work, return an
+        absolute path that is known to exist.  If none of them work, just
+        return the original program name, 'exe'.
+        """
+        for p in self.__paths:
+            fn = os.path.join(os.path.abspath(p), exe)
+            if os.path.isfile(fn):
+                return fn
+
+        # didn't find it; try existing path
+        for p in os.environ['Path'].split(';'):
+            fn = os.path.join(os.path.abspath(p), exe)
+            if os.path.isfile(fn):
+                return fn
+
+        return exe
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/msvccompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/msvccompiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bf94e60c9a70559fd87d31b623e2e6675dd45be
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/msvccompiler.py
@@ -0,0 +1,689 @@
+"""distutils.msvccompiler
+
+Contains MSVCCompiler, an implementation of the abstract CCompiler class
+for the Microsoft Visual Studio.
+"""
+
+# Written by Perry Stoll
+# hacked by Robin Becker and Thomas Heller to do a better job of
+#   finding DevStudio (through the registry)
+
+import os
+import sys
+import warnings
+
+from ._log import log
+from .ccompiler import CCompiler, gen_lib_options
+from .errors import (
+    CompileError,
+    DistutilsExecError,
+    DistutilsPlatformError,
+    LibError,
+    LinkError,
+)
+
+_can_read_reg = False
+try:
+    import winreg
+
+    _can_read_reg = True
+    hkey_mod = winreg
+
+    RegOpenKeyEx = winreg.OpenKeyEx
+    RegEnumKey = winreg.EnumKey
+    RegEnumValue = winreg.EnumValue
+    RegError = winreg.error
+
+except ImportError:
+    try:
+        import win32api
+        import win32con
+
+        _can_read_reg = True
+        hkey_mod = win32con
+
+        RegOpenKeyEx = win32api.RegOpenKeyEx
+        RegEnumKey = win32api.RegEnumKey
+        RegEnumValue = win32api.RegEnumValue
+        RegError = win32api.error
+    except ImportError:
+        log.info(
+            "Warning: Can't read registry to find the "
+            "necessary compiler setting\n"
+            "Make sure that Python modules winreg, "
+            "win32api or win32con are installed."
+        )
+        pass
+
+if _can_read_reg:
+    HKEYS = (
+        hkey_mod.HKEY_USERS,
+        hkey_mod.HKEY_CURRENT_USER,
+        hkey_mod.HKEY_LOCAL_MACHINE,
+        hkey_mod.HKEY_CLASSES_ROOT,
+    )
+
+
+warnings.warn(
+    "msvccompiler is deprecated and slated to be removed "
+    "in the future. Please discontinue use or file an issue "
+    "with pypa/distutils describing your use case.",
+    DeprecationWarning,
+)
+
+
+def read_keys(base, key):
+    """Return list of registry keys."""
+    try:
+        handle = RegOpenKeyEx(base, key)
+    except RegError:
+        return None
+    L = []
+    i = 0
+    while True:
+        try:
+            k = RegEnumKey(handle, i)
+        except RegError:
+            break
+        L.append(k)
+        i += 1
+    return L
+
+
+def read_values(base, key):
+    """Return dict of registry keys and values.
+
+    All names are converted to lowercase.
+    """
+    try:
+        handle = RegOpenKeyEx(base, key)
+    except RegError:
+        return None
+    d = {}
+    i = 0
+    while True:
+        try:
+            name, value, type = RegEnumValue(handle, i)
+        except RegError:
+            break
+        name = name.lower()
+        d[convert_mbcs(name)] = convert_mbcs(value)
+        i += 1
+    return d
+
+
+def convert_mbcs(s):
+    dec = getattr(s, "decode", None)
+    if dec is not None:
+        try:
+            s = dec("mbcs")
+        except UnicodeError:
+            pass
+    return s
+
+
+class MacroExpander:
+    def __init__(self, version):
+        self.macros = {}
+        self.load_macros(version)
+
+    def set_macro(self, macro, path, key):
+        for base in HKEYS:
+            d = read_values(base, path)
+            if d:
+                self.macros[f"$({macro})"] = d[key]
+                break
+
+    def load_macros(self, version):
+        vsbase = rf"Software\Microsoft\VisualStudio\{version:0.1f}"
+        self.set_macro("VCInstallDir", vsbase + r"\Setup\VC", "productdir")
+        self.set_macro("VSInstallDir", vsbase + r"\Setup\VS", "productdir")
+        net = r"Software\Microsoft\.NETFramework"
+        self.set_macro("FrameworkDir", net, "installroot")
+        try:
+            if version > 7.0:
+                self.set_macro("FrameworkSDKDir", net, "sdkinstallrootv1.1")
+            else:
+                self.set_macro("FrameworkSDKDir", net, "sdkinstallroot")
+        except KeyError:
+            raise DistutilsPlatformError(
+                """Python was built with Visual Studio 2003;
+extensions must be built with a compiler than can generate compatible binaries.
+Visual Studio 2003 was not found on this system. If you have Cygwin installed,
+you can try compiling with MingW32, by passing "-c mingw32" to setup.py."""
+            )
+
+        p = r"Software\Microsoft\NET Framework Setup\Product"
+        for base in HKEYS:
+            try:
+                h = RegOpenKeyEx(base, p)
+            except RegError:
+                continue
+            key = RegEnumKey(h, 0)
+            d = read_values(base, rf"{p}\{key}")
+            self.macros["$(FrameworkVersion)"] = d["version"]
+
+    def sub(self, s):
+        for k, v in self.macros.items():
+            s = s.replace(k, v)
+        return s
+
+
+def get_build_version():
+    """Return the version of MSVC that was used to build Python.
+
+    For Python 2.3 and up, the version number is included in
+    sys.version.  For earlier versions, assume the compiler is MSVC 6.
+    """
+    prefix = "MSC v."
+    i = sys.version.find(prefix)
+    if i == -1:
+        return 6
+    i = i + len(prefix)
+    s, rest = sys.version[i:].split(" ", 1)
+    majorVersion = int(s[:-2]) - 6
+    if majorVersion >= 13:
+        # v13 was skipped and should be v14
+        majorVersion += 1
+    minorVersion = int(s[2:3]) / 10.0
+    # I don't think paths are affected by minor version in version 6
+    if majorVersion == 6:
+        minorVersion = 0
+    if majorVersion >= 6:
+        return majorVersion + minorVersion
+    # else we don't know what version of the compiler this is
+    return None
+
+
+def get_build_architecture():
+    """Return the processor architecture.
+
+    Possible results are "Intel" or "AMD64".
+    """
+
+    prefix = " bit ("
+    i = sys.version.find(prefix)
+    if i == -1:
+        return "Intel"
+    j = sys.version.find(")", i)
+    return sys.version[i + len(prefix) : j]
+
+
+def normalize_and_reduce_paths(paths):
+    """Return a list of normalized paths with duplicates removed.
+
+    The current order of paths is maintained.
+    """
+    # Paths are normalized so things like:  /a and /a/ aren't both preserved.
+    reduced_paths = []
+    for p in paths:
+        np = os.path.normpath(p)
+        # XXX(nnorwitz): O(n**2), if reduced_paths gets long perhaps use a set.
+        if np not in reduced_paths:
+            reduced_paths.append(np)
+    return reduced_paths
+
+
+class MSVCCompiler(CCompiler):
+    """Concrete class that implements an interface to Microsoft Visual C++,
+    as defined by the CCompiler abstract class."""
+
+    compiler_type = 'msvc'
+
+    # Just set this so CCompiler's constructor doesn't barf.  We currently
+    # don't use the 'set_executables()' bureaucracy provided by CCompiler,
+    # as it really isn't necessary for this sort of single-compiler class.
+    # Would be nice to have a consistent interface with UnixCCompiler,
+    # though, so it's worth thinking about.
+    executables = {}
+
+    # Private class data (need to distinguish C from C++ source for compiler)
+    _c_extensions = ['.c']
+    _cpp_extensions = ['.cc', '.cpp', '.cxx']
+    _rc_extensions = ['.rc']
+    _mc_extensions = ['.mc']
+
+    # Needed for the filename generation methods provided by the
+    # base class, CCompiler.
+    src_extensions = _c_extensions + _cpp_extensions + _rc_extensions + _mc_extensions
+    res_extension = '.res'
+    obj_extension = '.obj'
+    static_lib_extension = '.lib'
+    shared_lib_extension = '.dll'
+    static_lib_format = shared_lib_format = '%s%s'
+    exe_extension = '.exe'
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        super().__init__(verbose, dry_run, force)
+        self.__version = get_build_version()
+        self.__arch = get_build_architecture()
+        if self.__arch == "Intel":
+            # x86
+            if self.__version >= 7:
+                self.__root = r"Software\Microsoft\VisualStudio"
+                self.__macros = MacroExpander(self.__version)
+            else:
+                self.__root = r"Software\Microsoft\Devstudio"
+            self.__product = f"Visual Studio version {self.__version}"
+        else:
+            # Win64. Assume this was built with the platform SDK
+            self.__product = "Microsoft SDK compiler %s" % (self.__version + 6)
+
+        self.initialized = False
+
+    def initialize(self):
+        self.__paths = []
+        if (
+            "DISTUTILS_USE_SDK" in os.environ
+            and "MSSdk" in os.environ
+            and self.find_exe("cl.exe")
+        ):
+            # Assume that the SDK set up everything alright; don't try to be
+            # smarter
+            self.cc = "cl.exe"
+            self.linker = "link.exe"
+            self.lib = "lib.exe"
+            self.rc = "rc.exe"
+            self.mc = "mc.exe"
+        else:
+            self.__paths = self.get_msvc_paths("path")
+
+            if len(self.__paths) == 0:
+                raise DistutilsPlatformError(
+                    f"Python was built with {self.__product}, "
+                    "and extensions need to be built with the same "
+                    "version of the compiler, but it isn't installed."
+                )
+
+            self.cc = self.find_exe("cl.exe")
+            self.linker = self.find_exe("link.exe")
+            self.lib = self.find_exe("lib.exe")
+            self.rc = self.find_exe("rc.exe")  # resource compiler
+            self.mc = self.find_exe("mc.exe")  # message compiler
+            self.set_path_env_var('lib')
+            self.set_path_env_var('include')
+
+        # extend the MSVC path with the current path
+        try:
+            for p in os.environ['path'].split(';'):
+                self.__paths.append(p)
+        except KeyError:
+            pass
+        self.__paths = normalize_and_reduce_paths(self.__paths)
+        os.environ['path'] = ";".join(self.__paths)
+
+        self.preprocess_options = None
+        if self.__arch == "Intel":
+            self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/GX', '/DNDEBUG']
+            self.compile_options_debug = [
+                '/nologo',
+                '/Od',
+                '/MDd',
+                '/W3',
+                '/GX',
+                '/Z7',
+                '/D_DEBUG',
+            ]
+        else:
+            # Win64
+            self.compile_options = ['/nologo', '/O2', '/MD', '/W3', '/GS-', '/DNDEBUG']
+            self.compile_options_debug = [
+                '/nologo',
+                '/Od',
+                '/MDd',
+                '/W3',
+                '/GS-',
+                '/Z7',
+                '/D_DEBUG',
+            ]
+
+        self.ldflags_shared = ['/DLL', '/nologo', '/INCREMENTAL:NO']
+        if self.__version >= 7:
+            self.ldflags_shared_debug = ['/DLL', '/nologo', '/INCREMENTAL:no', '/DEBUG']
+        else:
+            self.ldflags_shared_debug = [
+                '/DLL',
+                '/nologo',
+                '/INCREMENTAL:no',
+                '/pdb:None',
+                '/DEBUG',
+            ]
+        self.ldflags_static = ['/nologo']
+
+        self.initialized = True
+
+    # -- Worker methods ------------------------------------------------
+
+    def object_filenames(self, source_filenames, strip_dir=False, output_dir=''):
+        # Copied from ccompiler.py, extended to return .res as 'object'-file
+        # for .rc input file
+        if output_dir is None:
+            output_dir = ''
+        obj_names = []
+        for src_name in source_filenames:
+            (base, ext) = os.path.splitext(src_name)
+            base = os.path.splitdrive(base)[1]  # Chop off the drive
+            base = base[os.path.isabs(base) :]  # If abs, chop off leading /
+            if ext not in self.src_extensions:
+                # Better to raise an exception instead of silently continuing
+                # and later complain about sources and targets having
+                # different lengths
+                raise CompileError(f"Don't know how to compile {src_name}")
+            if strip_dir:
+                base = os.path.basename(base)
+            if ext in self._rc_extensions:
+                obj_names.append(os.path.join(output_dir, base + self.res_extension))
+            elif ext in self._mc_extensions:
+                obj_names.append(os.path.join(output_dir, base + self.res_extension))
+            else:
+                obj_names.append(os.path.join(output_dir, base + self.obj_extension))
+        return obj_names
+
+    def compile(  # noqa: C901
+        self,
+        sources,
+        output_dir=None,
+        macros=None,
+        include_dirs=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        depends=None,
+    ):
+        if not self.initialized:
+            self.initialize()
+        compile_info = self._setup_compile(
+            output_dir, macros, include_dirs, sources, depends, extra_postargs
+        )
+        macros, objects, extra_postargs, pp_opts, build = compile_info
+
+        compile_opts = extra_preargs or []
+        compile_opts.append('/c')
+        if debug:
+            compile_opts.extend(self.compile_options_debug)
+        else:
+            compile_opts.extend(self.compile_options)
+
+        for obj in objects:
+            try:
+                src, ext = build[obj]
+            except KeyError:
+                continue
+            if debug:
+                # pass the full pathname to MSVC in debug mode,
+                # this allows the debugger to find the source file
+                # without asking the user to browse for it
+                src = os.path.abspath(src)
+
+            if ext in self._c_extensions:
+                input_opt = "/Tc" + src
+            elif ext in self._cpp_extensions:
+                input_opt = "/Tp" + src
+            elif ext in self._rc_extensions:
+                # compile .RC to .RES file
+                input_opt = src
+                output_opt = "/fo" + obj
+                try:
+                    self.spawn([self.rc] + pp_opts + [output_opt] + [input_opt])
+                except DistutilsExecError as msg:
+                    raise CompileError(msg)
+                continue
+            elif ext in self._mc_extensions:
+                # Compile .MC to .RC file to .RES file.
+                #   * '-h dir' specifies the directory for the
+                #     generated include file
+                #   * '-r dir' specifies the target directory of the
+                #     generated RC file and the binary message resource
+                #     it includes
+                #
+                # For now (since there are no options to change this),
+                # we use the source-directory for the include file and
+                # the build directory for the RC file and message
+                # resources. This works at least for win32all.
+                h_dir = os.path.dirname(src)
+                rc_dir = os.path.dirname(obj)
+                try:
+                    # first compile .MC to .RC and .H file
+                    self.spawn([self.mc] + ['-h', h_dir, '-r', rc_dir] + [src])
+                    base, _ = os.path.splitext(os.path.basename(src))
+                    rc_file = os.path.join(rc_dir, base + '.rc')
+                    # then compile .RC to .RES file
+                    self.spawn([self.rc] + ["/fo" + obj] + [rc_file])
+
+                except DistutilsExecError as msg:
+                    raise CompileError(msg)
+                continue
+            else:
+                # how to handle this file?
+                raise CompileError(f"Don't know how to compile {src} to {obj}")
+
+            output_opt = "/Fo" + obj
+            try:
+                self.spawn(
+                    [self.cc]
+                    + compile_opts
+                    + pp_opts
+                    + [input_opt, output_opt]
+                    + extra_postargs
+                )
+            except DistutilsExecError as msg:
+                raise CompileError(msg)
+
+        return objects
+
+    def create_static_lib(
+        self, objects, output_libname, output_dir=None, debug=False, target_lang=None
+    ):
+        if not self.initialized:
+            self.initialize()
+        (objects, output_dir) = self._fix_object_args(objects, output_dir)
+        output_filename = self.library_filename(output_libname, output_dir=output_dir)
+
+        if self._need_link(objects, output_filename):
+            lib_args = objects + ['/OUT:' + output_filename]
+            if debug:
+                pass  # XXX what goes here?
+            try:
+                self.spawn([self.lib] + lib_args)
+            except DistutilsExecError as msg:
+                raise LibError(msg)
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    def link(  # noqa: C901
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        if not self.initialized:
+            self.initialize()
+        (objects, output_dir) = self._fix_object_args(objects, output_dir)
+        fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs)
+        (libraries, library_dirs, runtime_library_dirs) = fixed_args
+
+        if runtime_library_dirs:
+            self.warn(
+                "I don't know what to do with 'runtime_library_dirs': "
+                + str(runtime_library_dirs)
+            )
+
+        lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries)
+        if output_dir is not None:
+            output_filename = os.path.join(output_dir, output_filename)
+
+        if self._need_link(objects, output_filename):
+            if target_desc == CCompiler.EXECUTABLE:
+                if debug:
+                    ldflags = self.ldflags_shared_debug[1:]
+                else:
+                    ldflags = self.ldflags_shared[1:]
+            else:
+                if debug:
+                    ldflags = self.ldflags_shared_debug
+                else:
+                    ldflags = self.ldflags_shared
+
+            export_opts = []
+            for sym in export_symbols or []:
+                export_opts.append("/EXPORT:" + sym)
+
+            ld_args = (
+                ldflags + lib_opts + export_opts + objects + ['/OUT:' + output_filename]
+            )
+
+            # The MSVC linker generates .lib and .exp files, which cannot be
+            # suppressed by any linker switches. The .lib files may even be
+            # needed! Make sure they are generated in the temporary build
+            # directory. Since they have different names for debug and release
+            # builds, they can go into the same directory.
+            if export_symbols is not None:
+                (dll_name, dll_ext) = os.path.splitext(
+                    os.path.basename(output_filename)
+                )
+                implib_file = os.path.join(
+                    os.path.dirname(objects[0]), self.library_filename(dll_name)
+                )
+                ld_args.append('/IMPLIB:' + implib_file)
+
+            if extra_preargs:
+                ld_args[:0] = extra_preargs
+            if extra_postargs:
+                ld_args.extend(extra_postargs)
+
+            self.mkpath(os.path.dirname(output_filename))
+            try:
+                self.spawn([self.linker] + ld_args)
+            except DistutilsExecError as msg:
+                raise LinkError(msg)
+
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    # -- Miscellaneous methods -----------------------------------------
+    # These are all used by the 'gen_lib_options() function, in
+    # ccompiler.py.
+
+    def library_dir_option(self, dir):
+        return "/LIBPATH:" + dir
+
+    def runtime_library_dir_option(self, dir):
+        raise DistutilsPlatformError(
+            "don't know how to set runtime library search path for MSVC++"
+        )
+
+    def library_option(self, lib):
+        return self.library_filename(lib)
+
+    def find_library_file(self, dirs, lib, debug=False):
+        # Prefer a debugging library if found (and requested), but deal
+        # with it if we don't have one.
+        if debug:
+            try_names = [lib + "_d", lib]
+        else:
+            try_names = [lib]
+        for dir in dirs:
+            for name in try_names:
+                libfile = os.path.join(dir, self.library_filename(name))
+                if os.path.exists(libfile):
+                    return libfile
+        else:
+            # Oops, didn't find it in *any* of 'dirs'
+            return None
+
+    # Helper methods for using the MSVC registry settings
+
+    def find_exe(self, exe):
+        """Return path to an MSVC executable program.
+
+        Tries to find the program in several places: first, one of the
+        MSVC program search paths from the registry; next, the directories
+        in the PATH environment variable.  If any of those work, return an
+        absolute path that is known to exist.  If none of them work, just
+        return the original program name, 'exe'.
+        """
+        for p in self.__paths:
+            fn = os.path.join(os.path.abspath(p), exe)
+            if os.path.isfile(fn):
+                return fn
+
+        # didn't find it; try existing path
+        for p in os.environ['Path'].split(';'):
+            fn = os.path.join(os.path.abspath(p), exe)
+            if os.path.isfile(fn):
+                return fn
+
+        return exe
+
+    def get_msvc_paths(self, path, platform='x86'):
+        """Get a list of devstudio directories (include, lib or path).
+
+        Return a list of strings.  The list will be empty if unable to
+        access the registry or appropriate registry keys not found.
+        """
+        if not _can_read_reg:
+            return []
+
+        path = path + " dirs"
+        if self.__version >= 7:
+            key = rf"{self.__root}\{self.__version:0.1f}\VC\VC_OBJECTS_PLATFORM_INFO\Win32\Directories"
+        else:
+            key = (
+                rf"{self.__root}\6.0\Build System\Components\Platforms"
+                rf"\Win32 ({platform})\Directories"
+            )
+
+        for base in HKEYS:
+            d = read_values(base, key)
+            if d:
+                if self.__version >= 7:
+                    return self.__macros.sub(d[path]).split(";")
+                else:
+                    return d[path].split(";")
+        # MSVC 6 seems to create the registry entries we need only when
+        # the GUI is run.
+        if self.__version == 6:
+            for base in HKEYS:
+                if read_values(base, rf"{self.__root}\6.0") is not None:
+                    self.warn(
+                        "It seems you have Visual Studio 6 installed, "
+                        "but the expected registry settings are not present.\n"
+                        "You must at least run the Visual Studio GUI once "
+                        "so that these entries are created."
+                    )
+                    break
+        return []
+
+    def set_path_env_var(self, name):
+        """Set environment variable 'name' to an MSVC path type value.
+
+        This is equivalent to a SET command prior to execution of spawned
+        commands.
+        """
+
+        if name == "lib":
+            p = self.get_msvc_paths("library")
+        else:
+            p = self.get_msvc_paths(name)
+        if p:
+            os.environ[name] = ';'.join(p)
+
+
+if get_build_version() >= 8.0:
+    log.debug("Importing new compiler from distutils.msvc9compiler")
+    OldMSVCCompiler = MSVCCompiler
+    # get_build_architecture not really relevant now we support cross-compile
+    from distutils.msvc9compiler import (
+        MacroExpander,
+        MSVCCompiler,
+    )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/spawn.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/spawn.py
new file mode 100644
index 0000000000000000000000000000000000000000..50d30a2761237347bce76bdd5407a8b2d670f0ea
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/spawn.py
@@ -0,0 +1,118 @@
+"""distutils.spawn
+
+Provides the 'spawn()' function, a front-end to various platform-
+specific functions for launching another program in a sub-process.
+"""
+
+from __future__ import annotations
+
+import os
+import platform
+import shutil
+import subprocess
+import sys
+import warnings
+
+from typing import Mapping
+
+from ._log import log
+from .debug import DEBUG
+from .errors import DistutilsExecError
+
+
+def _debug(cmd):
+    """
+    Render a subprocess command differently depending on DEBUG.
+    """
+    return cmd if DEBUG else cmd[0]
+
+
+def _inject_macos_ver(env: Mapping[str:str] | None) -> Mapping[str:str] | None:
+    if platform.system() != 'Darwin':
+        return env
+
+    from .util import MACOSX_VERSION_VAR, get_macosx_target_ver
+
+    target_ver = get_macosx_target_ver()
+    update = {MACOSX_VERSION_VAR: target_ver} if target_ver else {}
+    return {**_resolve(env), **update}
+
+
+def _resolve(env: Mapping[str:str] | None) -> Mapping[str:str]:
+    return os.environ if env is None else env
+
+
+def spawn(cmd, search_path=True, verbose=False, dry_run=False, env=None):
+    """Run another program, specified as a command list 'cmd', in a new process.
+
+    'cmd' is just the argument list for the new process, ie.
+    cmd[0] is the program to run and cmd[1:] are the rest of its arguments.
+    There is no way to run a program with a name different from that of its
+    executable.
+
+    If 'search_path' is true (the default), the system's executable
+    search path will be used to find the program; otherwise, cmd[0]
+    must be the exact path to the executable.  If 'dry_run' is true,
+    the command will not actually be run.
+
+    Raise DistutilsExecError if running the program fails in any way; just
+    return on success.
+    """
+    log.info(subprocess.list2cmdline(cmd))
+    if dry_run:
+        return
+
+    if search_path:
+        executable = shutil.which(cmd[0])
+        if executable is not None:
+            cmd[0] = executable
+
+    try:
+        subprocess.check_call(cmd, env=_inject_macos_ver(env))
+    except OSError as exc:
+        raise DistutilsExecError(
+            f"command {_debug(cmd)!r} failed: {exc.args[-1]}"
+        ) from exc
+    except subprocess.CalledProcessError as err:
+        raise DistutilsExecError(
+            f"command {_debug(cmd)!r} failed with exit code {err.returncode}"
+        ) from err
+
+
+def find_executable(executable, path=None):
+    """Tries to find 'executable' in the directories listed in 'path'.
+
+    A string listing directories separated by 'os.pathsep'; defaults to
+    os.environ['PATH'].  Returns the complete filename or None if not found.
+    """
+    warnings.warn(
+        'Use shutil.which instead of find_executable', DeprecationWarning, stacklevel=2
+    )
+    _, ext = os.path.splitext(executable)
+    if (sys.platform == 'win32') and (ext != '.exe'):
+        executable = executable + '.exe'
+
+    if os.path.isfile(executable):
+        return executable
+
+    if path is None:
+        path = os.environ.get('PATH', None)
+        # bpo-35755: Don't fall through if PATH is the empty string
+        if path is None:
+            try:
+                path = os.confstr("CS_PATH")
+            except (AttributeError, ValueError):
+                # os.confstr() or CS_PATH is not available
+                path = os.defpath
+
+    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+    if not path:
+        return None
+
+    paths = path.split(os.pathsep)
+    for p in paths:
+        f = os.path.join(p, executable)
+        if os.path.isfile(f):
+            # the file exists, we have a shot at spawn working
+            return f
+    return None
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/sysconfig.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/sysconfig.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ba0be56020e84b60a3c1b49abeb39a1dbafe8f7
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/sysconfig.py
@@ -0,0 +1,561 @@
+"""Provide access to Python's configuration information.  The specific
+configuration variables available depend heavily on the platform and
+configuration.  The values may be retrieved using
+get_config_var(name), and the list of variables is available via
+get_config_vars().keys().  Additional convenience functions are also
+available.
+
+Written by:   Fred L. Drake, Jr.
+Email:        
+"""
+
+import functools
+import os
+import pathlib
+import re
+import sys
+import sysconfig
+
+from ._functools import pass_none
+from .compat import py39
+from .errors import DistutilsPlatformError
+from .util import is_mingw
+
+IS_PYPY = '__pypy__' in sys.builtin_module_names
+
+# These are needed in a couple of spots, so just compute them once.
+PREFIX = os.path.normpath(sys.prefix)
+EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
+BASE_PREFIX = os.path.normpath(sys.base_prefix)
+BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix)
+
+# Path to the base directory of the project. On Windows the binary may
+# live in project/PCbuild/win32 or project/PCbuild/amd64.
+# set for cross builds
+if "_PYTHON_PROJECT_BASE" in os.environ:
+    project_base = os.path.abspath(os.environ["_PYTHON_PROJECT_BASE"])
+else:
+    if sys.executable:
+        project_base = os.path.dirname(os.path.abspath(sys.executable))
+    else:
+        # sys.executable can be empty if argv[0] has been changed and Python is
+        # unable to retrieve the real program name
+        project_base = os.getcwd()
+
+
+def _is_python_source_dir(d):
+    """
+    Return True if the target directory appears to point to an
+    un-installed Python.
+    """
+    modules = pathlib.Path(d).joinpath('Modules')
+    return any(modules.joinpath(fn).is_file() for fn in ('Setup', 'Setup.local'))
+
+
+_sys_home = getattr(sys, '_home', None)
+
+
+def _is_parent(dir_a, dir_b):
+    """
+    Return True if a is a parent of b.
+    """
+    return os.path.normcase(dir_a).startswith(os.path.normcase(dir_b))
+
+
+if os.name == 'nt':
+
+    @pass_none
+    def _fix_pcbuild(d):
+        # In a venv, sys._home will be inside BASE_PREFIX rather than PREFIX.
+        prefixes = PREFIX, BASE_PREFIX
+        matched = (
+            prefix
+            for prefix in prefixes
+            if _is_parent(d, os.path.join(prefix, "PCbuild"))
+        )
+        return next(matched, d)
+
+    project_base = _fix_pcbuild(project_base)
+    _sys_home = _fix_pcbuild(_sys_home)
+
+
+def _python_build():
+    if _sys_home:
+        return _is_python_source_dir(_sys_home)
+    return _is_python_source_dir(project_base)
+
+
+python_build = _python_build()
+
+
+# Calculate the build qualifier flags if they are defined.  Adding the flags
+# to the include and lib directories only makes sense for an installation, not
+# an in-source build.
+build_flags = ''
+try:
+    if not python_build:
+        build_flags = sys.abiflags
+except AttributeError:
+    # It's not a configure-based build, so the sys module doesn't have
+    # this attribute, which is fine.
+    pass
+
+
+def get_python_version():
+    """Return a string containing the major and minor Python version,
+    leaving off the patchlevel.  Sample return values could be '1.5'
+    or '2.2'.
+    """
+    return '%d.%d' % sys.version_info[:2]
+
+
+def get_python_inc(plat_specific=False, prefix=None):
+    """Return the directory containing installed Python header files.
+
+    If 'plat_specific' is false (the default), this is the path to the
+    non-platform-specific header files, i.e. Python.h and so on;
+    otherwise, this is the path to platform-specific header files
+    (namely pyconfig.h).
+
+    If 'prefix' is supplied, use it instead of sys.base_prefix or
+    sys.base_exec_prefix -- i.e., ignore 'plat_specific'.
+    """
+    default_prefix = BASE_EXEC_PREFIX if plat_specific else BASE_PREFIX
+    resolved_prefix = prefix if prefix is not None else default_prefix
+    # MinGW imitates posix like layout, but os.name != posix
+    os_name = "posix" if is_mingw() else os.name
+    try:
+        getter = globals()[f'_get_python_inc_{os_name}']
+    except KeyError:
+        raise DistutilsPlatformError(
+            "I don't know where Python installs its C header files "
+            f"on platform '{os.name}'"
+        )
+    return getter(resolved_prefix, prefix, plat_specific)
+
+
+@pass_none
+def _extant(path):
+    """
+    Replace path with None if it doesn't exist.
+    """
+    return path if os.path.exists(path) else None
+
+
+def _get_python_inc_posix(prefix, spec_prefix, plat_specific):
+    if IS_PYPY and sys.version_info < (3, 8):
+        return os.path.join(prefix, 'include')
+    return (
+        _get_python_inc_posix_python(plat_specific)
+        or _extant(_get_python_inc_from_config(plat_specific, spec_prefix))
+        or _get_python_inc_posix_prefix(prefix)
+    )
+
+
+def _get_python_inc_posix_python(plat_specific):
+    """
+    Assume the executable is in the build directory. The
+    pyconfig.h file should be in the same directory. Since
+    the build directory may not be the source directory,
+    use "srcdir" from the makefile to find the "Include"
+    directory.
+    """
+    if not python_build:
+        return
+    if plat_specific:
+        return _sys_home or project_base
+    incdir = os.path.join(get_config_var('srcdir'), 'Include')
+    return os.path.normpath(incdir)
+
+
+def _get_python_inc_from_config(plat_specific, spec_prefix):
+    """
+    If no prefix was explicitly specified, provide the include
+    directory from the config vars. Useful when
+    cross-compiling, since the config vars may come from
+    the host
+    platform Python installation, while the current Python
+    executable is from the build platform installation.
+
+    >>> monkeypatch = getfixture('monkeypatch')
+    >>> gpifc = _get_python_inc_from_config
+    >>> monkeypatch.setitem(gpifc.__globals__, 'get_config_var', str.lower)
+    >>> gpifc(False, '/usr/bin/')
+    >>> gpifc(False, '')
+    >>> gpifc(False, None)
+    'includepy'
+    >>> gpifc(True, None)
+    'confincludepy'
+    """
+    if spec_prefix is None:
+        return get_config_var('CONF' * plat_specific + 'INCLUDEPY')
+
+
+def _get_python_inc_posix_prefix(prefix):
+    implementation = 'pypy' if IS_PYPY else 'python'
+    python_dir = implementation + get_python_version() + build_flags
+    return os.path.join(prefix, "include", python_dir)
+
+
+def _get_python_inc_nt(prefix, spec_prefix, plat_specific):
+    if python_build:
+        # Include both include dirs to ensure we can find pyconfig.h
+        return (
+            os.path.join(prefix, "include")
+            + os.path.pathsep
+            + os.path.dirname(sysconfig.get_config_h_filename())
+        )
+    return os.path.join(prefix, "include")
+
+
+# allow this behavior to be monkey-patched. Ref pypa/distutils#2.
+def _posix_lib(standard_lib, libpython, early_prefix, prefix):
+    if standard_lib:
+        return libpython
+    else:
+        return os.path.join(libpython, "site-packages")
+
+
+def get_python_lib(plat_specific=False, standard_lib=False, prefix=None):
+    """Return the directory containing the Python library (standard or
+    site additions).
+
+    If 'plat_specific' is true, return the directory containing
+    platform-specific modules, i.e. any module from a non-pure-Python
+    module distribution; otherwise, return the platform-shared library
+    directory.  If 'standard_lib' is true, return the directory
+    containing standard Python library modules; otherwise, return the
+    directory for site-specific modules.
+
+    If 'prefix' is supplied, use it instead of sys.base_prefix or
+    sys.base_exec_prefix -- i.e., ignore 'plat_specific'.
+    """
+
+    if IS_PYPY and sys.version_info < (3, 8):
+        # PyPy-specific schema
+        if prefix is None:
+            prefix = PREFIX
+        if standard_lib:
+            return os.path.join(prefix, "lib-python", sys.version[0])
+        return os.path.join(prefix, 'site-packages')
+
+    early_prefix = prefix
+
+    if prefix is None:
+        if standard_lib:
+            prefix = plat_specific and BASE_EXEC_PREFIX or BASE_PREFIX
+        else:
+            prefix = plat_specific and EXEC_PREFIX or PREFIX
+
+    if os.name == "posix" or is_mingw():
+        if plat_specific or standard_lib:
+            # Platform-specific modules (any module from a non-pure-Python
+            # module distribution) or standard Python library modules.
+            libdir = getattr(sys, "platlibdir", "lib")
+        else:
+            # Pure Python
+            libdir = "lib"
+        implementation = 'pypy' if IS_PYPY else 'python'
+        libpython = os.path.join(prefix, libdir, implementation + get_python_version())
+        return _posix_lib(standard_lib, libpython, early_prefix, prefix)
+    elif os.name == "nt":
+        if standard_lib:
+            return os.path.join(prefix, "Lib")
+        else:
+            return os.path.join(prefix, "Lib", "site-packages")
+    else:
+        raise DistutilsPlatformError(
+            f"I don't know where Python installs its library on platform '{os.name}'"
+        )
+
+
+@functools.lru_cache
+def _customize_macos():
+    """
+    Perform first-time customization of compiler-related
+    config vars on macOS. Use after a compiler is known
+    to be needed. This customization exists primarily to support Pythons
+    from binary installers. The kind and paths to build tools on
+    the user system may vary significantly from the system
+    that Python itself was built on.  Also the user OS
+    version and build tools may not support the same set
+    of CPU architectures for universal builds.
+    """
+
+    sys.platform == "darwin" and __import__('_osx_support').customize_compiler(
+        get_config_vars()
+    )
+
+
+def customize_compiler(compiler):  # noqa: C901
+    """Do any platform-specific customization of a CCompiler instance.
+
+    Mainly needed on Unix, so we can plug in the information that
+    varies across Unices and is stored in Python's Makefile.
+    """
+    if compiler.compiler_type in ["unix", "cygwin", "mingw32"]:
+        _customize_macos()
+
+        (
+            cc,
+            cxx,
+            cflags,
+            ccshared,
+            ldshared,
+            shlib_suffix,
+            ar,
+            ar_flags,
+        ) = get_config_vars(
+            'CC',
+            'CXX',
+            'CFLAGS',
+            'CCSHARED',
+            'LDSHARED',
+            'SHLIB_SUFFIX',
+            'AR',
+            'ARFLAGS',
+        )
+
+        if 'CC' in os.environ:
+            newcc = os.environ['CC']
+            if 'LDSHARED' not in os.environ and ldshared.startswith(cc):
+                # If CC is overridden, use that as the default
+                #       command for LDSHARED as well
+                ldshared = newcc + ldshared[len(cc) :]
+            cc = newcc
+        if 'CXX' in os.environ:
+            cxx = os.environ['CXX']
+        if 'LDSHARED' in os.environ:
+            ldshared = os.environ['LDSHARED']
+        if 'CPP' in os.environ:
+            cpp = os.environ['CPP']
+        else:
+            cpp = cc + " -E"  # not always
+        if 'LDFLAGS' in os.environ:
+            ldshared = ldshared + ' ' + os.environ['LDFLAGS']
+        if 'CFLAGS' in os.environ:
+            cflags = cflags + ' ' + os.environ['CFLAGS']
+            ldshared = ldshared + ' ' + os.environ['CFLAGS']
+        if 'CPPFLAGS' in os.environ:
+            cpp = cpp + ' ' + os.environ['CPPFLAGS']
+            cflags = cflags + ' ' + os.environ['CPPFLAGS']
+            ldshared = ldshared + ' ' + os.environ['CPPFLAGS']
+        if 'AR' in os.environ:
+            ar = os.environ['AR']
+        if 'ARFLAGS' in os.environ:
+            archiver = ar + ' ' + os.environ['ARFLAGS']
+        else:
+            archiver = ar + ' ' + ar_flags
+
+        cc_cmd = cc + ' ' + cflags
+        compiler.set_executables(
+            preprocessor=cpp,
+            compiler=cc_cmd,
+            compiler_so=cc_cmd + ' ' + ccshared,
+            compiler_cxx=cxx,
+            linker_so=ldshared,
+            linker_exe=cc,
+            archiver=archiver,
+        )
+
+        if 'RANLIB' in os.environ and compiler.executables.get('ranlib', None):
+            compiler.set_executables(ranlib=os.environ['RANLIB'])
+
+        compiler.shared_lib_extension = shlib_suffix
+
+
+def get_config_h_filename():
+    """Return full pathname of installed pyconfig.h file."""
+    return sysconfig.get_config_h_filename()
+
+
+def get_makefile_filename():
+    """Return full pathname of installed Makefile from the Python build."""
+    return sysconfig.get_makefile_filename()
+
+
+def parse_config_h(fp, g=None):
+    """Parse a config.h-style file.
+
+    A dictionary containing name/value pairs is returned.  If an
+    optional dictionary is passed in as the second argument, it is
+    used instead of a new dictionary.
+    """
+    return sysconfig.parse_config_h(fp, vars=g)
+
+
+# Regexes needed for parsing Makefile (and similar syntaxes,
+# like old-style Setup files).
+_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
+_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
+_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
+
+
+def parse_makefile(fn, g=None):  # noqa: C901
+    """Parse a Makefile-style file.
+
+    A dictionary containing name/value pairs is returned.  If an
+    optional dictionary is passed in as the second argument, it is
+    used instead of a new dictionary.
+    """
+    from distutils.text_file import TextFile
+
+    fp = TextFile(
+        fn,
+        strip_comments=True,
+        skip_blanks=True,
+        join_lines=True,
+        errors="surrogateescape",
+    )
+
+    if g is None:
+        g = {}
+    done = {}
+    notdone = {}
+
+    while True:
+        line = fp.readline()
+        if line is None:  # eof
+            break
+        m = _variable_rx.match(line)
+        if m:
+            n, v = m.group(1, 2)
+            v = v.strip()
+            # `$$' is a literal `$' in make
+            tmpv = v.replace('$$', '')
+
+            if "$" in tmpv:
+                notdone[n] = v
+            else:
+                try:
+                    v = int(v)
+                except ValueError:
+                    # insert literal `$'
+                    done[n] = v.replace('$$', '$')
+                else:
+                    done[n] = v
+
+    # Variables with a 'PY_' prefix in the makefile. These need to
+    # be made available without that prefix through sysconfig.
+    # Special care is needed to ensure that variable expansion works, even
+    # if the expansion uses the name without a prefix.
+    renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')
+
+    # do variable interpolation here
+    while notdone:
+        for name in list(notdone):
+            value = notdone[name]
+            m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
+            if m:
+                n = m.group(1)
+                found = True
+                if n in done:
+                    item = str(done[n])
+                elif n in notdone:
+                    # get it on a subsequent round
+                    found = False
+                elif n in os.environ:
+                    # do it like make: fall back to environment
+                    item = os.environ[n]
+
+                elif n in renamed_variables:
+                    if name.startswith('PY_') and name[3:] in renamed_variables:
+                        item = ""
+
+                    elif 'PY_' + n in notdone:
+                        found = False
+
+                    else:
+                        item = str(done['PY_' + n])
+                else:
+                    done[n] = item = ""
+                if found:
+                    after = value[m.end() :]
+                    value = value[: m.start()] + item + after
+                    if "$" in after:
+                        notdone[name] = value
+                    else:
+                        try:
+                            value = int(value)
+                        except ValueError:
+                            done[name] = value.strip()
+                        else:
+                            done[name] = value
+                        del notdone[name]
+
+                        if name.startswith('PY_') and name[3:] in renamed_variables:
+                            name = name[3:]
+                            if name not in done:
+                                done[name] = value
+            else:
+                # bogus variable reference; just drop it since we can't deal
+                del notdone[name]
+
+    fp.close()
+
+    # strip spurious spaces
+    for k, v in done.items():
+        if isinstance(v, str):
+            done[k] = v.strip()
+
+    # save the results in the global dictionary
+    g.update(done)
+    return g
+
+
+def expand_makefile_vars(s, vars):
+    """Expand Makefile-style variables -- "${foo}" or "$(foo)" -- in
+    'string' according to 'vars' (a dictionary mapping variable names to
+    values).  Variables not present in 'vars' are silently expanded to the
+    empty string.  The variable values in 'vars' should not contain further
+    variable expansions; if 'vars' is the output of 'parse_makefile()',
+    you're fine.  Returns a variable-expanded version of 's'.
+    """
+
+    # This algorithm does multiple expansion, so if vars['foo'] contains
+    # "${bar}", it will expand ${foo} to ${bar}, and then expand
+    # ${bar}... and so forth.  This is fine as long as 'vars' comes from
+    # 'parse_makefile()', which takes care of such expansions eagerly,
+    # according to make's variable expansion semantics.
+
+    while True:
+        m = _findvar1_rx.search(s) or _findvar2_rx.search(s)
+        if m:
+            (beg, end) = m.span()
+            s = s[0:beg] + vars.get(m.group(1)) + s[end:]
+        else:
+            break
+    return s
+
+
+_config_vars = None
+
+
+def get_config_vars(*args):
+    """With no arguments, return a dictionary of all configuration
+    variables relevant for the current platform.  Generally this includes
+    everything needed to build extensions and install both pure modules and
+    extensions.  On Unix, this means every variable defined in Python's
+    installed Makefile; on Windows it's a much smaller set.
+
+    With arguments, return a list of values that result from looking up
+    each argument in the configuration variable dictionary.
+    """
+    global _config_vars
+    if _config_vars is None:
+        _config_vars = sysconfig.get_config_vars().copy()
+        py39.add_ext_suffix(_config_vars)
+
+    return [_config_vars.get(name) for name in args] if args else _config_vars
+
+
+def get_config_var(name):
+    """Return the value of a single variable using the dictionary
+    returned by 'get_config_vars()'.  Equivalent to
+    get_config_vars().get(name)
+    """
+    if name == 'SO':
+        import warnings
+
+        warnings.warn('SO is deprecated, use EXT_SUFFIX', DeprecationWarning, 2)
+    return get_config_vars().get(name)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/text_file.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/text_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..fec29c73b04d0e58956a410df8e4faba52234305
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/text_file.py
@@ -0,0 +1,286 @@
+"""text_file
+
+provides the TextFile class, which gives an interface to text files
+that (optionally) takes care of stripping comments, ignoring blank
+lines, and joining lines with backslashes."""
+
+import sys
+
+
+class TextFile:
+    """Provides a file-like object that takes care of all the things you
+    commonly want to do when processing a text file that has some
+    line-by-line syntax: strip comments (as long as "#" is your
+    comment character), skip blank lines, join adjacent lines by
+    escaping the newline (ie. backslash at end of line), strip
+    leading and/or trailing whitespace.  All of these are optional
+    and independently controllable.
+
+    Provides a 'warn()' method so you can generate warning messages that
+    report physical line number, even if the logical line in question
+    spans multiple physical lines.  Also provides 'unreadline()' for
+    implementing line-at-a-time lookahead.
+
+    Constructor is called as:
+
+        TextFile (filename=None, file=None, **options)
+
+    It bombs (RuntimeError) if both 'filename' and 'file' are None;
+    'filename' should be a string, and 'file' a file object (or
+    something that provides 'readline()' and 'close()' methods).  It is
+    recommended that you supply at least 'filename', so that TextFile
+    can include it in warning messages.  If 'file' is not supplied,
+    TextFile creates its own using 'io.open()'.
+
+    The options are all boolean, and affect the value returned by
+    'readline()':
+      strip_comments [default: true]
+        strip from "#" to end-of-line, as well as any whitespace
+        leading up to the "#" -- unless it is escaped by a backslash
+      lstrip_ws [default: false]
+        strip leading whitespace from each line before returning it
+      rstrip_ws [default: true]
+        strip trailing whitespace (including line terminator!) from
+        each line before returning it
+      skip_blanks [default: true}
+        skip lines that are empty *after* stripping comments and
+        whitespace.  (If both lstrip_ws and rstrip_ws are false,
+        then some lines may consist of solely whitespace: these will
+        *not* be skipped, even if 'skip_blanks' is true.)
+      join_lines [default: false]
+        if a backslash is the last non-newline character on a line
+        after stripping comments and whitespace, join the following line
+        to it to form one "logical line"; if N consecutive lines end
+        with a backslash, then N+1 physical lines will be joined to
+        form one logical line.
+      collapse_join [default: false]
+        strip leading whitespace from lines that are joined to their
+        predecessor; only matters if (join_lines and not lstrip_ws)
+      errors [default: 'strict']
+        error handler used to decode the file content
+
+    Note that since 'rstrip_ws' can strip the trailing newline, the
+    semantics of 'readline()' must differ from those of the builtin file
+    object's 'readline()' method!  In particular, 'readline()' returns
+    None for end-of-file: an empty string might just be a blank line (or
+    an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
+    not."""
+
+    default_options = {
+        'strip_comments': 1,
+        'skip_blanks': 1,
+        'lstrip_ws': 0,
+        'rstrip_ws': 1,
+        'join_lines': 0,
+        'collapse_join': 0,
+        'errors': 'strict',
+    }
+
+    def __init__(self, filename=None, file=None, **options):
+        """Construct a new TextFile object.  At least one of 'filename'
+        (a string) and 'file' (a file-like object) must be supplied.
+        They keyword argument options are described above and affect
+        the values returned by 'readline()'."""
+        if filename is None and file is None:
+            raise RuntimeError(
+                "you must supply either or both of 'filename' and 'file'"
+            )
+
+        # set values for all options -- either from client option hash
+        # or fallback to default_options
+        for opt in self.default_options.keys():
+            if opt in options:
+                setattr(self, opt, options[opt])
+            else:
+                setattr(self, opt, self.default_options[opt])
+
+        # sanity check client option hash
+        for opt in options.keys():
+            if opt not in self.default_options:
+                raise KeyError(f"invalid TextFile option '{opt}'")
+
+        if file is None:
+            self.open(filename)
+        else:
+            self.filename = filename
+            self.file = file
+            self.current_line = 0  # assuming that file is at BOF!
+
+        # 'linebuf' is a stack of lines that will be emptied before we
+        # actually read from the file; it's only populated by an
+        # 'unreadline()' operation
+        self.linebuf = []
+
+    def open(self, filename):
+        """Open a new file named 'filename'.  This overrides both the
+        'filename' and 'file' arguments to the constructor."""
+        self.filename = filename
+        self.file = open(self.filename, errors=self.errors, encoding='utf-8')
+        self.current_line = 0
+
+    def close(self):
+        """Close the current file and forget everything we know about it
+        (filename, current line number)."""
+        file = self.file
+        self.file = None
+        self.filename = None
+        self.current_line = None
+        file.close()
+
+    def gen_error(self, msg, line=None):
+        outmsg = []
+        if line is None:
+            line = self.current_line
+        outmsg.append(self.filename + ", ")
+        if isinstance(line, (list, tuple)):
+            outmsg.append("lines %d-%d: " % tuple(line))
+        else:
+            outmsg.append("line %d: " % line)
+        outmsg.append(str(msg))
+        return "".join(outmsg)
+
+    def error(self, msg, line=None):
+        raise ValueError("error: " + self.gen_error(msg, line))
+
+    def warn(self, msg, line=None):
+        """Print (to stderr) a warning message tied to the current logical
+        line in the current file.  If the current logical line in the
+        file spans multiple physical lines, the warning refers to the
+        whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
+        the current line number; it may be a list or tuple to indicate a
+        range of physical lines, or an integer for a single physical
+        line."""
+        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")
+
+    def readline(self):  # noqa: C901
+        """Read and return a single logical line from the current file (or
+        from an internal buffer if lines have previously been "unread"
+        with 'unreadline()').  If the 'join_lines' option is true, this
+        may involve reading multiple physical lines concatenated into a
+        single string.  Updates the current line number, so calling
+        'warn()' after 'readline()' emits a warning about the physical
+        line(s) just read.  Returns None on end-of-file, since the empty
+        string can occur if 'rstrip_ws' is true but 'strip_blanks' is
+        not."""
+        # If any "unread" lines waiting in 'linebuf', return the top
+        # one.  (We don't actually buffer read-ahead data -- lines only
+        # get put in 'linebuf' if the client explicitly does an
+        # 'unreadline()'.
+        if self.linebuf:
+            line = self.linebuf[-1]
+            del self.linebuf[-1]
+            return line
+
+        buildup_line = ''
+
+        while True:
+            # read the line, make it None if EOF
+            line = self.file.readline()
+            if line == '':
+                line = None
+
+            if self.strip_comments and line:
+                # Look for the first "#" in the line.  If none, never
+                # mind.  If we find one and it's the first character, or
+                # is not preceded by "\", then it starts a comment --
+                # strip the comment, strip whitespace before it, and
+                # carry on.  Otherwise, it's just an escaped "#", so
+                # unescape it (and any other escaped "#"'s that might be
+                # lurking in there) and otherwise leave the line alone.
+
+                pos = line.find("#")
+                if pos == -1:  # no "#" -- no comments
+                    pass
+
+                # It's definitely a comment -- either "#" is the first
+                # character, or it's elsewhere and unescaped.
+                elif pos == 0 or line[pos - 1] != "\\":
+                    # Have to preserve the trailing newline, because it's
+                    # the job of a later step (rstrip_ws) to remove it --
+                    # and if rstrip_ws is false, we'd better preserve it!
+                    # (NB. this means that if the final line is all comment
+                    # and has no trailing newline, we will think that it's
+                    # EOF; I think that's OK.)
+                    eol = (line[-1] == '\n') and '\n' or ''
+                    line = line[0:pos] + eol
+
+                    # If all that's left is whitespace, then skip line
+                    # *now*, before we try to join it to 'buildup_line' --
+                    # that way constructs like
+                    #   hello \\
+                    #   # comment that should be ignored
+                    #   there
+                    # result in "hello there".
+                    if line.strip() == "":
+                        continue
+                else:  # it's an escaped "#"
+                    line = line.replace("\\#", "#")
+
+            # did previous line end with a backslash? then accumulate
+            if self.join_lines and buildup_line:
+                # oops: end of file
+                if line is None:
+                    self.warn("continuation line immediately precedes end-of-file")
+                    return buildup_line
+
+                if self.collapse_join:
+                    line = line.lstrip()
+                line = buildup_line + line
+
+                # careful: pay attention to line number when incrementing it
+                if isinstance(self.current_line, list):
+                    self.current_line[1] = self.current_line[1] + 1
+                else:
+                    self.current_line = [self.current_line, self.current_line + 1]
+            # just an ordinary line, read it as usual
+            else:
+                if line is None:  # eof
+                    return None
+
+                # still have to be careful about incrementing the line number!
+                if isinstance(self.current_line, list):
+                    self.current_line = self.current_line[1] + 1
+                else:
+                    self.current_line = self.current_line + 1
+
+            # strip whitespace however the client wants (leading and
+            # trailing, or one or the other, or neither)
+            if self.lstrip_ws and self.rstrip_ws:
+                line = line.strip()
+            elif self.lstrip_ws:
+                line = line.lstrip()
+            elif self.rstrip_ws:
+                line = line.rstrip()
+
+            # blank line (whether we rstrip'ed or not)? skip to next line
+            # if appropriate
+            if line in ('', '\n') and self.skip_blanks:
+                continue
+
+            if self.join_lines:
+                if line[-1] == '\\':
+                    buildup_line = line[:-1]
+                    continue
+
+                if line[-2:] == '\\\n':
+                    buildup_line = line[0:-2] + '\n'
+                    continue
+
+            # well, I guess there's some actual content there: return it
+            return line
+
+    def readlines(self):
+        """Read and return the list of all logical lines remaining in the
+        current file."""
+        lines = []
+        while True:
+            line = self.readline()
+            if line is None:
+                return lines
+            lines.append(line)
+
+    def unreadline(self, line):
+        """Push 'line' (a string) onto an internal buffer that will be
+        checked by future 'readline()' calls.  Handy for implementing
+        a parser with line-at-a-time lookahead."""
+        self.linebuf.append(line)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/unixccompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/unixccompiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e68596b26c496227e258e48bc06d8aad0516be4
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/unixccompiler.py
@@ -0,0 +1,403 @@
+"""distutils.unixccompiler
+
+Contains the UnixCCompiler class, a subclass of CCompiler that handles
+the "typical" Unix-style command-line C compiler:
+  * macros defined with -Dname[=value]
+  * macros undefined with -Uname
+  * include search directories specified with -Idir
+  * libraries specified with -lllib
+  * library search directories specified with -Ldir
+  * compile handled by 'cc' (or similar) executable with -c option:
+    compiles .c to .o
+  * link static library handled by 'ar' command (possibly with 'ranlib')
+  * link shared library handled by 'cc -shared'
+"""
+
+from __future__ import annotations
+
+import itertools
+import os
+import re
+import shlex
+import sys
+
+from . import sysconfig
+from ._log import log
+from ._macos_compat import compiler_fixup
+from ._modified import newer
+from .ccompiler import CCompiler, gen_lib_options, gen_preprocess_options
+from .compat import consolidate_linker_args
+from .errors import CompileError, DistutilsExecError, LibError, LinkError
+
+# XXX Things not currently handled:
+#   * optimization/debug/warning flags; we just use whatever's in Python's
+#     Makefile and live with it.  Is this adequate?  If not, we might
+#     have to have a bunch of subclasses GNUCCompiler, SGICCompiler,
+#     SunCCompiler, and I suspect down that road lies madness.
+#   * even if we don't know a warning flag from an optimization flag,
+#     we need some way for outsiders to feed preprocessor/compiler/linker
+#     flags in to us -- eg. a sysadmin might want to mandate certain flags
+#     via a site config file, or a user might want to set something for
+#     compiling this module distribution only via the setup.py command
+#     line, whatever.  As long as these options come from something on the
+#     current system, they can be as system-dependent as they like, and we
+#     should just happily stuff them into the preprocessor/compiler/linker
+#     options and carry on.
+
+
+def _split_env(cmd):
+    """
+    For macOS, split command into 'env' portion (if any)
+    and the rest of the linker command.
+
+    >>> _split_env(['a', 'b', 'c'])
+    ([], ['a', 'b', 'c'])
+    >>> _split_env(['/usr/bin/env', 'A=3', 'gcc'])
+    (['/usr/bin/env', 'A=3'], ['gcc'])
+    """
+    pivot = 0
+    if os.path.basename(cmd[0]) == "env":
+        pivot = 1
+        while '=' in cmd[pivot]:
+            pivot += 1
+    return cmd[:pivot], cmd[pivot:]
+
+
+def _split_aix(cmd):
+    """
+    AIX platforms prefix the compiler with the ld_so_aix
+    script, so split that from the linker command.
+
+    >>> _split_aix(['a', 'b', 'c'])
+    ([], ['a', 'b', 'c'])
+    >>> _split_aix(['/bin/foo/ld_so_aix', 'gcc'])
+    (['/bin/foo/ld_so_aix'], ['gcc'])
+    """
+    pivot = os.path.basename(cmd[0]) == 'ld_so_aix'
+    return cmd[:pivot], cmd[pivot:]
+
+
+def _linker_params(linker_cmd, compiler_cmd):
+    """
+    The linker command usually begins with the compiler
+    command (possibly multiple elements), followed by zero or more
+    params for shared library building.
+
+    If the LDSHARED env variable overrides the linker command,
+    however, the commands may not match.
+
+    Return the best guess of the linker parameters by stripping
+    the linker command. If the compiler command does not
+    match the linker command, assume the linker command is
+    just the first element.
+
+    >>> _linker_params('gcc foo bar'.split(), ['gcc'])
+    ['foo', 'bar']
+    >>> _linker_params('gcc foo bar'.split(), ['other'])
+    ['foo', 'bar']
+    >>> _linker_params('ccache gcc foo bar'.split(), 'ccache gcc'.split())
+    ['foo', 'bar']
+    >>> _linker_params(['gcc'], ['gcc'])
+    []
+    """
+    c_len = len(compiler_cmd)
+    pivot = c_len if linker_cmd[:c_len] == compiler_cmd else 1
+    return linker_cmd[pivot:]
+
+
+class UnixCCompiler(CCompiler):
+    compiler_type = 'unix'
+
+    # These are used by CCompiler in two places: the constructor sets
+    # instance attributes 'preprocessor', 'compiler', etc. from them, and
+    # 'set_executable()' allows any of these to be set.  The defaults here
+    # are pretty generic; they will probably have to be set by an outsider
+    # (eg. using information discovered by the sysconfig about building
+    # Python extensions).
+    executables = {
+        'preprocessor': None,
+        'compiler': ["cc"],
+        'compiler_so': ["cc"],
+        'compiler_cxx': ["cc"],
+        'linker_so': ["cc", "-shared"],
+        'linker_exe': ["cc"],
+        'archiver': ["ar", "-cr"],
+        'ranlib': None,
+    }
+
+    if sys.platform[:6] == "darwin":
+        executables['ranlib'] = ["ranlib"]
+
+    # Needed for the filename generation methods provided by the base
+    # class, CCompiler.  NB. whoever instantiates/uses a particular
+    # UnixCCompiler instance should set 'shared_lib_ext' -- we set a
+    # reasonable common default here, but it's not necessarily used on all
+    # Unices!
+
+    src_extensions = [".c", ".C", ".cc", ".cxx", ".cpp", ".m"]
+    obj_extension = ".o"
+    static_lib_extension = ".a"
+    shared_lib_extension = ".so"
+    dylib_lib_extension = ".dylib"
+    xcode_stub_lib_extension = ".tbd"
+    static_lib_format = shared_lib_format = dylib_lib_format = "lib%s%s"
+    xcode_stub_lib_format = dylib_lib_format
+    if sys.platform == "cygwin":
+        exe_extension = ".exe"
+        shared_lib_extension = ".dll.a"
+        dylib_lib_extension = ".dll"
+        dylib_lib_format = "cyg%s%s"
+
+    def preprocess(
+        self,
+        source,
+        output_file=None,
+        macros=None,
+        include_dirs=None,
+        extra_preargs=None,
+        extra_postargs=None,
+    ):
+        fixed_args = self._fix_compile_args(None, macros, include_dirs)
+        ignore, macros, include_dirs = fixed_args
+        pp_opts = gen_preprocess_options(macros, include_dirs)
+        pp_args = self.preprocessor + pp_opts
+        if output_file:
+            pp_args.extend(['-o', output_file])
+        if extra_preargs:
+            pp_args[:0] = extra_preargs
+        if extra_postargs:
+            pp_args.extend(extra_postargs)
+        pp_args.append(source)
+
+        # reasons to preprocess:
+        # - force is indicated
+        # - output is directed to stdout
+        # - source file is newer than the target
+        preprocess = self.force or output_file is None or newer(source, output_file)
+        if not preprocess:
+            return
+
+        if output_file:
+            self.mkpath(os.path.dirname(output_file))
+
+        try:
+            self.spawn(pp_args)
+        except DistutilsExecError as msg:
+            raise CompileError(msg)
+
+    def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+        compiler_so = compiler_fixup(self.compiler_so, cc_args + extra_postargs)
+        try:
+            self.spawn(compiler_so + cc_args + [src, '-o', obj] + extra_postargs)
+        except DistutilsExecError as msg:
+            raise CompileError(msg)
+
+    def create_static_lib(
+        self, objects, output_libname, output_dir=None, debug=False, target_lang=None
+    ):
+        objects, output_dir = self._fix_object_args(objects, output_dir)
+
+        output_filename = self.library_filename(output_libname, output_dir=output_dir)
+
+        if self._need_link(objects, output_filename):
+            self.mkpath(os.path.dirname(output_filename))
+            self.spawn(self.archiver + [output_filename] + objects + self.objects)
+
+            # Not many Unices required ranlib anymore -- SunOS 4.x is, I
+            # think the only major Unix that does.  Maybe we need some
+            # platform intelligence here to skip ranlib if it's not
+            # needed -- or maybe Python's configure script took care of
+            # it for us, hence the check for leading colon.
+            if self.ranlib:
+                try:
+                    self.spawn(self.ranlib + [output_filename])
+                except DistutilsExecError as msg:
+                    raise LibError(msg)
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    def link(
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        objects, output_dir = self._fix_object_args(objects, output_dir)
+        fixed_args = self._fix_lib_args(libraries, library_dirs, runtime_library_dirs)
+        libraries, library_dirs, runtime_library_dirs = fixed_args
+
+        lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries)
+        if not isinstance(output_dir, (str, type(None))):
+            raise TypeError("'output_dir' must be a string or None")
+        if output_dir is not None:
+            output_filename = os.path.join(output_dir, output_filename)
+
+        if self._need_link(objects, output_filename):
+            ld_args = objects + self.objects + lib_opts + ['-o', output_filename]
+            if debug:
+                ld_args[:0] = ['-g']
+            if extra_preargs:
+                ld_args[:0] = extra_preargs
+            if extra_postargs:
+                ld_args.extend(extra_postargs)
+            self.mkpath(os.path.dirname(output_filename))
+            try:
+                # Select a linker based on context: linker_exe when
+                # building an executable or linker_so (with shared options)
+                # when building a shared library.
+                building_exe = target_desc == CCompiler.EXECUTABLE
+                linker = (self.linker_exe if building_exe else self.linker_so)[:]
+
+                if target_lang == "c++" and self.compiler_cxx:
+                    env, linker_ne = _split_env(linker)
+                    aix, linker_na = _split_aix(linker_ne)
+                    _, compiler_cxx_ne = _split_env(self.compiler_cxx)
+                    _, linker_exe_ne = _split_env(self.linker_exe)
+
+                    params = _linker_params(linker_na, linker_exe_ne)
+                    linker = env + aix + compiler_cxx_ne + params
+
+                linker = compiler_fixup(linker, ld_args)
+
+                self.spawn(linker + ld_args)
+            except DistutilsExecError as msg:
+                raise LinkError(msg)
+        else:
+            log.debug("skipping %s (up-to-date)", output_filename)
+
+    # -- Miscellaneous methods -----------------------------------------
+    # These are all used by the 'gen_lib_options() function, in
+    # ccompiler.py.
+
+    def library_dir_option(self, dir):
+        return "-L" + dir
+
+    def _is_gcc(self):
+        cc_var = sysconfig.get_config_var("CC")
+        compiler = os.path.basename(shlex.split(cc_var)[0])
+        return "gcc" in compiler or "g++" in compiler
+
+    def runtime_library_dir_option(self, dir: str) -> str | list[str]:
+        # XXX Hackish, at the very least.  See Python bug #445902:
+        # https://bugs.python.org/issue445902
+        # Linkers on different platforms need different options to
+        # specify that directories need to be added to the list of
+        # directories searched for dependencies when a dynamic library
+        # is sought.  GCC on GNU systems (Linux, FreeBSD, ...) has to
+        # be told to pass the -R option through to the linker, whereas
+        # other compilers and gcc on other systems just know this.
+        # Other compilers may need something slightly different.  At
+        # this time, there's no way to determine this information from
+        # the configuration data stored in the Python installation, so
+        # we use this hack.
+        if sys.platform[:6] == "darwin":
+            from distutils.util import get_macosx_target_ver, split_version
+
+            macosx_target_ver = get_macosx_target_ver()
+            if macosx_target_ver and split_version(macosx_target_ver) >= [10, 5]:
+                return "-Wl,-rpath," + dir
+            else:  # no support for -rpath on earlier macOS versions
+                return "-L" + dir
+        elif sys.platform[:7] == "freebsd":
+            return "-Wl,-rpath=" + dir
+        elif sys.platform[:5] == "hp-ux":
+            return [
+                "-Wl,+s" if self._is_gcc() else "+s",
+                "-L" + dir,
+            ]
+
+        # For all compilers, `-Wl` is the presumed way to pass a
+        # compiler option to the linker
+        if sysconfig.get_config_var("GNULD") == "yes":
+            return consolidate_linker_args([
+                # Force RUNPATH instead of RPATH
+                "-Wl,--enable-new-dtags",
+                "-Wl,-rpath," + dir,
+            ])
+        else:
+            return "-Wl,-R" + dir
+
+    def library_option(self, lib):
+        return "-l" + lib
+
+    @staticmethod
+    def _library_root(dir):
+        """
+        macOS users can specify an alternate SDK using'-isysroot'.
+        Calculate the SDK root if it is specified.
+
+        Note that, as of Xcode 7, Apple SDKs may contain textual stub
+        libraries with .tbd extensions rather than the normal .dylib
+        shared libraries installed in /.  The Apple compiler tool
+        chain handles this transparently but it can cause problems
+        for programs that are being built with an SDK and searching
+        for specific libraries.  Callers of find_library_file need to
+        keep in mind that the base filename of the returned SDK library
+        file might have a different extension from that of the library
+        file installed on the running system, for example:
+          /Applications/Xcode.app/Contents/Developer/Platforms/
+              MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/
+              usr/lib/libedit.tbd
+        vs
+          /usr/lib/libedit.dylib
+        """
+        cflags = sysconfig.get_config_var('CFLAGS')
+        match = re.search(r'-isysroot\s*(\S+)', cflags)
+
+        apply_root = (
+            sys.platform == 'darwin'
+            and match
+            and (
+                dir.startswith('/System/')
+                or (dir.startswith('/usr/') and not dir.startswith('/usr/local/'))
+            )
+        )
+
+        return os.path.join(match.group(1), dir[1:]) if apply_root else dir
+
+    def find_library_file(self, dirs, lib, debug=False):
+        r"""
+        Second-guess the linker with not much hard
+        data to go on: GCC seems to prefer the shared library, so
+        assume that *all* Unix C compilers do,
+        ignoring even GCC's "-static" option.
+
+        >>> compiler = UnixCCompiler()
+        >>> compiler._library_root = lambda dir: dir
+        >>> monkeypatch = getfixture('monkeypatch')
+        >>> monkeypatch.setattr(os.path, 'exists', lambda d: 'existing' in d)
+        >>> dirs = ('/foo/bar/missing', '/foo/bar/existing')
+        >>> compiler.find_library_file(dirs, 'abc').replace('\\', '/')
+        '/foo/bar/existing/libabc.dylib'
+        >>> compiler.find_library_file(reversed(dirs), 'abc').replace('\\', '/')
+        '/foo/bar/existing/libabc.dylib'
+        >>> monkeypatch.setattr(os.path, 'exists',
+        ...     lambda d: 'existing' in d and '.a' in d)
+        >>> compiler.find_library_file(dirs, 'abc').replace('\\', '/')
+        '/foo/bar/existing/libabc.a'
+        >>> compiler.find_library_file(reversed(dirs), 'abc').replace('\\', '/')
+        '/foo/bar/existing/libabc.a'
+        """
+        lib_names = (
+            self.library_filename(lib, lib_type=type)
+            for type in 'dylib xcode_stub shared static'.split()
+        )
+
+        roots = map(self._library_root, dirs)
+
+        searched = itertools.starmap(os.path.join, itertools.product(roots, lib_names))
+
+        found = filter(os.path.exists, searched)
+
+        # Return None if it could not be found in any dir.
+        return next(found, None)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..9db89b097913ec00f56d09bbcca3b5ebc2912995
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/util.py
@@ -0,0 +1,510 @@
+"""distutils.util
+
+Miscellaneous utility functions -- anything that doesn't fit into
+one of the other *util.py modules.
+"""
+
+import functools
+import importlib.util
+import os
+import re
+import string
+import subprocess
+import sys
+import sysconfig
+import tempfile
+
+from ._log import log
+from ._modified import newer
+from .errors import DistutilsByteCompileError, DistutilsPlatformError
+from .spawn import spawn
+
+
+def get_host_platform():
+    """
+    Return a string that identifies the current platform. Use this
+    function to distinguish platform-specific build directories and
+    platform-specific built distributions.
+    """
+
+    # This function initially exposed platforms as defined in Python 3.9
+    # even with older Python versions when distutils was split out.
+    # Now it delegates to stdlib sysconfig, but maintains compatibility.
+
+    if sys.version_info < (3, 9):
+        if os.name == "posix" and hasattr(os, 'uname'):
+            osname, host, release, version, machine = os.uname()
+            if osname[:3] == "aix":
+                from .compat.py38 import aix_platform
+
+                return aix_platform(osname, version, release)
+
+    return sysconfig.get_platform()
+
+
+def get_platform():
+    if os.name == 'nt':
+        TARGET_TO_PLAT = {
+            'x86': 'win32',
+            'x64': 'win-amd64',
+            'arm': 'win-arm32',
+            'arm64': 'win-arm64',
+        }
+        target = os.environ.get('VSCMD_ARG_TGT_ARCH')
+        return TARGET_TO_PLAT.get(target) or get_host_platform()
+    return get_host_platform()
+
+
+if sys.platform == 'darwin':
+    _syscfg_macosx_ver = None  # cache the version pulled from sysconfig
+MACOSX_VERSION_VAR = 'MACOSX_DEPLOYMENT_TARGET'
+
+
+def _clear_cached_macosx_ver():
+    """For testing only. Do not call."""
+    global _syscfg_macosx_ver
+    _syscfg_macosx_ver = None
+
+
+def get_macosx_target_ver_from_syscfg():
+    """Get the version of macOS latched in the Python interpreter configuration.
+    Returns the version as a string or None if can't obtain one. Cached."""
+    global _syscfg_macosx_ver
+    if _syscfg_macosx_ver is None:
+        from distutils import sysconfig
+
+        ver = sysconfig.get_config_var(MACOSX_VERSION_VAR) or ''
+        if ver:
+            _syscfg_macosx_ver = ver
+    return _syscfg_macosx_ver
+
+
+def get_macosx_target_ver():
+    """Return the version of macOS for which we are building.
+
+    The target version defaults to the version in sysconfig latched at time
+    the Python interpreter was built, unless overridden by an environment
+    variable. If neither source has a value, then None is returned"""
+
+    syscfg_ver = get_macosx_target_ver_from_syscfg()
+    env_ver = os.environ.get(MACOSX_VERSION_VAR)
+
+    if env_ver:
+        # Validate overridden version against sysconfig version, if have both.
+        # Ensure that the deployment target of the build process is not less
+        # than 10.3 if the interpreter was built for 10.3 or later.  This
+        # ensures extension modules are built with correct compatibility
+        # values, specifically LDSHARED which can use
+        # '-undefined dynamic_lookup' which only works on >= 10.3.
+        if (
+            syscfg_ver
+            and split_version(syscfg_ver) >= [10, 3]
+            and split_version(env_ver) < [10, 3]
+        ):
+            my_msg = (
+                '$' + MACOSX_VERSION_VAR + ' mismatch: '
+                f'now "{env_ver}" but "{syscfg_ver}" during configure; '
+                'must use 10.3 or later'
+            )
+            raise DistutilsPlatformError(my_msg)
+        return env_ver
+    return syscfg_ver
+
+
+def split_version(s):
+    """Convert a dot-separated string into a list of numbers for comparisons"""
+    return [int(n) for n in s.split('.')]
+
+
+def convert_path(pathname):
+    """Return 'pathname' as a name that will work on the native filesystem,
+    i.e. split it on '/' and put it back together again using the current
+    directory separator.  Needed because filenames in the setup script are
+    always supplied in Unix style, and have to be converted to the local
+    convention before we can actually use them in the filesystem.  Raises
+    ValueError on non-Unix-ish systems if 'pathname' either starts or
+    ends with a slash.
+    """
+    if os.sep == '/':
+        return pathname
+    if not pathname:
+        return pathname
+    if pathname[0] == '/':
+        raise ValueError(f"path '{pathname}' cannot be absolute")
+    if pathname[-1] == '/':
+        raise ValueError(f"path '{pathname}' cannot end with '/'")
+
+    paths = pathname.split('/')
+    while '.' in paths:
+        paths.remove('.')
+    if not paths:
+        return os.curdir
+    return os.path.join(*paths)
+
+
+# convert_path ()
+
+
+def change_root(new_root, pathname):
+    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
+    relative, this is equivalent to "os.path.join(new_root,pathname)".
+    Otherwise, it requires making 'pathname' relative and then joining the
+    two, which is tricky on DOS/Windows and Mac OS.
+    """
+    if os.name == 'posix':
+        if not os.path.isabs(pathname):
+            return os.path.join(new_root, pathname)
+        else:
+            return os.path.join(new_root, pathname[1:])
+
+    elif os.name == 'nt':
+        (drive, path) = os.path.splitdrive(pathname)
+        if path[0] == os.sep:
+            path = path[1:]
+        return os.path.join(new_root, path)
+
+    raise DistutilsPlatformError(f"nothing known about platform '{os.name}'")
+
+
+@functools.lru_cache
+def check_environ():
+    """Ensure that 'os.environ' has all the environment variables we
+    guarantee that users can use in config files, command-line options,
+    etc.  Currently this includes:
+      HOME - user's home directory (Unix only)
+      PLAT - description of the current platform, including hardware
+             and OS (see 'get_platform()')
+    """
+    if os.name == 'posix' and 'HOME' not in os.environ:
+        try:
+            import pwd
+
+            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
+        except (ImportError, KeyError):
+            # bpo-10496: if the current user identifier doesn't exist in the
+            # password database, do nothing
+            pass
+
+    if 'PLAT' not in os.environ:
+        os.environ['PLAT'] = get_platform()
+
+
+def subst_vars(s, local_vars):
+    """
+    Perform variable substitution on 'string'.
+    Variables are indicated by format-style braces ("{var}").
+    Variable is substituted by the value found in the 'local_vars'
+    dictionary or in 'os.environ' if it's not in 'local_vars'.
+    'os.environ' is first checked/augmented to guarantee that it contains
+    certain values: see 'check_environ()'.  Raise ValueError for any
+    variables not found in either 'local_vars' or 'os.environ'.
+    """
+    check_environ()
+    lookup = dict(os.environ)
+    lookup.update((name, str(value)) for name, value in local_vars.items())
+    try:
+        return _subst_compat(s).format_map(lookup)
+    except KeyError as var:
+        raise ValueError(f"invalid variable {var}")
+
+
+def _subst_compat(s):
+    """
+    Replace shell/Perl-style variable substitution with
+    format-style. For compatibility.
+    """
+
+    def _subst(match):
+        return f'{{{match.group(1)}}}'
+
+    repl = re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
+    if repl != s:
+        import warnings
+
+        warnings.warn(
+            "shell/Perl-style substitutions are deprecated",
+            DeprecationWarning,
+        )
+    return repl
+
+
+def grok_environment_error(exc, prefix="error: "):
+    # Function kept for backward compatibility.
+    # Used to try clever things with EnvironmentErrors,
+    # but nowadays str(exception) produces good messages.
+    return prefix + str(exc)
+
+
+# Needed by 'split_quoted()'
+_wordchars_re = _squote_re = _dquote_re = None
+
+
+def _init_regex():
+    global _wordchars_re, _squote_re, _dquote_re
+    _wordchars_re = re.compile(rf'[^\\\'\"{string.whitespace} ]*')
+    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
+    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
+
+
+def split_quoted(s):
+    """Split a string up according to Unix shell-like rules for quotes and
+    backslashes.  In short: words are delimited by spaces, as long as those
+    spaces are not escaped by a backslash, or inside a quoted string.
+    Single and double quotes are equivalent, and the quote characters can
+    be backslash-escaped.  The backslash is stripped from any two-character
+    escape sequence, leaving only the escaped character.  The quote
+    characters are stripped from any quoted string.  Returns a list of
+    words.
+    """
+
+    # This is a nice algorithm for splitting up a single string, since it
+    # doesn't require character-by-character examination.  It was a little
+    # bit of a brain-bender to get it working right, though...
+    if _wordchars_re is None:
+        _init_regex()
+
+    s = s.strip()
+    words = []
+    pos = 0
+
+    while s:
+        m = _wordchars_re.match(s, pos)
+        end = m.end()
+        if end == len(s):
+            words.append(s[:end])
+            break
+
+        if s[end] in string.whitespace:
+            # unescaped, unquoted whitespace: now
+            # we definitely have a word delimiter
+            words.append(s[:end])
+            s = s[end:].lstrip()
+            pos = 0
+
+        elif s[end] == '\\':
+            # preserve whatever is being escaped;
+            # will become part of the current word
+            s = s[:end] + s[end + 1 :]
+            pos = end + 1
+
+        else:
+            if s[end] == "'":  # slurp singly-quoted string
+                m = _squote_re.match(s, end)
+            elif s[end] == '"':  # slurp doubly-quoted string
+                m = _dquote_re.match(s, end)
+            else:
+                raise RuntimeError("this can't happen (bad char '%c')" % s[end])
+
+            if m is None:
+                raise ValueError(f"bad string (mismatched {s[end]} quotes?)")
+
+            (beg, end) = m.span()
+            s = s[:beg] + s[beg + 1 : end - 1] + s[end:]
+            pos = m.end() - 2
+
+        if pos >= len(s):
+            words.append(s)
+            break
+
+    return words
+
+
+# split_quoted ()
+
+
+def execute(func, args, msg=None, verbose=False, dry_run=False):
+    """Perform some action that affects the outside world (eg.  by
+    writing to the filesystem).  Such actions are special because they
+    are disabled by the 'dry_run' flag.  This method takes care of all
+    that bureaucracy for you; all you have to do is supply the
+    function to call and an argument tuple for it (to embody the
+    "external action" being performed), and an optional message to
+    print.
+    """
+    if msg is None:
+        msg = f"{func.__name__}{args!r}"
+        if msg[-2:] == ',)':  # correct for singleton tuple
+            msg = msg[0:-2] + ')'
+
+    log.info(msg)
+    if not dry_run:
+        func(*args)
+
+
+def strtobool(val):
+    """Convert a string representation of truth to true (1) or false (0).
+
+    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
+    'val' is anything else.
+    """
+    val = val.lower()
+    if val in ('y', 'yes', 't', 'true', 'on', '1'):
+        return 1
+    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
+        return 0
+    else:
+        raise ValueError(f"invalid truth value {val!r}")
+
+
+def byte_compile(  # noqa: C901
+    py_files,
+    optimize=0,
+    force=False,
+    prefix=None,
+    base_dir=None,
+    verbose=True,
+    dry_run=False,
+    direct=None,
+):
+    """Byte-compile a collection of Python source files to .pyc
+    files in a __pycache__ subdirectory.  'py_files' is a list
+    of files to compile; any files that don't end in ".py" are silently
+    skipped.  'optimize' must be one of the following:
+      0 - don't optimize
+      1 - normal optimization (like "python -O")
+      2 - extra optimization (like "python -OO")
+    If 'force' is true, all files are recompiled regardless of
+    timestamps.
+
+    The source filename encoded in each bytecode file defaults to the
+    filenames listed in 'py_files'; you can modify these with 'prefix' and
+    'basedir'.  'prefix' is a string that will be stripped off of each
+    source filename, and 'base_dir' is a directory name that will be
+    prepended (after 'prefix' is stripped).  You can supply either or both
+    (or neither) of 'prefix' and 'base_dir', as you wish.
+
+    If 'dry_run' is true, doesn't actually do anything that would
+    affect the filesystem.
+
+    Byte-compilation is either done directly in this interpreter process
+    with the standard py_compile module, or indirectly by writing a
+    temporary script and executing it.  Normally, you should let
+    'byte_compile()' figure out to use direct compilation or not (see
+    the source for details).  The 'direct' flag is used by the script
+    generated in indirect mode; unless you know what you're doing, leave
+    it set to None.
+    """
+
+    # nothing is done if sys.dont_write_bytecode is True
+    if sys.dont_write_bytecode:
+        raise DistutilsByteCompileError('byte-compiling is disabled.')
+
+    # First, if the caller didn't force us into direct or indirect mode,
+    # figure out which mode we should be in.  We take a conservative
+    # approach: choose direct mode *only* if the current interpreter is
+    # in debug mode and optimize is 0.  If we're not in debug mode (-O
+    # or -OO), we don't know which level of optimization this
+    # interpreter is running with, so we can't do direct
+    # byte-compilation and be certain that it's the right thing.  Thus,
+    # always compile indirectly if the current interpreter is in either
+    # optimize mode, or if either optimization level was requested by
+    # the caller.
+    if direct is None:
+        direct = __debug__ and optimize == 0
+
+    # "Indirect" byte-compilation: write a temporary script and then
+    # run it with the appropriate flags.
+    if not direct:
+        (script_fd, script_name) = tempfile.mkstemp(".py")
+        log.info("writing byte-compilation script '%s'", script_name)
+        if not dry_run:
+            script = os.fdopen(script_fd, "w", encoding='utf-8')
+
+            with script:
+                script.write(
+                    """\
+from distutils.util import byte_compile
+files = [
+"""
+                )
+
+                # XXX would be nice to write absolute filenames, just for
+                # safety's sake (script should be more robust in the face of
+                # chdir'ing before running it).  But this requires abspath'ing
+                # 'prefix' as well, and that breaks the hack in build_lib's
+                # 'byte_compile()' method that carefully tacks on a trailing
+                # slash (os.sep really) to make sure the prefix here is "just
+                # right".  This whole prefix business is rather delicate -- the
+                # problem is that it's really a directory, but I'm treating it
+                # as a dumb string, so trailing slashes and so forth matter.
+
+                script.write(",\n".join(map(repr, py_files)) + "]\n")
+                script.write(
+                    f"""
+byte_compile(files, optimize={optimize!r}, force={force!r},
+             prefix={prefix!r}, base_dir={base_dir!r},
+             verbose={verbose!r}, dry_run=False,
+             direct=True)
+"""
+                )
+
+        cmd = [sys.executable]
+        cmd.extend(subprocess._optim_args_from_interpreter_flags())
+        cmd.append(script_name)
+        spawn(cmd, dry_run=dry_run)
+        execute(os.remove, (script_name,), f"removing {script_name}", dry_run=dry_run)
+
+    # "Direct" byte-compilation: use the py_compile module to compile
+    # right here, right now.  Note that the script generated in indirect
+    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
+    # cross-process recursion.  Hey, it works!
+    else:
+        from py_compile import compile
+
+        for file in py_files:
+            if file[-3:] != ".py":
+                # This lets us be lazy and not filter filenames in
+                # the "install_lib" command.
+                continue
+
+            # Terminology from the py_compile module:
+            #   cfile - byte-compiled file
+            #   dfile - purported source filename (same as 'file' by default)
+            if optimize >= 0:
+                opt = '' if optimize == 0 else optimize
+                cfile = importlib.util.cache_from_source(file, optimization=opt)
+            else:
+                cfile = importlib.util.cache_from_source(file)
+            dfile = file
+            if prefix:
+                if file[: len(prefix)] != prefix:
+                    raise ValueError(
+                        f"invalid prefix: filename {file!r} doesn't start with {prefix!r}"
+                    )
+                dfile = dfile[len(prefix) :]
+            if base_dir:
+                dfile = os.path.join(base_dir, dfile)
+
+            cfile_base = os.path.basename(cfile)
+            if direct:
+                if force or newer(file, cfile):
+                    log.info("byte-compiling %s to %s", file, cfile_base)
+                    if not dry_run:
+                        compile(file, cfile, dfile)
+                else:
+                    log.debug("skipping byte-compilation of %s to %s", file, cfile_base)
+
+
+def rfc822_escape(header):
+    """Return a version of the string escaped for inclusion in an
+    RFC-822 header, by ensuring there are 8 spaces space after each newline.
+    """
+    indent = 8 * " "
+    lines = header.splitlines(keepends=True)
+
+    # Emulate the behaviour of `str.split`
+    # (the terminal line break in `splitlines` does not result in an extra line):
+    ends_in_newline = lines and lines[-1].splitlines()[0] != lines[-1]
+    suffix = indent if ends_in_newline else ""
+
+    return indent.join(lines) + suffix
+
+
+def is_mingw():
+    """Returns True if the current platform is mingw.
+
+    Python compiled with Mingw-w64 has sys.platform == 'win32' and
+    get_platform() starts with 'mingw'.
+    """
+    return sys.platform == 'win32' and get_platform().startswith('mingw')
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/version.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..942b56bf944827b9ed1d07f59a9322060ef79ee8
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/version.py
@@ -0,0 +1,349 @@
+#
+# distutils/version.py
+#
+# Implements multiple version numbering conventions for the
+# Python Module Distribution Utilities.
+#
+# $Id$
+#
+
+"""Provides classes to represent module version numbers (one class for
+each style of version numbering).  There are currently two such classes
+implemented: StrictVersion and LooseVersion.
+
+Every version number class implements the following interface:
+  * the 'parse' method takes a string and parses it to some internal
+    representation; if the string is an invalid version number,
+    'parse' raises a ValueError exception
+  * the class constructor takes an optional string argument which,
+    if supplied, is passed to 'parse'
+  * __str__ reconstructs the string that was passed to 'parse' (or
+    an equivalent string -- ie. one that will generate an equivalent
+    version number instance)
+  * __repr__ generates Python code to recreate the version number instance
+  * _cmp compares the current instance with either another instance
+    of the same class or a string (which will be parsed to an instance
+    of the same class, thus must follow the same rules)
+"""
+
+import contextlib
+import re
+import warnings
+
+
+@contextlib.contextmanager
+def suppress_known_deprecation():
+    with warnings.catch_warnings(record=True) as ctx:
+        warnings.filterwarnings(
+            action='default',
+            category=DeprecationWarning,
+            message="distutils Version classes are deprecated.",
+        )
+        yield ctx
+
+
+class Version:
+    """Abstract base class for version numbering classes.  Just provides
+    constructor (__init__) and reproducer (__repr__), because those
+    seem to be the same for all version numbering classes; and route
+    rich comparisons to _cmp.
+    """
+
+    def __init__(self, vstring=None):
+        if vstring:
+            self.parse(vstring)
+        warnings.warn(
+            "distutils Version classes are deprecated. "
+            "Use packaging.version instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+    def __repr__(self):
+        return f"{self.__class__.__name__} ('{self}')"
+
+    def __eq__(self, other):
+        c = self._cmp(other)
+        if c is NotImplemented:
+            return c
+        return c == 0
+
+    def __lt__(self, other):
+        c = self._cmp(other)
+        if c is NotImplemented:
+            return c
+        return c < 0
+
+    def __le__(self, other):
+        c = self._cmp(other)
+        if c is NotImplemented:
+            return c
+        return c <= 0
+
+    def __gt__(self, other):
+        c = self._cmp(other)
+        if c is NotImplemented:
+            return c
+        return c > 0
+
+    def __ge__(self, other):
+        c = self._cmp(other)
+        if c is NotImplemented:
+            return c
+        return c >= 0
+
+
+# Interface for version-number classes -- must be implemented
+# by the following classes (the concrete ones -- Version should
+# be treated as an abstract class).
+#    __init__ (string) - create and take same action as 'parse'
+#                        (string parameter is optional)
+#    parse (string)    - convert a string representation to whatever
+#                        internal representation is appropriate for
+#                        this style of version numbering
+#    __str__ (self)    - convert back to a string; should be very similar
+#                        (if not identical to) the string supplied to parse
+#    __repr__ (self)   - generate Python code to recreate
+#                        the instance
+#    _cmp (self, other) - compare two version numbers ('other' may
+#                        be an unparsed version string, or another
+#                        instance of your version class)
+
+
+class StrictVersion(Version):
+    """Version numbering for anal retentives and software idealists.
+    Implements the standard interface for version number classes as
+    described above.  A version number consists of two or three
+    dot-separated numeric components, with an optional "pre-release" tag
+    on the end.  The pre-release tag consists of the letter 'a' or 'b'
+    followed by a number.  If the numeric components of two version
+    numbers are equal, then one with a pre-release tag will always
+    be deemed earlier (lesser) than one without.
+
+    The following are valid version numbers (shown in the order that
+    would be obtained by sorting according to the supplied cmp function):
+
+        0.4       0.4.0  (these two are equivalent)
+        0.4.1
+        0.5a1
+        0.5b3
+        0.5
+        0.9.6
+        1.0
+        1.0.4a3
+        1.0.4b1
+        1.0.4
+
+    The following are examples of invalid version numbers:
+
+        1
+        2.7.2.2
+        1.3.a4
+        1.3pl1
+        1.3c4
+
+    The rationale for this version numbering system will be explained
+    in the distutils documentation.
+    """
+
+    version_re = re.compile(
+        r'^(\d+) \. (\d+) (\. (\d+))? ([ab](\d+))?$', re.VERBOSE | re.ASCII
+    )
+
+    def parse(self, vstring):
+        match = self.version_re.match(vstring)
+        if not match:
+            raise ValueError(f"invalid version number '{vstring}'")
+
+        (major, minor, patch, prerelease, prerelease_num) = match.group(1, 2, 4, 5, 6)
+
+        if patch:
+            self.version = tuple(map(int, [major, minor, patch]))
+        else:
+            self.version = tuple(map(int, [major, minor])) + (0,)
+
+        if prerelease:
+            self.prerelease = (prerelease[0], int(prerelease_num))
+        else:
+            self.prerelease = None
+
+    def __str__(self):
+        if self.version[2] == 0:
+            vstring = '.'.join(map(str, self.version[0:2]))
+        else:
+            vstring = '.'.join(map(str, self.version))
+
+        if self.prerelease:
+            vstring = vstring + self.prerelease[0] + str(self.prerelease[1])
+
+        return vstring
+
+    def _cmp(self, other):
+        if isinstance(other, str):
+            with suppress_known_deprecation():
+                other = StrictVersion(other)
+        elif not isinstance(other, StrictVersion):
+            return NotImplemented
+
+        if self.version == other.version:
+            # versions match; pre-release drives the comparison
+            return self._cmp_prerelease(other)
+
+        return -1 if self.version < other.version else 1
+
+    def _cmp_prerelease(self, other):
+        """
+        case 1: self has prerelease, other doesn't; other is greater
+        case 2: self doesn't have prerelease, other does: self is greater
+        case 3: both or neither have prerelease: compare them!
+        """
+        if self.prerelease and not other.prerelease:
+            return -1
+        elif not self.prerelease and other.prerelease:
+            return 1
+
+        if self.prerelease == other.prerelease:
+            return 0
+        elif self.prerelease < other.prerelease:
+            return -1
+        else:
+            return 1
+
+
+# end class StrictVersion
+
+
+# The rules according to Greg Stein:
+# 1) a version number has 1 or more numbers separated by a period or by
+#    sequences of letters. If only periods, then these are compared
+#    left-to-right to determine an ordering.
+# 2) sequences of letters are part of the tuple for comparison and are
+#    compared lexicographically
+# 3) recognize the numeric components may have leading zeroes
+#
+# The LooseVersion class below implements these rules: a version number
+# string is split up into a tuple of integer and string components, and
+# comparison is a simple tuple comparison.  This means that version
+# numbers behave in a predictable and obvious way, but a way that might
+# not necessarily be how people *want* version numbers to behave.  There
+# wouldn't be a problem if people could stick to purely numeric version
+# numbers: just split on period and compare the numbers as tuples.
+# However, people insist on putting letters into their version numbers;
+# the most common purpose seems to be:
+#   - indicating a "pre-release" version
+#     ('alpha', 'beta', 'a', 'b', 'pre', 'p')
+#   - indicating a post-release patch ('p', 'pl', 'patch')
+# but of course this can't cover all version number schemes, and there's
+# no way to know what a programmer means without asking him.
+#
+# The problem is what to do with letters (and other non-numeric
+# characters) in a version number.  The current implementation does the
+# obvious and predictable thing: keep them as strings and compare
+# lexically within a tuple comparison.  This has the desired effect if
+# an appended letter sequence implies something "post-release":
+# eg. "0.99" < "0.99pl14" < "1.0", and "5.001" < "5.001m" < "5.002".
+#
+# However, if letters in a version number imply a pre-release version,
+# the "obvious" thing isn't correct.  Eg. you would expect that
+# "1.5.1" < "1.5.2a2" < "1.5.2", but under the tuple/lexical comparison
+# implemented here, this just isn't so.
+#
+# Two possible solutions come to mind.  The first is to tie the
+# comparison algorithm to a particular set of semantic rules, as has
+# been done in the StrictVersion class above.  This works great as long
+# as everyone can go along with bondage and discipline.  Hopefully a
+# (large) subset of Python module programmers will agree that the
+# particular flavour of bondage and discipline provided by StrictVersion
+# provides enough benefit to be worth using, and will submit their
+# version numbering scheme to its domination.  The free-thinking
+# anarchists in the lot will never give in, though, and something needs
+# to be done to accommodate them.
+#
+# Perhaps a "moderately strict" version class could be implemented that
+# lets almost anything slide (syntactically), and makes some heuristic
+# assumptions about non-digits in version number strings.  This could
+# sink into special-case-hell, though; if I was as talented and
+# idiosyncratic as Larry Wall, I'd go ahead and implement a class that
+# somehow knows that "1.2.1" < "1.2.2a2" < "1.2.2" < "1.2.2pl3", and is
+# just as happy dealing with things like "2g6" and "1.13++".  I don't
+# think I'm smart enough to do it right though.
+#
+# In any case, I've coded the test suite for this module (see
+# ../test/test_version.py) specifically to fail on things like comparing
+# "1.2a2" and "1.2".  That's not because the *code* is doing anything
+# wrong, it's because the simple, obvious design doesn't match my
+# complicated, hairy expectations for real-world version numbers.  It
+# would be a snap to fix the test suite to say, "Yep, LooseVersion does
+# the Right Thing" (ie. the code matches the conception).  But I'd rather
+# have a conception that matches common notions about version numbers.
+
+
+class LooseVersion(Version):
+    """Version numbering for anarchists and software realists.
+    Implements the standard interface for version number classes as
+    described above.  A version number consists of a series of numbers,
+    separated by either periods or strings of letters.  When comparing
+    version numbers, the numeric components will be compared
+    numerically, and the alphabetic components lexically.  The following
+    are all valid version numbers, in no particular order:
+
+        1.5.1
+        1.5.2b2
+        161
+        3.10a
+        8.02
+        3.4j
+        1996.07.12
+        3.2.pl0
+        3.1.1.6
+        2g6
+        11g
+        0.960923
+        2.2beta29
+        1.13++
+        5.5.kw
+        2.0b1pl0
+
+    In fact, there is no such thing as an invalid version number under
+    this scheme; the rules for comparison are simple and predictable,
+    but may not always give the results you want (for some definition
+    of "want").
+    """
+
+    component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE)
+
+    def parse(self, vstring):
+        # I've given up on thinking I can reconstruct the version string
+        # from the parsed tuple -- so I just store the string here for
+        # use by __str__
+        self.vstring = vstring
+        components = [x for x in self.component_re.split(vstring) if x and x != '.']
+        for i, obj in enumerate(components):
+            try:
+                components[i] = int(obj)
+            except ValueError:
+                pass
+
+        self.version = components
+
+    def __str__(self):
+        return self.vstring
+
+    def __repr__(self):
+        return f"LooseVersion ('{self}')"
+
+    def _cmp(self, other):
+        if isinstance(other, str):
+            other = LooseVersion(other)
+        elif not isinstance(other, LooseVersion):
+            return NotImplemented
+
+        if self.version == other.version:
+            return 0
+        if self.version < other.version:
+            return -1
+        if self.version > other.version:
+            return 1
+
+
+# end class LooseVersion
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/versionpredicate.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/versionpredicate.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe31b0ed8ef843080ea64df9f58a398d317434ad
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/versionpredicate.py
@@ -0,0 +1,175 @@
+"""Module for parsing and testing package version predicate strings."""
+
+import operator
+import re
+
+from . import version
+
+re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)", re.ASCII)
+# (package) (rest)
+
+re_paren = re.compile(r"^\s*\((.*)\)\s*$")  # (list) inside of parentheses
+re_splitComparison = re.compile(r"^\s*(<=|>=|<|>|!=|==)\s*([^\s,]+)\s*$")
+# (comp) (version)
+
+
+def splitUp(pred):
+    """Parse a single version comparison.
+
+    Return (comparison string, StrictVersion)
+    """
+    res = re_splitComparison.match(pred)
+    if not res:
+        raise ValueError(f"bad package restriction syntax: {pred!r}")
+    comp, verStr = res.groups()
+    with version.suppress_known_deprecation():
+        other = version.StrictVersion(verStr)
+    return (comp, other)
+
+
+compmap = {
+    "<": operator.lt,
+    "<=": operator.le,
+    "==": operator.eq,
+    ">": operator.gt,
+    ">=": operator.ge,
+    "!=": operator.ne,
+}
+
+
+class VersionPredicate:
+    """Parse and test package version predicates.
+
+    >>> v = VersionPredicate('pyepat.abc (>1.0, <3333.3a1, !=1555.1b3)')
+
+    The `name` attribute provides the full dotted name that is given::
+
+    >>> v.name
+    'pyepat.abc'
+
+    The str() of a `VersionPredicate` provides a normalized
+    human-readable version of the expression::
+
+    >>> print(v)
+    pyepat.abc (> 1.0, < 3333.3a1, != 1555.1b3)
+
+    The `satisfied_by()` method can be used to determine with a given
+    version number is included in the set described by the version
+    restrictions::
+
+    >>> v.satisfied_by('1.1')
+    True
+    >>> v.satisfied_by('1.4')
+    True
+    >>> v.satisfied_by('1.0')
+    False
+    >>> v.satisfied_by('4444.4')
+    False
+    >>> v.satisfied_by('1555.1b3')
+    False
+
+    `VersionPredicate` is flexible in accepting extra whitespace::
+
+    >>> v = VersionPredicate(' pat( ==  0.1  )  ')
+    >>> v.name
+    'pat'
+    >>> v.satisfied_by('0.1')
+    True
+    >>> v.satisfied_by('0.2')
+    False
+
+    If any version numbers passed in do not conform to the
+    restrictions of `StrictVersion`, a `ValueError` is raised::
+
+    >>> v = VersionPredicate('p1.p2.p3.p4(>=1.0, <=1.3a1, !=1.2zb3)')
+    Traceback (most recent call last):
+      ...
+    ValueError: invalid version number '1.2zb3'
+
+    It the module or package name given does not conform to what's
+    allowed as a legal module or package name, `ValueError` is
+    raised::
+
+    >>> v = VersionPredicate('foo-bar')
+    Traceback (most recent call last):
+      ...
+    ValueError: expected parenthesized list: '-bar'
+
+    >>> v = VersionPredicate('foo bar (12.21)')
+    Traceback (most recent call last):
+      ...
+    ValueError: expected parenthesized list: 'bar (12.21)'
+
+    """
+
+    def __init__(self, versionPredicateStr):
+        """Parse a version predicate string."""
+        # Fields:
+        #    name:  package name
+        #    pred:  list of (comparison string, StrictVersion)
+
+        versionPredicateStr = versionPredicateStr.strip()
+        if not versionPredicateStr:
+            raise ValueError("empty package restriction")
+        match = re_validPackage.match(versionPredicateStr)
+        if not match:
+            raise ValueError(f"bad package name in {versionPredicateStr!r}")
+        self.name, paren = match.groups()
+        paren = paren.strip()
+        if paren:
+            match = re_paren.match(paren)
+            if not match:
+                raise ValueError(f"expected parenthesized list: {paren!r}")
+            str = match.groups()[0]
+            self.pred = [splitUp(aPred) for aPred in str.split(",")]
+            if not self.pred:
+                raise ValueError(f"empty parenthesized list in {versionPredicateStr!r}")
+        else:
+            self.pred = []
+
+    def __str__(self):
+        if self.pred:
+            seq = [cond + " " + str(ver) for cond, ver in self.pred]
+            return self.name + " (" + ", ".join(seq) + ")"
+        else:
+            return self.name
+
+    def satisfied_by(self, version):
+        """True if version is compatible with all the predicates in self.
+        The parameter version must be acceptable to the StrictVersion
+        constructor.  It may be either a string or StrictVersion.
+        """
+        for cond, ver in self.pred:
+            if not compmap[cond](version, ver):
+                return False
+        return True
+
+
+_provision_rx = None
+
+
+def split_provision(value):
+    """Return the name and optional version number of a provision.
+
+    The version number, if given, will be returned as a `StrictVersion`
+    instance, otherwise it will be `None`.
+
+    >>> split_provision('mypkg')
+    ('mypkg', None)
+    >>> split_provision(' mypkg( 1.2 ) ')
+    ('mypkg', StrictVersion ('1.2'))
+    """
+    global _provision_rx
+    if _provision_rx is None:
+        _provision_rx = re.compile(
+            r"([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$", re.ASCII
+        )
+    value = value.strip()
+    m = _provision_rx.match(value)
+    if not m:
+        raise ValueError(f"illegal provides specification: {value!r}")
+    ver = m.group(2) or None
+    if ver:
+        with version.suppress_known_deprecation():
+            ver = version.StrictVersion(ver)
+    return m.group(1), ver
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/zosccompiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/zosccompiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..af1e7fa5cced7b7ac2f29f3d50c10abaeb111dc0
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_distutils/zosccompiler.py
@@ -0,0 +1,229 @@
+"""distutils.zosccompiler
+
+Contains the selection of the c & c++ compilers on z/OS. There are several
+different c compilers on z/OS, all of them are optional, so the correct
+one needs to be chosen based on the users input. This is compatible with
+the following compilers:
+
+IBM C/C++ For Open Enterprise Languages on z/OS 2.0
+IBM Open XL C/C++ 1.1 for z/OS
+IBM XL C/C++ V2.4.1 for z/OS 2.4 and 2.5
+IBM z/OS XL C/C++
+"""
+
+import os
+
+from . import sysconfig
+from .errors import CompileError, DistutilsExecError
+from .unixccompiler import UnixCCompiler
+
+_cc_args = {
+    'ibm-openxl': [
+        '-m64',
+        '-fvisibility=default',
+        '-fzos-le-char-mode=ascii',
+        '-fno-short-enums',
+    ],
+    'ibm-xlclang': [
+        '-q64',
+        '-qexportall',
+        '-qascii',
+        '-qstrict',
+        '-qnocsect',
+        '-Wa,asa,goff',
+        '-Wa,xplink',
+        '-qgonumber',
+        '-qenum=int',
+        '-Wc,DLL',
+    ],
+    'ibm-xlc': [
+        '-q64',
+        '-qexportall',
+        '-qascii',
+        '-qstrict',
+        '-qnocsect',
+        '-Wa,asa,goff',
+        '-Wa,xplink',
+        '-qgonumber',
+        '-qenum=int',
+        '-Wc,DLL',
+        '-qlanglvl=extc99',
+    ],
+}
+
+_cxx_args = {
+    'ibm-openxl': [
+        '-m64',
+        '-fvisibility=default',
+        '-fzos-le-char-mode=ascii',
+        '-fno-short-enums',
+    ],
+    'ibm-xlclang': [
+        '-q64',
+        '-qexportall',
+        '-qascii',
+        '-qstrict',
+        '-qnocsect',
+        '-Wa,asa,goff',
+        '-Wa,xplink',
+        '-qgonumber',
+        '-qenum=int',
+        '-Wc,DLL',
+    ],
+    'ibm-xlc': [
+        '-q64',
+        '-qexportall',
+        '-qascii',
+        '-qstrict',
+        '-qnocsect',
+        '-Wa,asa,goff',
+        '-Wa,xplink',
+        '-qgonumber',
+        '-qenum=int',
+        '-Wc,DLL',
+        '-qlanglvl=extended0x',
+    ],
+}
+
+_asm_args = {
+    'ibm-openxl': ['-fasm', '-fno-integrated-as', '-Wa,--ASA', '-Wa,--GOFF'],
+    'ibm-xlclang': [],
+    'ibm-xlc': [],
+}
+
+_ld_args = {
+    'ibm-openxl': [],
+    'ibm-xlclang': ['-Wl,dll', '-q64'],
+    'ibm-xlc': ['-Wl,dll', '-q64'],
+}
+
+
+# Python on z/OS is built with no compiler specific options in it's CFLAGS.
+# But each compiler requires it's own specific options to build successfully,
+# though some of the options are common between them
+class zOSCCompiler(UnixCCompiler):
+    src_extensions = ['.c', '.C', '.cc', '.cxx', '.cpp', '.m', '.s']
+    _cpp_extensions = ['.cc', '.cpp', '.cxx', '.C']
+    _asm_extensions = ['.s']
+
+    def _get_zos_compiler_name(self):
+        zos_compiler_names = [
+            os.path.basename(binary)
+            for envvar in ('CC', 'CXX', 'LDSHARED')
+            if (binary := os.environ.get(envvar, None))
+        ]
+        if len(zos_compiler_names) == 0:
+            return 'ibm-openxl'
+
+        zos_compilers = {}
+        for compiler in (
+            'ibm-clang',
+            'ibm-clang64',
+            'ibm-clang++',
+            'ibm-clang++64',
+            'clang',
+            'clang++',
+            'clang-14',
+        ):
+            zos_compilers[compiler] = 'ibm-openxl'
+
+        for compiler in ('xlclang', 'xlclang++', 'njsc', 'njsc++'):
+            zos_compilers[compiler] = 'ibm-xlclang'
+
+        for compiler in ('xlc', 'xlC', 'xlc++'):
+            zos_compilers[compiler] = 'ibm-xlc'
+
+        return zos_compilers.get(zos_compiler_names[0], 'ibm-openxl')
+
+    def __init__(self, verbose=False, dry_run=False, force=False):
+        super().__init__(verbose, dry_run, force)
+        self.zos_compiler = self._get_zos_compiler_name()
+        sysconfig.customize_compiler(self)
+
+    def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
+        local_args = []
+        if ext in self._cpp_extensions:
+            compiler = self.compiler_cxx
+            local_args.extend(_cxx_args[self.zos_compiler])
+        elif ext in self._asm_extensions:
+            compiler = self.compiler_so
+            local_args.extend(_cc_args[self.zos_compiler])
+            local_args.extend(_asm_args[self.zos_compiler])
+        else:
+            compiler = self.compiler_so
+            local_args.extend(_cc_args[self.zos_compiler])
+        local_args.extend(cc_args)
+
+        try:
+            self.spawn(compiler + local_args + [src, '-o', obj] + extra_postargs)
+        except DistutilsExecError as msg:
+            raise CompileError(msg)
+
+    def runtime_library_dir_option(self, dir):
+        return '-L' + dir
+
+    def link(
+        self,
+        target_desc,
+        objects,
+        output_filename,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        # For a built module to use functions from cpython, it needs to use Pythons
+        # side deck file. The side deck is located beside the libpython3.xx.so
+        ldversion = sysconfig.get_config_var('LDVERSION')
+        if sysconfig.python_build:
+            side_deck_path = os.path.join(
+                sysconfig.get_config_var('abs_builddir'),
+                f'libpython{ldversion}.x',
+            )
+        else:
+            side_deck_path = os.path.join(
+                sysconfig.get_config_var('installed_base'),
+                sysconfig.get_config_var('platlibdir'),
+                f'libpython{ldversion}.x',
+            )
+
+        if os.path.exists(side_deck_path):
+            if extra_postargs:
+                extra_postargs.append(side_deck_path)
+            else:
+                extra_postargs = [side_deck_path]
+
+        # Check and replace libraries included side deck files
+        if runtime_library_dirs:
+            for dir in runtime_library_dirs:
+                for library in libraries[:]:
+                    library_side_deck = os.path.join(dir, f'{library}.x')
+                    if os.path.exists(library_side_deck):
+                        libraries.remove(library)
+                        extra_postargs.append(library_side_deck)
+                        break
+
+        # Any required ld args for the given compiler
+        extra_postargs.extend(_ld_args[self.zos_compiler])
+
+        super().link(
+            target_desc,
+            objects,
+            output_filename,
+            output_dir,
+            libraries,
+            library_dirs,
+            runtime_library_dirs,
+            export_symbols,
+            debug,
+            extra_preargs,
+            extra_postargs,
+            build_temp,
+            target_lang,
+        )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a05163b54fc16ad1c8bcf7b83cbb55f1bb4e4827
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/ordered_set.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/ordered_set.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4f18a258b91a4c5dc0d030f0e7effc15e1cceee7
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/ordered_set.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/zipp.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/zipp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff34aa5e484e7aa59678d5001e007593312ae1ec
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/__pycache__/zipp.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bab325adfa2e2820929c3f577471388a7e6c47db
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/tarfile.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/tarfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7a9a6e7b9418edae60289dba9bff5d7fb7fcee5
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/backports/tarfile.py
@@ -0,0 +1,2900 @@
+#!/usr/bin/env python3
+#-------------------------------------------------------------------
+# tarfile.py
+#-------------------------------------------------------------------
+# Copyright (C) 2002 Lars Gustaebel 
+# All rights reserved.
+#
+# Permission  is  hereby granted,  free  of charge,  to  any person
+# obtaining a  copy of  this software  and associated documentation
+# files  (the  "Software"),  to   deal  in  the  Software   without
+# restriction,  including  without limitation  the  rights to  use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies  of  the  Software,  and to  permit  persons  to  whom the
+# Software  is  furnished  to  do  so,  subject  to  the  following
+# conditions:
+#
+# The above copyright  notice and this  permission notice shall  be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
+# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
+# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
+# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
+# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
+# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+"""Read from and write to tar format archives.
+"""
+
+version     = "0.9.0"
+__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
+__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
+
+#---------
+# Imports
+#---------
+from builtins import open as bltn_open
+import sys
+import os
+import io
+import shutil
+import stat
+import time
+import struct
+import copy
+import re
+import warnings
+
+try:
+    import pwd
+except ImportError:
+    pwd = None
+try:
+    import grp
+except ImportError:
+    grp = None
+
+# os.symlink on Windows prior to 6.0 raises NotImplementedError
+# OSError (winerror=1314) will be raised if the caller does not hold the
+# SeCreateSymbolicLinkPrivilege privilege
+symlink_exception = (AttributeError, NotImplementedError, OSError)
+
+# from tarfile import *
+__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
+           "CompressionError", "StreamError", "ExtractError", "HeaderError",
+           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
+           "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter",
+           "tar_filter", "FilterError", "AbsoluteLinkError",
+           "OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
+           "LinkOutsideDestinationError"]
+
+
+#---------------------------------------------------------
+# tar constants
+#---------------------------------------------------------
+NUL = b"\0"                     # the null character
+BLOCKSIZE = 512                 # length of processing blocks
+RECORDSIZE = BLOCKSIZE * 20     # length of records
+GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
+POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
+
+LENGTH_NAME = 100               # maximum length of a filename
+LENGTH_LINK = 100               # maximum length of a linkname
+LENGTH_PREFIX = 155             # maximum length of the prefix field
+
+REGTYPE = b"0"                  # regular file
+AREGTYPE = b"\0"                # regular file
+LNKTYPE = b"1"                  # link (inside tarfile)
+SYMTYPE = b"2"                  # symbolic link
+CHRTYPE = b"3"                  # character special device
+BLKTYPE = b"4"                  # block special device
+DIRTYPE = b"5"                  # directory
+FIFOTYPE = b"6"                 # fifo special device
+CONTTYPE = b"7"                 # contiguous file
+
+GNUTYPE_LONGNAME = b"L"         # GNU tar longname
+GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
+GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
+
+XHDTYPE = b"x"                  # POSIX.1-2001 extended header
+XGLTYPE = b"g"                  # POSIX.1-2001 global header
+SOLARIS_XHDTYPE = b"X"          # Solaris extended header
+
+USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
+GNU_FORMAT = 1                  # GNU tar format
+PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
+DEFAULT_FORMAT = PAX_FORMAT
+
+#---------------------------------------------------------
+# tarfile constants
+#---------------------------------------------------------
+# File types that tarfile supports:
+SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
+                   SYMTYPE, DIRTYPE, FIFOTYPE,
+                   CONTTYPE, CHRTYPE, BLKTYPE,
+                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+                   GNUTYPE_SPARSE)
+
+# File types that will be treated as a regular file.
+REGULAR_TYPES = (REGTYPE, AREGTYPE,
+                 CONTTYPE, GNUTYPE_SPARSE)
+
+# File types that are part of the GNU tar format.
+GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+             GNUTYPE_SPARSE)
+
+# Fields from a pax header that override a TarInfo attribute.
+PAX_FIELDS = ("path", "linkpath", "size", "mtime",
+              "uid", "gid", "uname", "gname")
+
+# Fields from a pax header that are affected by hdrcharset.
+PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
+
+# Fields in a pax header that are numbers, all other fields
+# are treated as strings.
+PAX_NUMBER_FIELDS = {
+    "atime": float,
+    "ctime": float,
+    "mtime": float,
+    "uid": int,
+    "gid": int,
+    "size": int
+}
+
+#---------------------------------------------------------
+# initialization
+#---------------------------------------------------------
+if os.name == "nt":
+    ENCODING = "utf-8"
+else:
+    ENCODING = sys.getfilesystemencoding()
+
+#---------------------------------------------------------
+# Some useful functions
+#---------------------------------------------------------
+
+def stn(s, length, encoding, errors):
+    """Convert a string to a null-terminated bytes object.
+    """
+    if s is None:
+        raise ValueError("metadata cannot contain None")
+    s = s.encode(encoding, errors)
+    return s[:length] + (length - len(s)) * NUL
+
+def nts(s, encoding, errors):
+    """Convert a null-terminated bytes object to a string.
+    """
+    p = s.find(b"\0")
+    if p != -1:
+        s = s[:p]
+    return s.decode(encoding, errors)
+
+def nti(s):
+    """Convert a number field to a python number.
+    """
+    # There are two possible encodings for a number field, see
+    # itn() below.
+    if s[0] in (0o200, 0o377):
+        n = 0
+        for i in range(len(s) - 1):
+            n <<= 8
+            n += s[i + 1]
+        if s[0] == 0o377:
+            n = -(256 ** (len(s) - 1) - n)
+    else:
+        try:
+            s = nts(s, "ascii", "strict")
+            n = int(s.strip() or "0", 8)
+        except ValueError:
+            raise InvalidHeaderError("invalid header")
+    return n
+
+def itn(n, digits=8, format=DEFAULT_FORMAT):
+    """Convert a python number to a number field.
+    """
+    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
+    # octal digits followed by a null-byte, this allows values up to
+    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
+    # that if necessary. A leading 0o200 or 0o377 byte indicate this
+    # particular encoding, the following digits-1 bytes are a big-endian
+    # base-256 representation. This allows values up to (256**(digits-1))-1.
+    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
+    # number.
+    original_n = n
+    n = int(n)
+    if 0 <= n < 8 ** (digits - 1):
+        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
+    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
+        if n >= 0:
+            s = bytearray([0o200])
+        else:
+            s = bytearray([0o377])
+            n = 256 ** digits + n
+
+        for i in range(digits - 1):
+            s.insert(1, n & 0o377)
+            n >>= 8
+    else:
+        raise ValueError("overflow in number field")
+
+    return s
+
+def calc_chksums(buf):
+    """Calculate the checksum for a member's header by summing up all
+       characters except for the chksum field which is treated as if
+       it was filled with spaces. According to the GNU tar sources,
+       some tars (Sun and NeXT) calculate chksum with signed char,
+       which will be different if there are chars in the buffer with
+       the high bit set. So we calculate two checksums, unsigned and
+       signed.
+    """
+    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
+    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
+    return unsigned_chksum, signed_chksum
+
+def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
+    """Copy length bytes from fileobj src to fileobj dst.
+       If length is None, copy the entire content.
+    """
+    bufsize = bufsize or 16 * 1024
+    if length == 0:
+        return
+    if length is None:
+        shutil.copyfileobj(src, dst, bufsize)
+        return
+
+    blocks, remainder = divmod(length, bufsize)
+    for b in range(blocks):
+        buf = src.read(bufsize)
+        if len(buf) < bufsize:
+            raise exception("unexpected end of data")
+        dst.write(buf)
+
+    if remainder != 0:
+        buf = src.read(remainder)
+        if len(buf) < remainder:
+            raise exception("unexpected end of data")
+        dst.write(buf)
+    return
+
+def _safe_print(s):
+    encoding = getattr(sys.stdout, 'encoding', None)
+    if encoding is not None:
+        s = s.encode(encoding, 'backslashreplace').decode(encoding)
+    print(s, end=' ')
+
+
+class TarError(Exception):
+    """Base exception."""
+    pass
+class ExtractError(TarError):
+    """General exception for extract errors."""
+    pass
+class ReadError(TarError):
+    """Exception for unreadable tar archives."""
+    pass
+class CompressionError(TarError):
+    """Exception for unavailable compression methods."""
+    pass
+class StreamError(TarError):
+    """Exception for unsupported operations on stream-like TarFiles."""
+    pass
+class HeaderError(TarError):
+    """Base exception for header errors."""
+    pass
+class EmptyHeaderError(HeaderError):
+    """Exception for empty headers."""
+    pass
+class TruncatedHeaderError(HeaderError):
+    """Exception for truncated headers."""
+    pass
+class EOFHeaderError(HeaderError):
+    """Exception for end of file headers."""
+    pass
+class InvalidHeaderError(HeaderError):
+    """Exception for invalid headers."""
+    pass
+class SubsequentHeaderError(HeaderError):
+    """Exception for missing and invalid extended headers."""
+    pass
+
+#---------------------------
+# internal stream interface
+#---------------------------
+class _LowLevelFile:
+    """Low-level file object. Supports reading and writing.
+       It is used instead of a regular file object for streaming
+       access.
+    """
+
+    def __init__(self, name, mode):
+        mode = {
+            "r": os.O_RDONLY,
+            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+        }[mode]
+        if hasattr(os, "O_BINARY"):
+            mode |= os.O_BINARY
+        self.fd = os.open(name, mode, 0o666)
+
+    def close(self):
+        os.close(self.fd)
+
+    def read(self, size):
+        return os.read(self.fd, size)
+
+    def write(self, s):
+        os.write(self.fd, s)
+
+class _Stream:
+    """Class that serves as an adapter between TarFile and
+       a stream-like object.  The stream-like object only
+       needs to have a read() or write() method that works with bytes,
+       and the method is accessed blockwise.
+       Use of gzip or bzip2 compression is possible.
+       A stream-like object could be for example: sys.stdin.buffer,
+       sys.stdout.buffer, a socket, a tape device etc.
+
+       _Stream is intended to be used only internally.
+    """
+
+    def __init__(self, name, mode, comptype, fileobj, bufsize,
+                 compresslevel):
+        """Construct a _Stream object.
+        """
+        self._extfileobj = True
+        if fileobj is None:
+            fileobj = _LowLevelFile(name, mode)
+            self._extfileobj = False
+
+        if comptype == '*':
+            # Enable transparent compression detection for the
+            # stream interface
+            fileobj = _StreamProxy(fileobj)
+            comptype = fileobj.getcomptype()
+
+        self.name     = name or ""
+        self.mode     = mode
+        self.comptype = comptype
+        self.fileobj  = fileobj
+        self.bufsize  = bufsize
+        self.buf      = b""
+        self.pos      = 0
+        self.closed   = False
+
+        try:
+            if comptype == "gz":
+                try:
+                    import zlib
+                except ImportError:
+                    raise CompressionError("zlib module is not available") from None
+                self.zlib = zlib
+                self.crc = zlib.crc32(b"")
+                if mode == "r":
+                    self.exception = zlib.error
+                    self._init_read_gz()
+                else:
+                    self._init_write_gz(compresslevel)
+
+            elif comptype == "bz2":
+                try:
+                    import bz2
+                except ImportError:
+                    raise CompressionError("bz2 module is not available") from None
+                if mode == "r":
+                    self.dbuf = b""
+                    self.cmp = bz2.BZ2Decompressor()
+                    self.exception = OSError
+                else:
+                    self.cmp = bz2.BZ2Compressor(compresslevel)
+
+            elif comptype == "xz":
+                try:
+                    import lzma
+                except ImportError:
+                    raise CompressionError("lzma module is not available") from None
+                if mode == "r":
+                    self.dbuf = b""
+                    self.cmp = lzma.LZMADecompressor()
+                    self.exception = lzma.LZMAError
+                else:
+                    self.cmp = lzma.LZMACompressor()
+
+            elif comptype != "tar":
+                raise CompressionError("unknown compression type %r" % comptype)
+
+        except:
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+            raise
+
+    def __del__(self):
+        if hasattr(self, "closed") and not self.closed:
+            self.close()
+
+    def _init_write_gz(self, compresslevel):
+        """Initialize for writing with gzip compression.
+        """
+        self.cmp = self.zlib.compressobj(compresslevel,
+                                         self.zlib.DEFLATED,
+                                         -self.zlib.MAX_WBITS,
+                                         self.zlib.DEF_MEM_LEVEL,
+                                         0)
+        timestamp = struct.pack(" self.bufsize:
+            self.fileobj.write(self.buf[:self.bufsize])
+            self.buf = self.buf[self.bufsize:]
+
+    def close(self):
+        """Close the _Stream object. No operation should be
+           done on it afterwards.
+        """
+        if self.closed:
+            return
+
+        self.closed = True
+        try:
+            if self.mode == "w" and self.comptype != "tar":
+                self.buf += self.cmp.flush()
+
+            if self.mode == "w" and self.buf:
+                self.fileobj.write(self.buf)
+                self.buf = b""
+                if self.comptype == "gz":
+                    self.fileobj.write(struct.pack("= 0:
+            blocks, remainder = divmod(pos - self.pos, self.bufsize)
+            for i in range(blocks):
+                self.read(self.bufsize)
+            self.read(remainder)
+        else:
+            raise StreamError("seeking backwards is not allowed")
+        return self.pos
+
+    def read(self, size):
+        """Return the next size number of bytes from the stream."""
+        assert size is not None
+        buf = self._read(size)
+        self.pos += len(buf)
+        return buf
+
+    def _read(self, size):
+        """Return size bytes from the stream.
+        """
+        if self.comptype == "tar":
+            return self.__read(size)
+
+        c = len(self.dbuf)
+        t = [self.dbuf]
+        while c < size:
+            # Skip underlying buffer to avoid unaligned double buffering.
+            if self.buf:
+                buf = self.buf
+                self.buf = b""
+            else:
+                buf = self.fileobj.read(self.bufsize)
+                if not buf:
+                    break
+            try:
+                buf = self.cmp.decompress(buf)
+            except self.exception as e:
+                raise ReadError("invalid compressed data") from e
+            t.append(buf)
+            c += len(buf)
+        t = b"".join(t)
+        self.dbuf = t[size:]
+        return t[:size]
+
+    def __read(self, size):
+        """Return size bytes from stream. If internal buffer is empty,
+           read another block from the stream.
+        """
+        c = len(self.buf)
+        t = [self.buf]
+        while c < size:
+            buf = self.fileobj.read(self.bufsize)
+            if not buf:
+                break
+            t.append(buf)
+            c += len(buf)
+        t = b"".join(t)
+        self.buf = t[size:]
+        return t[:size]
+# class _Stream
+
+class _StreamProxy(object):
+    """Small proxy class that enables transparent compression
+       detection for the Stream interface (mode 'r|*').
+    """
+
+    def __init__(self, fileobj):
+        self.fileobj = fileobj
+        self.buf = self.fileobj.read(BLOCKSIZE)
+
+    def read(self, size):
+        self.read = self.fileobj.read
+        return self.buf
+
+    def getcomptype(self):
+        if self.buf.startswith(b"\x1f\x8b\x08"):
+            return "gz"
+        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
+            return "bz2"
+        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
+            return "xz"
+        else:
+            return "tar"
+
+    def close(self):
+        self.fileobj.close()
+# class StreamProxy
+
+#------------------------
+# Extraction file object
+#------------------------
+class _FileInFile(object):
+    """A thin wrapper around an existing file object that
+       provides a part of its data as an individual file
+       object.
+    """
+
+    def __init__(self, fileobj, offset, size, name, blockinfo=None):
+        self.fileobj = fileobj
+        self.offset = offset
+        self.size = size
+        self.position = 0
+        self.name = name
+        self.closed = False
+
+        if blockinfo is None:
+            blockinfo = [(0, size)]
+
+        # Construct a map with data and zero blocks.
+        self.map_index = 0
+        self.map = []
+        lastpos = 0
+        realpos = self.offset
+        for offset, size in blockinfo:
+            if offset > lastpos:
+                self.map.append((False, lastpos, offset, None))
+            self.map.append((True, offset, offset + size, realpos))
+            realpos += size
+            lastpos = offset + size
+        if lastpos < self.size:
+            self.map.append((False, lastpos, self.size, None))
+
+    def flush(self):
+        pass
+
+    def readable(self):
+        return True
+
+    def writable(self):
+        return False
+
+    def seekable(self):
+        return self.fileobj.seekable()
+
+    def tell(self):
+        """Return the current file position.
+        """
+        return self.position
+
+    def seek(self, position, whence=io.SEEK_SET):
+        """Seek to a position in the file.
+        """
+        if whence == io.SEEK_SET:
+            self.position = min(max(position, 0), self.size)
+        elif whence == io.SEEK_CUR:
+            if position < 0:
+                self.position = max(self.position + position, 0)
+            else:
+                self.position = min(self.position + position, self.size)
+        elif whence == io.SEEK_END:
+            self.position = max(min(self.size + position, self.size), 0)
+        else:
+            raise ValueError("Invalid argument")
+        return self.position
+
+    def read(self, size=None):
+        """Read data from the file.
+        """
+        if size is None:
+            size = self.size - self.position
+        else:
+            size = min(size, self.size - self.position)
+
+        buf = b""
+        while size > 0:
+            while True:
+                data, start, stop, offset = self.map[self.map_index]
+                if start <= self.position < stop:
+                    break
+                else:
+                    self.map_index += 1
+                    if self.map_index == len(self.map):
+                        self.map_index = 0
+            length = min(size, stop - self.position)
+            if data:
+                self.fileobj.seek(offset + (self.position - start))
+                b = self.fileobj.read(length)
+                if len(b) != length:
+                    raise ReadError("unexpected end of data")
+                buf += b
+            else:
+                buf += NUL * length
+            size -= length
+            self.position += length
+        return buf
+
+    def readinto(self, b):
+        buf = self.read(len(b))
+        b[:len(buf)] = buf
+        return len(buf)
+
+    def close(self):
+        self.closed = True
+#class _FileInFile
+
+class ExFileObject(io.BufferedReader):
+
+    def __init__(self, tarfile, tarinfo):
+        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
+                tarinfo.size, tarinfo.name, tarinfo.sparse)
+        super().__init__(fileobj)
+#class ExFileObject
+
+
+#-----------------------------
+# extraction filters (PEP 706)
+#-----------------------------
+
+class FilterError(TarError):
+    pass
+
+class AbsolutePathError(FilterError):
+    def __init__(self, tarinfo):
+        self.tarinfo = tarinfo
+        super().__init__(f'member {tarinfo.name!r} has an absolute path')
+
+class OutsideDestinationError(FilterError):
+    def __init__(self, tarinfo, path):
+        self.tarinfo = tarinfo
+        self._path = path
+        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
+                         + 'which is outside the destination')
+
+class SpecialFileError(FilterError):
+    def __init__(self, tarinfo):
+        self.tarinfo = tarinfo
+        super().__init__(f'{tarinfo.name!r} is a special file')
+
+class AbsoluteLinkError(FilterError):
+    def __init__(self, tarinfo):
+        self.tarinfo = tarinfo
+        super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
+
+class LinkOutsideDestinationError(FilterError):
+    def __init__(self, tarinfo, path):
+        self.tarinfo = tarinfo
+        self._path = path
+        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
+                         + 'which is outside the destination')
+
+def _get_filtered_attrs(member, dest_path, for_data=True):
+    new_attrs = {}
+    name = member.name
+    dest_path = os.path.realpath(dest_path)
+    # Strip leading / (tar's directory separator) from filenames.
+    # Include os.sep (target OS directory separator) as well.
+    if name.startswith(('/', os.sep)):
+        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
+    if os.path.isabs(name):
+        # Path is absolute even after stripping.
+        # For example, 'C:/foo' on Windows.
+        raise AbsolutePathError(member)
+    # Ensure we stay in the destination
+    target_path = os.path.realpath(os.path.join(dest_path, name))
+    if os.path.commonpath([target_path, dest_path]) != dest_path:
+        raise OutsideDestinationError(member, target_path)
+    # Limit permissions (no high bits, and go-w)
+    mode = member.mode
+    if mode is not None:
+        # Strip high bits & group/other write bits
+        mode = mode & 0o755
+        if for_data:
+            # For data, handle permissions & file types
+            if member.isreg() or member.islnk():
+                if not mode & 0o100:
+                    # Clear executable bits if not executable by user
+                    mode &= ~0o111
+                # Ensure owner can read & write
+                mode |= 0o600
+            elif member.isdir() or member.issym():
+                # Ignore mode for directories & symlinks
+                mode = None
+            else:
+                # Reject special files
+                raise SpecialFileError(member)
+        if mode != member.mode:
+            new_attrs['mode'] = mode
+    if for_data:
+        # Ignore ownership for 'data'
+        if member.uid is not None:
+            new_attrs['uid'] = None
+        if member.gid is not None:
+            new_attrs['gid'] = None
+        if member.uname is not None:
+            new_attrs['uname'] = None
+        if member.gname is not None:
+            new_attrs['gname'] = None
+        # Check link destination for 'data'
+        if member.islnk() or member.issym():
+            if os.path.isabs(member.linkname):
+                raise AbsoluteLinkError(member)
+            if member.issym():
+                target_path = os.path.join(dest_path,
+                                           os.path.dirname(name),
+                                           member.linkname)
+            else:
+                target_path = os.path.join(dest_path,
+                                           member.linkname)
+            target_path = os.path.realpath(target_path)
+            if os.path.commonpath([target_path, dest_path]) != dest_path:
+                raise LinkOutsideDestinationError(member, target_path)
+    return new_attrs
+
+def fully_trusted_filter(member, dest_path):
+    return member
+
+def tar_filter(member, dest_path):
+    new_attrs = _get_filtered_attrs(member, dest_path, False)
+    if new_attrs:
+        return member.replace(**new_attrs, deep=False)
+    return member
+
+def data_filter(member, dest_path):
+    new_attrs = _get_filtered_attrs(member, dest_path, True)
+    if new_attrs:
+        return member.replace(**new_attrs, deep=False)
+    return member
+
+_NAMED_FILTERS = {
+    "fully_trusted": fully_trusted_filter,
+    "tar": tar_filter,
+    "data": data_filter,
+}
+
+#------------------
+# Exported Classes
+#------------------
+
+# Sentinel for replace() defaults, meaning "don't change the attribute"
+_KEEP = object()
+
+class TarInfo(object):
+    """Informational class which holds the details about an
+       archive member given by a tar header block.
+       TarInfo objects are returned by TarFile.getmember(),
+       TarFile.getmembers() and TarFile.gettarinfo() and are
+       usually created internally.
+    """
+
+    __slots__ = dict(
+        name = 'Name of the archive member.',
+        mode = 'Permission bits.',
+        uid = 'User ID of the user who originally stored this member.',
+        gid = 'Group ID of the user who originally stored this member.',
+        size = 'Size in bytes.',
+        mtime = 'Time of last modification.',
+        chksum = 'Header checksum.',
+        type = ('File type. type is usually one of these constants: '
+                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
+                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
+        linkname = ('Name of the target file name, which is only present '
+                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
+        uname = 'User name.',
+        gname = 'Group name.',
+        devmajor = 'Device major number.',
+        devminor = 'Device minor number.',
+        offset = 'The tar header starts here.',
+        offset_data = "The file's data starts here.",
+        pax_headers = ('A dictionary containing key-value pairs of an '
+                       'associated pax extended header.'),
+        sparse = 'Sparse member information.',
+        tarfile = None,
+        _sparse_structs = None,
+        _link_target = None,
+        )
+
+    def __init__(self, name=""):
+        """Construct a TarInfo object. name is the optional name
+           of the member.
+        """
+        self.name = name        # member name
+        self.mode = 0o644       # file permissions
+        self.uid = 0            # user id
+        self.gid = 0            # group id
+        self.size = 0           # file size
+        self.mtime = 0          # modification time
+        self.chksum = 0         # header checksum
+        self.type = REGTYPE     # member type
+        self.linkname = ""      # link name
+        self.uname = ""         # user name
+        self.gname = ""         # group name
+        self.devmajor = 0       # device major number
+        self.devminor = 0       # device minor number
+
+        self.offset = 0         # the tar header starts here
+        self.offset_data = 0    # the file's data starts here
+
+        self.sparse = None      # sparse member information
+        self.pax_headers = {}   # pax header information
+
+    @property
+    def path(self):
+        'In pax headers, "name" is called "path".'
+        return self.name
+
+    @path.setter
+    def path(self, name):
+        self.name = name
+
+    @property
+    def linkpath(self):
+        'In pax headers, "linkname" is called "linkpath".'
+        return self.linkname
+
+    @linkpath.setter
+    def linkpath(self, linkname):
+        self.linkname = linkname
+
+    def __repr__(self):
+        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
+
+    def replace(self, *,
+                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
+                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
+                deep=True, _KEEP=_KEEP):
+        """Return a deep copy of self with the given attributes replaced.
+        """
+        if deep:
+            result = copy.deepcopy(self)
+        else:
+            result = copy.copy(self)
+        if name is not _KEEP:
+            result.name = name
+        if mtime is not _KEEP:
+            result.mtime = mtime
+        if mode is not _KEEP:
+            result.mode = mode
+        if linkname is not _KEEP:
+            result.linkname = linkname
+        if uid is not _KEEP:
+            result.uid = uid
+        if gid is not _KEEP:
+            result.gid = gid
+        if uname is not _KEEP:
+            result.uname = uname
+        if gname is not _KEEP:
+            result.gname = gname
+        return result
+
+    def get_info(self):
+        """Return the TarInfo's attributes as a dictionary.
+        """
+        if self.mode is None:
+            mode = None
+        else:
+            mode = self.mode & 0o7777
+        info = {
+            "name":     self.name,
+            "mode":     mode,
+            "uid":      self.uid,
+            "gid":      self.gid,
+            "size":     self.size,
+            "mtime":    self.mtime,
+            "chksum":   self.chksum,
+            "type":     self.type,
+            "linkname": self.linkname,
+            "uname":    self.uname,
+            "gname":    self.gname,
+            "devmajor": self.devmajor,
+            "devminor": self.devminor
+        }
+
+        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
+            info["name"] += "/"
+
+        return info
+
+    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
+        """Return a tar header as a string of 512 byte blocks.
+        """
+        info = self.get_info()
+        for name, value in info.items():
+            if value is None:
+                raise ValueError("%s may not be None" % name)
+
+        if format == USTAR_FORMAT:
+            return self.create_ustar_header(info, encoding, errors)
+        elif format == GNU_FORMAT:
+            return self.create_gnu_header(info, encoding, errors)
+        elif format == PAX_FORMAT:
+            return self.create_pax_header(info, encoding)
+        else:
+            raise ValueError("invalid format")
+
+    def create_ustar_header(self, info, encoding, errors):
+        """Return the object as a ustar header block.
+        """
+        info["magic"] = POSIX_MAGIC
+
+        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
+            raise ValueError("linkname is too long")
+
+        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
+            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
+
+        return self._create_header(info, USTAR_FORMAT, encoding, errors)
+
+    def create_gnu_header(self, info, encoding, errors):
+        """Return the object as a GNU header block sequence.
+        """
+        info["magic"] = GNU_MAGIC
+
+        buf = b""
+        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
+            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
+
+        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
+            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
+
+        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
+
+    def create_pax_header(self, info, encoding):
+        """Return the object as a ustar header block. If it cannot be
+           represented this way, prepend a pax extended header sequence
+           with supplement information.
+        """
+        info["magic"] = POSIX_MAGIC
+        pax_headers = self.pax_headers.copy()
+
+        # Test string fields for values that exceed the field length or cannot
+        # be represented in ASCII encoding.
+        for name, hname, length in (
+                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
+                ("uname", "uname", 32), ("gname", "gname", 32)):
+
+            if hname in pax_headers:
+                # The pax header has priority.
+                continue
+
+            # Try to encode the string as ASCII.
+            try:
+                info[name].encode("ascii", "strict")
+            except UnicodeEncodeError:
+                pax_headers[hname] = info[name]
+                continue
+
+            if len(info[name]) > length:
+                pax_headers[hname] = info[name]
+
+        # Test number fields for values that exceed the field limit or values
+        # that like to be stored as float.
+        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
+            needs_pax = False
+
+            val = info[name]
+            val_is_float = isinstance(val, float)
+            val_int = round(val) if val_is_float else val
+            if not 0 <= val_int < 8 ** (digits - 1):
+                # Avoid overflow.
+                info[name] = 0
+                needs_pax = True
+            elif val_is_float:
+                # Put rounded value in ustar header, and full
+                # precision value in pax header.
+                info[name] = val_int
+                needs_pax = True
+
+            # The existing pax header has priority.
+            if needs_pax and name not in pax_headers:
+                pax_headers[name] = str(val)
+
+        # Create a pax extended header if necessary.
+        if pax_headers:
+            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
+        else:
+            buf = b""
+
+        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
+
+    @classmethod
+    def create_pax_global_header(cls, pax_headers):
+        """Return the object as a pax global header block sequence.
+        """
+        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
+
+    def _posix_split_name(self, name, encoding, errors):
+        """Split a name longer than 100 chars into a prefix
+           and a name part.
+        """
+        components = name.split("/")
+        for i in range(1, len(components)):
+            prefix = "/".join(components[:i])
+            name = "/".join(components[i:])
+            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
+                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
+                break
+        else:
+            raise ValueError("name is too long")
+
+        return prefix, name
+
+    @staticmethod
+    def _create_header(info, format, encoding, errors):
+        """Return a header block. info is a dictionary with file
+           information, format must be one of the *_FORMAT constants.
+        """
+        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
+        if has_device_fields:
+            devmajor = itn(info.get("devmajor", 0), 8, format)
+            devminor = itn(info.get("devminor", 0), 8, format)
+        else:
+            devmajor = stn("", 8, encoding, errors)
+            devminor = stn("", 8, encoding, errors)
+
+        # None values in metadata should cause ValueError.
+        # itn()/stn() do this for all fields except type.
+        filetype = info.get("type", REGTYPE)
+        if filetype is None:
+            raise ValueError("TarInfo.type must not be None")
+
+        parts = [
+            stn(info.get("name", ""), 100, encoding, errors),
+            itn(info.get("mode", 0) & 0o7777, 8, format),
+            itn(info.get("uid", 0), 8, format),
+            itn(info.get("gid", 0), 8, format),
+            itn(info.get("size", 0), 12, format),
+            itn(info.get("mtime", 0), 12, format),
+            b"        ", # checksum field
+            filetype,
+            stn(info.get("linkname", ""), 100, encoding, errors),
+            info.get("magic", POSIX_MAGIC),
+            stn(info.get("uname", ""), 32, encoding, errors),
+            stn(info.get("gname", ""), 32, encoding, errors),
+            devmajor,
+            devminor,
+            stn(info.get("prefix", ""), 155, encoding, errors)
+        ]
+
+        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
+        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
+        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
+        return buf
+
+    @staticmethod
+    def _create_payload(payload):
+        """Return the string payload filled with zero bytes
+           up to the next 512 byte border.
+        """
+        blocks, remainder = divmod(len(payload), BLOCKSIZE)
+        if remainder > 0:
+            payload += (BLOCKSIZE - remainder) * NUL
+        return payload
+
+    @classmethod
+    def _create_gnu_long_header(cls, name, type, encoding, errors):
+        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
+           for name.
+        """
+        name = name.encode(encoding, errors) + NUL
+
+        info = {}
+        info["name"] = "././@LongLink"
+        info["type"] = type
+        info["size"] = len(name)
+        info["magic"] = GNU_MAGIC
+
+        # create extended header + name blocks.
+        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
+                cls._create_payload(name)
+
+    @classmethod
+    def _create_pax_generic_header(cls, pax_headers, type, encoding):
+        """Return a POSIX.1-2008 extended or global header sequence
+           that contains a list of keyword, value pairs. The values
+           must be strings.
+        """
+        # Check if one of the fields contains surrogate characters and thereby
+        # forces hdrcharset=BINARY, see _proc_pax() for more information.
+        binary = False
+        for keyword, value in pax_headers.items():
+            try:
+                value.encode("utf-8", "strict")
+            except UnicodeEncodeError:
+                binary = True
+                break
+
+        records = b""
+        if binary:
+            # Put the hdrcharset field at the beginning of the header.
+            records += b"21 hdrcharset=BINARY\n"
+
+        for keyword, value in pax_headers.items():
+            keyword = keyword.encode("utf-8")
+            if binary:
+                # Try to restore the original byte representation of `value'.
+                # Needless to say, that the encoding must match the string.
+                value = value.encode(encoding, "surrogateescape")
+            else:
+                value = value.encode("utf-8")
+
+            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
+            n = p = 0
+            while True:
+                n = l + len(str(p))
+                if n == p:
+                    break
+                p = n
+            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
+
+        # We use a hardcoded "././@PaxHeader" name like star does
+        # instead of the one that POSIX recommends.
+        info = {}
+        info["name"] = "././@PaxHeader"
+        info["type"] = type
+        info["size"] = len(records)
+        info["magic"] = POSIX_MAGIC
+
+        # Create pax header + record blocks.
+        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
+                cls._create_payload(records)
+
+    @classmethod
+    def frombuf(cls, buf, encoding, errors):
+        """Construct a TarInfo object from a 512 byte bytes object.
+        """
+        if len(buf) == 0:
+            raise EmptyHeaderError("empty header")
+        if len(buf) != BLOCKSIZE:
+            raise TruncatedHeaderError("truncated header")
+        if buf.count(NUL) == BLOCKSIZE:
+            raise EOFHeaderError("end of file header")
+
+        chksum = nti(buf[148:156])
+        if chksum not in calc_chksums(buf):
+            raise InvalidHeaderError("bad checksum")
+
+        obj = cls()
+        obj.name = nts(buf[0:100], encoding, errors)
+        obj.mode = nti(buf[100:108])
+        obj.uid = nti(buf[108:116])
+        obj.gid = nti(buf[116:124])
+        obj.size = nti(buf[124:136])
+        obj.mtime = nti(buf[136:148])
+        obj.chksum = chksum
+        obj.type = buf[156:157]
+        obj.linkname = nts(buf[157:257], encoding, errors)
+        obj.uname = nts(buf[265:297], encoding, errors)
+        obj.gname = nts(buf[297:329], encoding, errors)
+        obj.devmajor = nti(buf[329:337])
+        obj.devminor = nti(buf[337:345])
+        prefix = nts(buf[345:500], encoding, errors)
+
+        # Old V7 tar format represents a directory as a regular
+        # file with a trailing slash.
+        if obj.type == AREGTYPE and obj.name.endswith("/"):
+            obj.type = DIRTYPE
+
+        # The old GNU sparse format occupies some of the unused
+        # space in the buffer for up to 4 sparse structures.
+        # Save them for later processing in _proc_sparse().
+        if obj.type == GNUTYPE_SPARSE:
+            pos = 386
+            structs = []
+            for i in range(4):
+                try:
+                    offset = nti(buf[pos:pos + 12])
+                    numbytes = nti(buf[pos + 12:pos + 24])
+                except ValueError:
+                    break
+                structs.append((offset, numbytes))
+                pos += 24
+            isextended = bool(buf[482])
+            origsize = nti(buf[483:495])
+            obj._sparse_structs = (structs, isextended, origsize)
+
+        # Remove redundant slashes from directories.
+        if obj.isdir():
+            obj.name = obj.name.rstrip("/")
+
+        # Reconstruct a ustar longname.
+        if prefix and obj.type not in GNU_TYPES:
+            obj.name = prefix + "/" + obj.name
+        return obj
+
+    @classmethod
+    def fromtarfile(cls, tarfile):
+        """Return the next TarInfo object from TarFile object
+           tarfile.
+        """
+        buf = tarfile.fileobj.read(BLOCKSIZE)
+        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
+        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
+        return obj._proc_member(tarfile)
+
+    #--------------------------------------------------------------------------
+    # The following are methods that are called depending on the type of a
+    # member. The entry point is _proc_member() which can be overridden in a
+    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
+    # implement the following
+    # operations:
+    # 1. Set self.offset_data to the position where the data blocks begin,
+    #    if there is data that follows.
+    # 2. Set tarfile.offset to the position where the next member's header will
+    #    begin.
+    # 3. Return self or another valid TarInfo object.
+    def _proc_member(self, tarfile):
+        """Choose the right processing method depending on
+           the type and call it.
+        """
+        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
+            return self._proc_gnulong(tarfile)
+        elif self.type == GNUTYPE_SPARSE:
+            return self._proc_sparse(tarfile)
+        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
+            return self._proc_pax(tarfile)
+        else:
+            return self._proc_builtin(tarfile)
+
+    def _proc_builtin(self, tarfile):
+        """Process a builtin type or an unknown type which
+           will be treated as a regular file.
+        """
+        self.offset_data = tarfile.fileobj.tell()
+        offset = self.offset_data
+        if self.isreg() or self.type not in SUPPORTED_TYPES:
+            # Skip the following data blocks.
+            offset += self._block(self.size)
+        tarfile.offset = offset
+
+        # Patch the TarInfo object with saved global
+        # header information.
+        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
+
+        # Remove redundant slashes from directories. This is to be consistent
+        # with frombuf().
+        if self.isdir():
+            self.name = self.name.rstrip("/")
+
+        return self
+
+    def _proc_gnulong(self, tarfile):
+        """Process the blocks that hold a GNU longname
+           or longlink member.
+        """
+        buf = tarfile.fileobj.read(self._block(self.size))
+
+        # Fetch the next header and process it.
+        try:
+            next = self.fromtarfile(tarfile)
+        except HeaderError as e:
+            raise SubsequentHeaderError(str(e)) from None
+
+        # Patch the TarInfo object from the next header with
+        # the longname information.
+        next.offset = self.offset
+        if self.type == GNUTYPE_LONGNAME:
+            next.name = nts(buf, tarfile.encoding, tarfile.errors)
+        elif self.type == GNUTYPE_LONGLINK:
+            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
+
+        # Remove redundant slashes from directories. This is to be consistent
+        # with frombuf().
+        if next.isdir():
+            next.name = next.name.removesuffix("/")
+
+        return next
+
+    def _proc_sparse(self, tarfile):
+        """Process a GNU sparse header plus extra headers.
+        """
+        # We already collected some sparse structures in frombuf().
+        structs, isextended, origsize = self._sparse_structs
+        del self._sparse_structs
+
+        # Collect sparse structures from extended header blocks.
+        while isextended:
+            buf = tarfile.fileobj.read(BLOCKSIZE)
+            pos = 0
+            for i in range(21):
+                try:
+                    offset = nti(buf[pos:pos + 12])
+                    numbytes = nti(buf[pos + 12:pos + 24])
+                except ValueError:
+                    break
+                if offset and numbytes:
+                    structs.append((offset, numbytes))
+                pos += 24
+            isextended = bool(buf[504])
+        self.sparse = structs
+
+        self.offset_data = tarfile.fileobj.tell()
+        tarfile.offset = self.offset_data + self._block(self.size)
+        self.size = origsize
+        return self
+
+    def _proc_pax(self, tarfile):
+        """Process an extended or global header as described in
+           POSIX.1-2008.
+        """
+        # Read the header information.
+        buf = tarfile.fileobj.read(self._block(self.size))
+
+        # A pax header stores supplemental information for either
+        # the following file (extended) or all following files
+        # (global).
+        if self.type == XGLTYPE:
+            pax_headers = tarfile.pax_headers
+        else:
+            pax_headers = tarfile.pax_headers.copy()
+
+        # Check if the pax header contains a hdrcharset field. This tells us
+        # the encoding of the path, linkpath, uname and gname fields. Normally,
+        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
+        # implementations are allowed to store them as raw binary strings if
+        # the translation to UTF-8 fails.
+        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
+        if match is not None:
+            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
+
+        # For the time being, we don't care about anything other than "BINARY".
+        # The only other value that is currently allowed by the standard is
+        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
+        hdrcharset = pax_headers.get("hdrcharset")
+        if hdrcharset == "BINARY":
+            encoding = tarfile.encoding
+        else:
+            encoding = "utf-8"
+
+        # Parse pax header information. A record looks like that:
+        # "%d %s=%s\n" % (length, keyword, value). length is the size
+        # of the complete record including the length field itself and
+        # the newline. keyword and value are both UTF-8 encoded strings.
+        regex = re.compile(br"(\d+) ([^=]+)=")
+        pos = 0
+        while match := regex.match(buf, pos):
+            length, keyword = match.groups()
+            length = int(length)
+            if length == 0:
+                raise InvalidHeaderError("invalid header")
+            value = buf[match.end(2) + 1:match.start(1) + length - 1]
+
+            # Normally, we could just use "utf-8" as the encoding and "strict"
+            # as the error handler, but we better not take the risk. For
+            # example, GNU tar <= 1.23 is known to store filenames it cannot
+            # translate to UTF-8 as raw strings (unfortunately without a
+            # hdrcharset=BINARY header).
+            # We first try the strict standard encoding, and if that fails we
+            # fall back on the user's encoding and error handler.
+            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
+                    tarfile.errors)
+            if keyword in PAX_NAME_FIELDS:
+                value = self._decode_pax_field(value, encoding, tarfile.encoding,
+                        tarfile.errors)
+            else:
+                value = self._decode_pax_field(value, "utf-8", "utf-8",
+                        tarfile.errors)
+
+            pax_headers[keyword] = value
+            pos += length
+
+        # Fetch the next header.
+        try:
+            next = self.fromtarfile(tarfile)
+        except HeaderError as e:
+            raise SubsequentHeaderError(str(e)) from None
+
+        # Process GNU sparse information.
+        if "GNU.sparse.map" in pax_headers:
+            # GNU extended sparse format version 0.1.
+            self._proc_gnusparse_01(next, pax_headers)
+
+        elif "GNU.sparse.size" in pax_headers:
+            # GNU extended sparse format version 0.0.
+            self._proc_gnusparse_00(next, pax_headers, buf)
+
+        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
+            # GNU extended sparse format version 1.0.
+            self._proc_gnusparse_10(next, pax_headers, tarfile)
+
+        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
+            # Patch the TarInfo object with the extended header info.
+            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
+            next.offset = self.offset
+
+            if "size" in pax_headers:
+                # If the extended header replaces the size field,
+                # we need to recalculate the offset where the next
+                # header starts.
+                offset = next.offset_data
+                if next.isreg() or next.type not in SUPPORTED_TYPES:
+                    offset += next._block(next.size)
+                tarfile.offset = offset
+
+        return next
+
+    def _proc_gnusparse_00(self, next, pax_headers, buf):
+        """Process a GNU tar extended sparse header, version 0.0.
+        """
+        offsets = []
+        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
+            offsets.append(int(match.group(1)))
+        numbytes = []
+        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
+            numbytes.append(int(match.group(1)))
+        next.sparse = list(zip(offsets, numbytes))
+
+    def _proc_gnusparse_01(self, next, pax_headers):
+        """Process a GNU tar extended sparse header, version 0.1.
+        """
+        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
+        next.sparse = list(zip(sparse[::2], sparse[1::2]))
+
+    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
+        """Process a GNU tar extended sparse header, version 1.0.
+        """
+        fields = None
+        sparse = []
+        buf = tarfile.fileobj.read(BLOCKSIZE)
+        fields, buf = buf.split(b"\n", 1)
+        fields = int(fields)
+        while len(sparse) < fields * 2:
+            if b"\n" not in buf:
+                buf += tarfile.fileobj.read(BLOCKSIZE)
+            number, buf = buf.split(b"\n", 1)
+            sparse.append(int(number))
+        next.offset_data = tarfile.fileobj.tell()
+        next.sparse = list(zip(sparse[::2], sparse[1::2]))
+
+    def _apply_pax_info(self, pax_headers, encoding, errors):
+        """Replace fields with supplemental information from a previous
+           pax extended or global header.
+        """
+        for keyword, value in pax_headers.items():
+            if keyword == "GNU.sparse.name":
+                setattr(self, "path", value)
+            elif keyword == "GNU.sparse.size":
+                setattr(self, "size", int(value))
+            elif keyword == "GNU.sparse.realsize":
+                setattr(self, "size", int(value))
+            elif keyword in PAX_FIELDS:
+                if keyword in PAX_NUMBER_FIELDS:
+                    try:
+                        value = PAX_NUMBER_FIELDS[keyword](value)
+                    except ValueError:
+                        value = 0
+                if keyword == "path":
+                    value = value.rstrip("/")
+                setattr(self, keyword, value)
+
+        self.pax_headers = pax_headers.copy()
+
+    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
+        """Decode a single field from a pax record.
+        """
+        try:
+            return value.decode(encoding, "strict")
+        except UnicodeDecodeError:
+            return value.decode(fallback_encoding, fallback_errors)
+
+    def _block(self, count):
+        """Round up a byte count by BLOCKSIZE and return it,
+           e.g. _block(834) => 1024.
+        """
+        blocks, remainder = divmod(count, BLOCKSIZE)
+        if remainder:
+            blocks += 1
+        return blocks * BLOCKSIZE
+
+    def isreg(self):
+        'Return True if the Tarinfo object is a regular file.'
+        return self.type in REGULAR_TYPES
+
+    def isfile(self):
+        'Return True if the Tarinfo object is a regular file.'
+        return self.isreg()
+
+    def isdir(self):
+        'Return True if it is a directory.'
+        return self.type == DIRTYPE
+
+    def issym(self):
+        'Return True if it is a symbolic link.'
+        return self.type == SYMTYPE
+
+    def islnk(self):
+        'Return True if it is a hard link.'
+        return self.type == LNKTYPE
+
+    def ischr(self):
+        'Return True if it is a character device.'
+        return self.type == CHRTYPE
+
+    def isblk(self):
+        'Return True if it is a block device.'
+        return self.type == BLKTYPE
+
+    def isfifo(self):
+        'Return True if it is a FIFO.'
+        return self.type == FIFOTYPE
+
+    def issparse(self):
+        return self.sparse is not None
+
+    def isdev(self):
+        'Return True if it is one of character device, block device or FIFO.'
+        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
+# class TarInfo
+
+class TarFile(object):
+    """The TarFile Class provides an interface to tar archives.
+    """
+
+    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
+
+    dereference = False         # If true, add content of linked file to the
+                                # tar file, else the link.
+
+    ignore_zeros = False        # If true, skips empty or invalid blocks and
+                                # continues processing.
+
+    errorlevel = 1              # If 0, fatal errors only appear in debug
+                                # messages (if debug >= 0). If > 0, errors
+                                # are passed to the caller as exceptions.
+
+    format = DEFAULT_FORMAT     # The format to use when creating an archive.
+
+    encoding = ENCODING         # Encoding for 8-bit character strings.
+
+    errors = None               # Error handler for unicode conversion.
+
+    tarinfo = TarInfo           # The default TarInfo class to use.
+
+    fileobject = ExFileObject   # The file-object for extractfile().
+
+    extraction_filter = None    # The default filter for extraction.
+
+    def __init__(self, name=None, mode="r", fileobj=None, format=None,
+            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
+            errors="surrogateescape", pax_headers=None, debug=None,
+            errorlevel=None, copybufsize=None):
+        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
+           read from an existing archive, 'a' to append data to an existing
+           file or 'w' to create a new file overwriting an existing one. `mode'
+           defaults to 'r'.
+           If `fileobj' is given, it is used for reading or writing data. If it
+           can be determined, `mode' is overridden by `fileobj's mode.
+           `fileobj' is not closed, when TarFile is closed.
+        """
+        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
+        if mode not in modes:
+            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
+        self.mode = mode
+        self._mode = modes[mode]
+
+        if not fileobj:
+            if self.mode == "a" and not os.path.exists(name):
+                # Create nonexistent files in append mode.
+                self.mode = "w"
+                self._mode = "wb"
+            fileobj = bltn_open(name, self._mode)
+            self._extfileobj = False
+        else:
+            if (name is None and hasattr(fileobj, "name") and
+                isinstance(fileobj.name, (str, bytes))):
+                name = fileobj.name
+            if hasattr(fileobj, "mode"):
+                self._mode = fileobj.mode
+            self._extfileobj = True
+        self.name = os.path.abspath(name) if name else None
+        self.fileobj = fileobj
+
+        # Init attributes.
+        if format is not None:
+            self.format = format
+        if tarinfo is not None:
+            self.tarinfo = tarinfo
+        if dereference is not None:
+            self.dereference = dereference
+        if ignore_zeros is not None:
+            self.ignore_zeros = ignore_zeros
+        if encoding is not None:
+            self.encoding = encoding
+        self.errors = errors
+
+        if pax_headers is not None and self.format == PAX_FORMAT:
+            self.pax_headers = pax_headers
+        else:
+            self.pax_headers = {}
+
+        if debug is not None:
+            self.debug = debug
+        if errorlevel is not None:
+            self.errorlevel = errorlevel
+
+        # Init datastructures.
+        self.copybufsize = copybufsize
+        self.closed = False
+        self.members = []       # list of members as TarInfo objects
+        self._loaded = False    # flag if all members have been read
+        self.offset = self.fileobj.tell()
+                                # current position in the archive file
+        self.inodes = {}        # dictionary caching the inodes of
+                                # archive members already added
+
+        try:
+            if self.mode == "r":
+                self.firstmember = None
+                self.firstmember = self.next()
+
+            if self.mode == "a":
+                # Move to the end of the archive,
+                # before the first empty block.
+                while True:
+                    self.fileobj.seek(self.offset)
+                    try:
+                        tarinfo = self.tarinfo.fromtarfile(self)
+                        self.members.append(tarinfo)
+                    except EOFHeaderError:
+                        self.fileobj.seek(self.offset)
+                        break
+                    except HeaderError as e:
+                        raise ReadError(str(e)) from None
+
+            if self.mode in ("a", "w", "x"):
+                self._loaded = True
+
+                if self.pax_headers:
+                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
+                    self.fileobj.write(buf)
+                    self.offset += len(buf)
+        except:
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+            raise
+
+    #--------------------------------------------------------------------------
+    # Below are the classmethods which act as alternate constructors to the
+    # TarFile class. The open() method is the only one that is needed for
+    # public use; it is the "super"-constructor and is able to select an
+    # adequate "sub"-constructor for a particular compression using the mapping
+    # from OPEN_METH.
+    #
+    # This concept allows one to subclass TarFile without losing the comfort of
+    # the super-constructor. A sub-constructor is registered and made available
+    # by adding it to the mapping in OPEN_METH.
+
+    @classmethod
+    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
+        r"""Open a tar archive for reading, writing or appending. Return
+           an appropriate TarFile class.
+
+           mode:
+           'r' or 'r:\*' open for reading with transparent compression
+           'r:'         open for reading exclusively uncompressed
+           'r:gz'       open for reading with gzip compression
+           'r:bz2'      open for reading with bzip2 compression
+           'r:xz'       open for reading with lzma compression
+           'a' or 'a:'  open for appending, creating the file if necessary
+           'w' or 'w:'  open for writing without compression
+           'w:gz'       open for writing with gzip compression
+           'w:bz2'      open for writing with bzip2 compression
+           'w:xz'       open for writing with lzma compression
+
+           'x' or 'x:'  create a tarfile exclusively without compression, raise
+                        an exception if the file is already created
+           'x:gz'       create a gzip compressed tarfile, raise an exception
+                        if the file is already created
+           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
+                        if the file is already created
+           'x:xz'       create an lzma compressed tarfile, raise an exception
+                        if the file is already created
+
+           'r|\*'        open a stream of tar blocks with transparent compression
+           'r|'         open an uncompressed stream of tar blocks for reading
+           'r|gz'       open a gzip compressed stream of tar blocks
+           'r|bz2'      open a bzip2 compressed stream of tar blocks
+           'r|xz'       open an lzma compressed stream of tar blocks
+           'w|'         open an uncompressed stream for writing
+           'w|gz'       open a gzip compressed stream for writing
+           'w|bz2'      open a bzip2 compressed stream for writing
+           'w|xz'       open an lzma compressed stream for writing
+        """
+
+        if not name and not fileobj:
+            raise ValueError("nothing to open")
+
+        if mode in ("r", "r:*"):
+            # Find out which *open() is appropriate for opening the file.
+            def not_compressed(comptype):
+                return cls.OPEN_METH[comptype] == 'taropen'
+            error_msgs = []
+            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
+                func = getattr(cls, cls.OPEN_METH[comptype])
+                if fileobj is not None:
+                    saved_pos = fileobj.tell()
+                try:
+                    return func(name, "r", fileobj, **kwargs)
+                except (ReadError, CompressionError) as e:
+                    error_msgs.append(f'- method {comptype}: {e!r}')
+                    if fileobj is not None:
+                        fileobj.seek(saved_pos)
+                    continue
+            error_msgs_summary = '\n'.join(error_msgs)
+            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
+
+        elif ":" in mode:
+            filemode, comptype = mode.split(":", 1)
+            filemode = filemode or "r"
+            comptype = comptype or "tar"
+
+            # Select the *open() function according to
+            # given compression.
+            if comptype in cls.OPEN_METH:
+                func = getattr(cls, cls.OPEN_METH[comptype])
+            else:
+                raise CompressionError("unknown compression type %r" % comptype)
+            return func(name, filemode, fileobj, **kwargs)
+
+        elif "|" in mode:
+            filemode, comptype = mode.split("|", 1)
+            filemode = filemode or "r"
+            comptype = comptype or "tar"
+
+            if filemode not in ("r", "w"):
+                raise ValueError("mode must be 'r' or 'w'")
+
+            compresslevel = kwargs.pop("compresslevel", 9)
+            stream = _Stream(name, filemode, comptype, fileobj, bufsize,
+                             compresslevel)
+            try:
+                t = cls(name, filemode, stream, **kwargs)
+            except:
+                stream.close()
+                raise
+            t._extfileobj = False
+            return t
+
+        elif mode in ("a", "w", "x"):
+            return cls.taropen(name, mode, fileobj, **kwargs)
+
+        raise ValueError("undiscernible mode")
+
+    @classmethod
+    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
+        """Open uncompressed tar archive name for reading or writing.
+        """
+        if mode not in ("r", "a", "w", "x"):
+            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
+        return cls(name, mode, fileobj, **kwargs)
+
+    @classmethod
+    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
+        """Open gzip compressed tar archive name for reading or writing.
+           Appending is not allowed.
+        """
+        if mode not in ("r", "w", "x"):
+            raise ValueError("mode must be 'r', 'w' or 'x'")
+
+        try:
+            from gzip import GzipFile
+        except ImportError:
+            raise CompressionError("gzip module is not available") from None
+
+        try:
+            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
+        except OSError as e:
+            if fileobj is not None and mode == 'r':
+                raise ReadError("not a gzip file") from e
+            raise
+
+        try:
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+        except OSError as e:
+            fileobj.close()
+            if mode == 'r':
+                raise ReadError("not a gzip file") from e
+            raise
+        except:
+            fileobj.close()
+            raise
+        t._extfileobj = False
+        return t
+
+    @classmethod
+    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
+        """Open bzip2 compressed tar archive name for reading or writing.
+           Appending is not allowed.
+        """
+        if mode not in ("r", "w", "x"):
+            raise ValueError("mode must be 'r', 'w' or 'x'")
+
+        try:
+            from bz2 import BZ2File
+        except ImportError:
+            raise CompressionError("bz2 module is not available") from None
+
+        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
+
+        try:
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+        except (OSError, EOFError) as e:
+            fileobj.close()
+            if mode == 'r':
+                raise ReadError("not a bzip2 file") from e
+            raise
+        except:
+            fileobj.close()
+            raise
+        t._extfileobj = False
+        return t
+
+    @classmethod
+    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
+        """Open lzma compressed tar archive name for reading or writing.
+           Appending is not allowed.
+        """
+        if mode not in ("r", "w", "x"):
+            raise ValueError("mode must be 'r', 'w' or 'x'")
+
+        try:
+            from lzma import LZMAFile, LZMAError
+        except ImportError:
+            raise CompressionError("lzma module is not available") from None
+
+        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
+
+        try:
+            t = cls.taropen(name, mode, fileobj, **kwargs)
+        except (LZMAError, EOFError) as e:
+            fileobj.close()
+            if mode == 'r':
+                raise ReadError("not an lzma file") from e
+            raise
+        except:
+            fileobj.close()
+            raise
+        t._extfileobj = False
+        return t
+
+    # All *open() methods are registered here.
+    OPEN_METH = {
+        "tar": "taropen",   # uncompressed tar
+        "gz":  "gzopen",    # gzip compressed tar
+        "bz2": "bz2open",   # bzip2 compressed tar
+        "xz":  "xzopen"     # lzma compressed tar
+    }
+
+    #--------------------------------------------------------------------------
+    # The public methods which TarFile provides:
+
+    def close(self):
+        """Close the TarFile. In write-mode, two finishing zero blocks are
+           appended to the archive.
+        """
+        if self.closed:
+            return
+
+        self.closed = True
+        try:
+            if self.mode in ("a", "w", "x"):
+                self.fileobj.write(NUL * (BLOCKSIZE * 2))
+                self.offset += (BLOCKSIZE * 2)
+                # fill up the end with zero-blocks
+                # (like option -b20 for tar does)
+                blocks, remainder = divmod(self.offset, RECORDSIZE)
+                if remainder > 0:
+                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
+        finally:
+            if not self._extfileobj:
+                self.fileobj.close()
+
+    def getmember(self, name):
+        """Return a TarInfo object for member ``name``. If ``name`` can not be
+           found in the archive, KeyError is raised. If a member occurs more
+           than once in the archive, its last occurrence is assumed to be the
+           most up-to-date version.
+        """
+        tarinfo = self._getmember(name.rstrip('/'))
+        if tarinfo is None:
+            raise KeyError("filename %r not found" % name)
+        return tarinfo
+
+    def getmembers(self):
+        """Return the members of the archive as a list of TarInfo objects. The
+           list has the same order as the members in the archive.
+        """
+        self._check()
+        if not self._loaded:    # if we want to obtain a list of
+            self._load()        # all members, we first have to
+                                # scan the whole archive.
+        return self.members
+
+    def getnames(self):
+        """Return the members of the archive as a list of their names. It has
+           the same order as the list returned by getmembers().
+        """
+        return [tarinfo.name for tarinfo in self.getmembers()]
+
+    def gettarinfo(self, name=None, arcname=None, fileobj=None):
+        """Create a TarInfo object from the result of os.stat or equivalent
+           on an existing file. The file is either named by ``name``, or
+           specified as a file object ``fileobj`` with a file descriptor. If
+           given, ``arcname`` specifies an alternative name for the file in the
+           archive, otherwise, the name is taken from the 'name' attribute of
+           'fileobj', or the 'name' argument. The name should be a text
+           string.
+        """
+        self._check("awx")
+
+        # When fileobj is given, replace name by
+        # fileobj's real name.
+        if fileobj is not None:
+            name = fileobj.name
+
+        # Building the name of the member in the archive.
+        # Backward slashes are converted to forward slashes,
+        # Absolute paths are turned to relative paths.
+        if arcname is None:
+            arcname = name
+        drv, arcname = os.path.splitdrive(arcname)
+        arcname = arcname.replace(os.sep, "/")
+        arcname = arcname.lstrip("/")
+
+        # Now, fill the TarInfo object with
+        # information specific for the file.
+        tarinfo = self.tarinfo()
+        tarinfo.tarfile = self  # Not needed
+
+        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
+        if fileobj is None:
+            if not self.dereference:
+                statres = os.lstat(name)
+            else:
+                statres = os.stat(name)
+        else:
+            statres = os.fstat(fileobj.fileno())
+        linkname = ""
+
+        stmd = statres.st_mode
+        if stat.S_ISREG(stmd):
+            inode = (statres.st_ino, statres.st_dev)
+            if not self.dereference and statres.st_nlink > 1 and \
+                    inode in self.inodes and arcname != self.inodes[inode]:
+                # Is it a hardlink to an already
+                # archived file?
+                type = LNKTYPE
+                linkname = self.inodes[inode]
+            else:
+                # The inode is added only if its valid.
+                # For win32 it is always 0.
+                type = REGTYPE
+                if inode[0]:
+                    self.inodes[inode] = arcname
+        elif stat.S_ISDIR(stmd):
+            type = DIRTYPE
+        elif stat.S_ISFIFO(stmd):
+            type = FIFOTYPE
+        elif stat.S_ISLNK(stmd):
+            type = SYMTYPE
+            linkname = os.readlink(name)
+        elif stat.S_ISCHR(stmd):
+            type = CHRTYPE
+        elif stat.S_ISBLK(stmd):
+            type = BLKTYPE
+        else:
+            return None
+
+        # Fill the TarInfo object with all
+        # information we can get.
+        tarinfo.name = arcname
+        tarinfo.mode = stmd
+        tarinfo.uid = statres.st_uid
+        tarinfo.gid = statres.st_gid
+        if type == REGTYPE:
+            tarinfo.size = statres.st_size
+        else:
+            tarinfo.size = 0
+        tarinfo.mtime = statres.st_mtime
+        tarinfo.type = type
+        tarinfo.linkname = linkname
+        if pwd:
+            try:
+                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
+            except KeyError:
+                pass
+        if grp:
+            try:
+                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
+            except KeyError:
+                pass
+
+        if type in (CHRTYPE, BLKTYPE):
+            if hasattr(os, "major") and hasattr(os, "minor"):
+                tarinfo.devmajor = os.major(statres.st_rdev)
+                tarinfo.devminor = os.minor(statres.st_rdev)
+        return tarinfo
+
+    def list(self, verbose=True, *, members=None):
+        """Print a table of contents to sys.stdout. If ``verbose`` is False, only
+           the names of the members are printed. If it is True, an `ls -l'-like
+           output is produced. ``members`` is optional and must be a subset of the
+           list returned by getmembers().
+        """
+        self._check()
+
+        if members is None:
+            members = self
+        for tarinfo in members:
+            if verbose:
+                if tarinfo.mode is None:
+                    _safe_print("??????????")
+                else:
+                    _safe_print(stat.filemode(tarinfo.mode))
+                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
+                                       tarinfo.gname or tarinfo.gid))
+                if tarinfo.ischr() or tarinfo.isblk():
+                    _safe_print("%10s" %
+                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
+                else:
+                    _safe_print("%10d" % tarinfo.size)
+                if tarinfo.mtime is None:
+                    _safe_print("????-??-?? ??:??:??")
+                else:
+                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
+                                % time.localtime(tarinfo.mtime)[:6])
+
+            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
+
+            if verbose:
+                if tarinfo.issym():
+                    _safe_print("-> " + tarinfo.linkname)
+                if tarinfo.islnk():
+                    _safe_print("link to " + tarinfo.linkname)
+            print()
+
+    def add(self, name, arcname=None, recursive=True, *, filter=None):
+        """Add the file ``name`` to the archive. ``name`` may be any type of file
+           (directory, fifo, symbolic link, etc.). If given, ``arcname``
+           specifies an alternative name for the file in the archive.
+           Directories are added recursively by default. This can be avoided by
+           setting ``recursive`` to False. ``filter`` is a function
+           that expects a TarInfo object argument and returns the changed
+           TarInfo object, if it returns None the TarInfo object will be
+           excluded from the archive.
+        """
+        self._check("awx")
+
+        if arcname is None:
+            arcname = name
+
+        # Skip if somebody tries to archive the archive...
+        if self.name is not None and os.path.abspath(name) == self.name:
+            self._dbg(2, "tarfile: Skipped %r" % name)
+            return
+
+        self._dbg(1, name)
+
+        # Create a TarInfo object from the file.
+        tarinfo = self.gettarinfo(name, arcname)
+
+        if tarinfo is None:
+            self._dbg(1, "tarfile: Unsupported type %r" % name)
+            return
+
+        # Change or exclude the TarInfo object.
+        if filter is not None:
+            tarinfo = filter(tarinfo)
+            if tarinfo is None:
+                self._dbg(2, "tarfile: Excluded %r" % name)
+                return
+
+        # Append the tar header and data to the archive.
+        if tarinfo.isreg():
+            with bltn_open(name, "rb") as f:
+                self.addfile(tarinfo, f)
+
+        elif tarinfo.isdir():
+            self.addfile(tarinfo)
+            if recursive:
+                for f in sorted(os.listdir(name)):
+                    self.add(os.path.join(name, f), os.path.join(arcname, f),
+                            recursive, filter=filter)
+
+        else:
+            self.addfile(tarinfo)
+
+    def addfile(self, tarinfo, fileobj=None):
+        """Add the TarInfo object ``tarinfo`` to the archive. If ``fileobj`` is
+           given, it should be a binary file, and tarinfo.size bytes are read
+           from it and added to the archive. You can create TarInfo objects
+           directly, or by using gettarinfo().
+        """
+        self._check("awx")
+
+        tarinfo = copy.copy(tarinfo)
+
+        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
+        self.fileobj.write(buf)
+        self.offset += len(buf)
+        bufsize=self.copybufsize
+        # If there's data to follow, append it.
+        if fileobj is not None:
+            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
+            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
+            if remainder > 0:
+                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
+                blocks += 1
+            self.offset += blocks * BLOCKSIZE
+
+        self.members.append(tarinfo)
+
+    def _get_filter_function(self, filter):
+        if filter is None:
+            filter = self.extraction_filter
+            if filter is None:
+                warnings.warn(
+                    'Python 3.14 will, by default, filter extracted tar '
+                    + 'archives and reject files or modify their metadata. '
+                    + 'Use the filter argument to control this behavior.',
+                    DeprecationWarning)
+                return fully_trusted_filter
+            if isinstance(filter, str):
+                raise TypeError(
+                    'String names are not supported for '
+                    + 'TarFile.extraction_filter. Use a function such as '
+                    + 'tarfile.data_filter directly.')
+            return filter
+        if callable(filter):
+            return filter
+        try:
+            return _NAMED_FILTERS[filter]
+        except KeyError:
+            raise ValueError(f"filter {filter!r} not found") from None
+
+    def extractall(self, path=".", members=None, *, numeric_owner=False,
+                   filter=None):
+        """Extract all members from the archive to the current working
+           directory and set owner, modification time and permissions on
+           directories afterwards. `path' specifies a different directory
+           to extract to. `members' is optional and must be a subset of the
+           list returned by getmembers(). If `numeric_owner` is True, only
+           the numbers for user/group names are used and not the names.
+
+           The `filter` function will be called on each member just
+           before extraction.
+           It can return a changed TarInfo or None to skip the member.
+           String names of common filters are accepted.
+        """
+        directories = []
+
+        filter_function = self._get_filter_function(filter)
+        if members is None:
+            members = self
+
+        for member in members:
+            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
+            if tarinfo is None:
+                continue
+            if tarinfo.isdir():
+                # For directories, delay setting attributes until later,
+                # since permissions can interfere with extraction and
+                # extracting contents can reset mtime.
+                directories.append(tarinfo)
+            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
+                              numeric_owner=numeric_owner)
+
+        # Reverse sort directories.
+        directories.sort(key=lambda a: a.name, reverse=True)
+
+        # Set correct owner, mtime and filemode on directories.
+        for tarinfo in directories:
+            dirpath = os.path.join(path, tarinfo.name)
+            try:
+                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
+                self.utime(tarinfo, dirpath)
+                self.chmod(tarinfo, dirpath)
+            except ExtractError as e:
+                self._handle_nonfatal_error(e)
+
+    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
+                filter=None):
+        """Extract a member from the archive to the current working directory,
+           using its full name. Its file information is extracted as accurately
+           as possible. `member' may be a filename or a TarInfo object. You can
+           specify a different directory using `path'. File attributes (owner,
+           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
+           is True, only the numbers for user/group names are used and not
+           the names.
+
+           The `filter` function will be called before extraction.
+           It can return a changed TarInfo or None to skip the member.
+           String names of common filters are accepted.
+        """
+        filter_function = self._get_filter_function(filter)
+        tarinfo = self._get_extract_tarinfo(member, filter_function, path)
+        if tarinfo is not None:
+            self._extract_one(tarinfo, path, set_attrs, numeric_owner)
+
+    def _get_extract_tarinfo(self, member, filter_function, path):
+        """Get filtered TarInfo (or None) from member, which might be a str"""
+        if isinstance(member, str):
+            tarinfo = self.getmember(member)
+        else:
+            tarinfo = member
+
+        unfiltered = tarinfo
+        try:
+            tarinfo = filter_function(tarinfo, path)
+        except (OSError, FilterError) as e:
+            self._handle_fatal_error(e)
+        except ExtractError as e:
+            self._handle_nonfatal_error(e)
+        if tarinfo is None:
+            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
+            return None
+        # Prepare the link target for makelink().
+        if tarinfo.islnk():
+            tarinfo = copy.copy(tarinfo)
+            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
+        return tarinfo
+
+    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
+        """Extract from filtered tarinfo to disk"""
+        self._check("r")
+
+        try:
+            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
+                                 set_attrs=set_attrs,
+                                 numeric_owner=numeric_owner)
+        except OSError as e:
+            self._handle_fatal_error(e)
+        except ExtractError as e:
+            self._handle_nonfatal_error(e)
+
+    def _handle_nonfatal_error(self, e):
+        """Handle non-fatal error (ExtractError) according to errorlevel"""
+        if self.errorlevel > 1:
+            raise
+        else:
+            self._dbg(1, "tarfile: %s" % e)
+
+    def _handle_fatal_error(self, e):
+        """Handle "fatal" error according to self.errorlevel"""
+        if self.errorlevel > 0:
+            raise
+        elif isinstance(e, OSError):
+            if e.filename is None:
+                self._dbg(1, "tarfile: %s" % e.strerror)
+            else:
+                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
+        else:
+            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
+
+    def extractfile(self, member):
+        """Extract a member from the archive as a file object. ``member`` may be
+           a filename or a TarInfo object. If ``member`` is a regular file or
+           a link, an io.BufferedReader object is returned. For all other
+           existing members, None is returned. If ``member`` does not appear
+           in the archive, KeyError is raised.
+        """
+        self._check("r")
+
+        if isinstance(member, str):
+            tarinfo = self.getmember(member)
+        else:
+            tarinfo = member
+
+        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+            # Members with unknown types are treated as regular files.
+            return self.fileobject(self, tarinfo)
+
+        elif tarinfo.islnk() or tarinfo.issym():
+            if isinstance(self.fileobj, _Stream):
+                # A small but ugly workaround for the case that someone tries
+                # to extract a (sym)link as a file-object from a non-seekable
+                # stream of tar blocks.
+                raise StreamError("cannot extract (sym)link as file object")
+            else:
+                # A (sym)link's file object is its target's file object.
+                return self.extractfile(self._find_link_target(tarinfo))
+        else:
+            # If there's no data associated with the member (directory, chrdev,
+            # blkdev, etc.), return None instead of a file object.
+            return None
+
+    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
+                        numeric_owner=False):
+        """Extract the TarInfo object tarinfo to a physical
+           file called targetpath.
+        """
+        # Fetch the TarInfo object for the given name
+        # and build the destination pathname, replacing
+        # forward slashes to platform specific separators.
+        targetpath = targetpath.rstrip("/")
+        targetpath = targetpath.replace("/", os.sep)
+
+        # Create all upper directories.
+        upperdirs = os.path.dirname(targetpath)
+        if upperdirs and not os.path.exists(upperdirs):
+            # Create directories that are not part of the archive with
+            # default permissions.
+            os.makedirs(upperdirs)
+
+        if tarinfo.islnk() or tarinfo.issym():
+            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
+        else:
+            self._dbg(1, tarinfo.name)
+
+        if tarinfo.isreg():
+            self.makefile(tarinfo, targetpath)
+        elif tarinfo.isdir():
+            self.makedir(tarinfo, targetpath)
+        elif tarinfo.isfifo():
+            self.makefifo(tarinfo, targetpath)
+        elif tarinfo.ischr() or tarinfo.isblk():
+            self.makedev(tarinfo, targetpath)
+        elif tarinfo.islnk() or tarinfo.issym():
+            self.makelink(tarinfo, targetpath)
+        elif tarinfo.type not in SUPPORTED_TYPES:
+            self.makeunknown(tarinfo, targetpath)
+        else:
+            self.makefile(tarinfo, targetpath)
+
+        if set_attrs:
+            self.chown(tarinfo, targetpath, numeric_owner)
+            if not tarinfo.issym():
+                self.chmod(tarinfo, targetpath)
+                self.utime(tarinfo, targetpath)
+
+    #--------------------------------------------------------------------------
+    # Below are the different file methods. They are called via
+    # _extract_member() when extract() is called. They can be replaced in a
+    # subclass to implement other functionality.
+
+    def makedir(self, tarinfo, targetpath):
+        """Make a directory called targetpath.
+        """
+        try:
+            if tarinfo.mode is None:
+                # Use the system's default mode
+                os.mkdir(targetpath)
+            else:
+                # Use a safe mode for the directory, the real mode is set
+                # later in _extract_member().
+                os.mkdir(targetpath, 0o700)
+        except FileExistsError:
+            if not os.path.isdir(targetpath):
+                raise
+
+    def makefile(self, tarinfo, targetpath):
+        """Make a file called targetpath.
+        """
+        source = self.fileobj
+        source.seek(tarinfo.offset_data)
+        bufsize = self.copybufsize
+        with bltn_open(targetpath, "wb") as target:
+            if tarinfo.sparse is not None:
+                for offset, size in tarinfo.sparse:
+                    target.seek(offset)
+                    copyfileobj(source, target, size, ReadError, bufsize)
+                target.seek(tarinfo.size)
+                target.truncate()
+            else:
+                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
+
+    def makeunknown(self, tarinfo, targetpath):
+        """Make a file from a TarInfo object with an unknown type
+           at targetpath.
+        """
+        self.makefile(tarinfo, targetpath)
+        self._dbg(1, "tarfile: Unknown file type %r, " \
+                     "extracted as regular file." % tarinfo.type)
+
+    def makefifo(self, tarinfo, targetpath):
+        """Make a fifo called targetpath.
+        """
+        if hasattr(os, "mkfifo"):
+            os.mkfifo(targetpath)
+        else:
+            raise ExtractError("fifo not supported by system")
+
+    def makedev(self, tarinfo, targetpath):
+        """Make a character or block device called targetpath.
+        """
+        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
+            raise ExtractError("special devices not supported by system")
+
+        mode = tarinfo.mode
+        if mode is None:
+            # Use mknod's default
+            mode = 0o600
+        if tarinfo.isblk():
+            mode |= stat.S_IFBLK
+        else:
+            mode |= stat.S_IFCHR
+
+        os.mknod(targetpath, mode,
+                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
+
+    def makelink(self, tarinfo, targetpath):
+        """Make a (symbolic) link called targetpath. If it cannot be created
+          (platform limitation), we try to make a copy of the referenced file
+          instead of a link.
+        """
+        try:
+            # For systems that support symbolic and hard links.
+            if tarinfo.issym():
+                if os.path.lexists(targetpath):
+                    # Avoid FileExistsError on following os.symlink.
+                    os.unlink(targetpath)
+                os.symlink(tarinfo.linkname, targetpath)
+            else:
+                if os.path.exists(tarinfo._link_target):
+                    os.link(tarinfo._link_target, targetpath)
+                else:
+                    self._extract_member(self._find_link_target(tarinfo),
+                                         targetpath)
+        except symlink_exception:
+            try:
+                self._extract_member(self._find_link_target(tarinfo),
+                                     targetpath)
+            except KeyError:
+                raise ExtractError("unable to resolve link inside archive") from None
+
+    def chown(self, tarinfo, targetpath, numeric_owner):
+        """Set owner of targetpath according to tarinfo. If numeric_owner
+           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
+           is False, fall back to .gid/.uid when the search based on name
+           fails.
+        """
+        if hasattr(os, "geteuid") and os.geteuid() == 0:
+            # We have to be root to do so.
+            g = tarinfo.gid
+            u = tarinfo.uid
+            if not numeric_owner:
+                try:
+                    if grp and tarinfo.gname:
+                        g = grp.getgrnam(tarinfo.gname)[2]
+                except KeyError:
+                    pass
+                try:
+                    if pwd and tarinfo.uname:
+                        u = pwd.getpwnam(tarinfo.uname)[2]
+                except KeyError:
+                    pass
+            if g is None:
+                g = -1
+            if u is None:
+                u = -1
+            try:
+                if tarinfo.issym() and hasattr(os, "lchown"):
+                    os.lchown(targetpath, u, g)
+                else:
+                    os.chown(targetpath, u, g)
+            except OSError as e:
+                raise ExtractError("could not change owner") from e
+
+    def chmod(self, tarinfo, targetpath):
+        """Set file permissions of targetpath according to tarinfo.
+        """
+        if tarinfo.mode is None:
+            return
+        try:
+            os.chmod(targetpath, tarinfo.mode)
+        except OSError as e:
+            raise ExtractError("could not change mode") from e
+
+    def utime(self, tarinfo, targetpath):
+        """Set modification time of targetpath according to tarinfo.
+        """
+        mtime = tarinfo.mtime
+        if mtime is None:
+            return
+        if not hasattr(os, 'utime'):
+            return
+        try:
+            os.utime(targetpath, (mtime, mtime))
+        except OSError as e:
+            raise ExtractError("could not change modification time") from e
+
+    #--------------------------------------------------------------------------
+    def next(self):
+        """Return the next member of the archive as a TarInfo object, when
+           TarFile is opened for reading. Return None if there is no more
+           available.
+        """
+        self._check("ra")
+        if self.firstmember is not None:
+            m = self.firstmember
+            self.firstmember = None
+            return m
+
+        # Advance the file pointer.
+        if self.offset != self.fileobj.tell():
+            if self.offset == 0:
+                return None
+            self.fileobj.seek(self.offset - 1)
+            if not self.fileobj.read(1):
+                raise ReadError("unexpected end of data")
+
+        # Read the next block.
+        tarinfo = None
+        while True:
+            try:
+                tarinfo = self.tarinfo.fromtarfile(self)
+            except EOFHeaderError as e:
+                if self.ignore_zeros:
+                    self._dbg(2, "0x%X: %s" % (self.offset, e))
+                    self.offset += BLOCKSIZE
+                    continue
+            except InvalidHeaderError as e:
+                if self.ignore_zeros:
+                    self._dbg(2, "0x%X: %s" % (self.offset, e))
+                    self.offset += BLOCKSIZE
+                    continue
+                elif self.offset == 0:
+                    raise ReadError(str(e)) from None
+            except EmptyHeaderError:
+                if self.offset == 0:
+                    raise ReadError("empty file") from None
+            except TruncatedHeaderError as e:
+                if self.offset == 0:
+                    raise ReadError(str(e)) from None
+            except SubsequentHeaderError as e:
+                raise ReadError(str(e)) from None
+            except Exception as e:
+                try:
+                    import zlib
+                    if isinstance(e, zlib.error):
+                        raise ReadError(f'zlib error: {e}') from None
+                    else:
+                        raise e
+                except ImportError:
+                    raise e
+            break
+
+        if tarinfo is not None:
+            self.members.append(tarinfo)
+        else:
+            self._loaded = True
+
+        return tarinfo
+
+    #--------------------------------------------------------------------------
+    # Little helper methods:
+
+    def _getmember(self, name, tarinfo=None, normalize=False):
+        """Find an archive member by name from bottom to top.
+           If tarinfo is given, it is used as the starting point.
+        """
+        # Ensure that all members have been loaded.
+        members = self.getmembers()
+
+        # Limit the member search list up to tarinfo.
+        skipping = False
+        if tarinfo is not None:
+            try:
+                index = members.index(tarinfo)
+            except ValueError:
+                # The given starting point might be a (modified) copy.
+                # We'll later skip members until we find an equivalent.
+                skipping = True
+            else:
+                # Happy fast path
+                members = members[:index]
+
+        if normalize:
+            name = os.path.normpath(name)
+
+        for member in reversed(members):
+            if skipping:
+                if tarinfo.offset == member.offset:
+                    skipping = False
+                continue
+            if normalize:
+                member_name = os.path.normpath(member.name)
+            else:
+                member_name = member.name
+
+            if name == member_name:
+                return member
+
+        if skipping:
+            # Starting point was not found
+            raise ValueError(tarinfo)
+
+    def _load(self):
+        """Read through the entire archive file and look for readable
+           members.
+        """
+        while self.next() is not None:
+            pass
+        self._loaded = True
+
+    def _check(self, mode=None):
+        """Check if TarFile is still open, and if the operation's mode
+           corresponds to TarFile's mode.
+        """
+        if self.closed:
+            raise OSError("%s is closed" % self.__class__.__name__)
+        if mode is not None and self.mode not in mode:
+            raise OSError("bad operation for mode %r" % self.mode)
+
+    def _find_link_target(self, tarinfo):
+        """Find the target member of a symlink or hardlink member in the
+           archive.
+        """
+        if tarinfo.issym():
+            # Always search the entire archive.
+            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
+            limit = None
+        else:
+            # Search the archive before the link, because a hard link is
+            # just a reference to an already archived file.
+            linkname = tarinfo.linkname
+            limit = tarinfo
+
+        member = self._getmember(linkname, tarinfo=limit, normalize=True)
+        if member is None:
+            raise KeyError("linkname %r not found" % linkname)
+        return member
+
+    def __iter__(self):
+        """Provide an iterator object.
+        """
+        if self._loaded:
+            yield from self.members
+            return
+
+        # Yield items using TarFile's next() method.
+        # When all members have been read, set TarFile as _loaded.
+        index = 0
+        # Fix for SF #1100429: Under rare circumstances it can
+        # happen that getmembers() is called during iteration,
+        # which will have already exhausted the next() method.
+        if self.firstmember is not None:
+            tarinfo = self.next()
+            index += 1
+            yield tarinfo
+
+        while True:
+            if index < len(self.members):
+                tarinfo = self.members[index]
+            elif not self._loaded:
+                tarinfo = self.next()
+                if not tarinfo:
+                    self._loaded = True
+                    return
+            else:
+                return
+            index += 1
+            yield tarinfo
+
+    def _dbg(self, level, msg):
+        """Write debugging output to sys.stderr.
+        """
+        if level <= self.debug:
+            print(msg, file=sys.stderr)
+
+    def __enter__(self):
+        self._check()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        if type is None:
+            self.close()
+        else:
+            # An exception occurred. We must not call close() because
+            # it would try to write end-of-archive blocks and padding.
+            if not self._extfileobj:
+                self.fileobj.close()
+            self.closed = True
+
+#--------------------
+# exported functions
+#--------------------
+
+def is_tarfile(name):
+    """Return True if name points to a tar archive that we
+       are able to handle, else return False.
+
+       'name' should be a string, file, or file-like object.
+    """
+    try:
+        if hasattr(name, "read"):
+            pos = name.tell()
+            t = open(fileobj=name)
+            name.seek(pos)
+        else:
+            t = open(name)
+        t.close()
+        return True
+    except TarError:
+        return False
+
+open = TarFile.open
+
+
+def main():
+    import argparse
+
+    description = 'A simple command-line interface for tarfile module.'
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('-v', '--verbose', action='store_true', default=False,
+                        help='Verbose output')
+    parser.add_argument('--filter', metavar='',
+                        choices=_NAMED_FILTERS,
+                        help='Filter for extraction')
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('-l', '--list', metavar='',
+                       help='Show listing of a tarfile')
+    group.add_argument('-e', '--extract', nargs='+',
+                       metavar=('', ''),
+                       help='Extract tarfile into target dir')
+    group.add_argument('-c', '--create', nargs='+',
+                       metavar=('', ''),
+                       help='Create tarfile from sources')
+    group.add_argument('-t', '--test', metavar='',
+                       help='Test if a tarfile is valid')
+
+    args = parser.parse_args()
+
+    if args.filter and args.extract is None:
+        parser.exit(1, '--filter is only valid for extraction\n')
+
+    if args.test is not None:
+        src = args.test
+        if is_tarfile(src):
+            with open(src, 'r') as tar:
+                tar.getmembers()
+                print(tar.getmembers(), file=sys.stderr)
+            if args.verbose:
+                print('{!r} is a tar archive.'.format(src))
+        else:
+            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
+
+    elif args.list is not None:
+        src = args.list
+        if is_tarfile(src):
+            with TarFile.open(src, 'r:*') as tf:
+                tf.list(verbose=args.verbose)
+        else:
+            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
+
+    elif args.extract is not None:
+        if len(args.extract) == 1:
+            src = args.extract[0]
+            curdir = os.curdir
+        elif len(args.extract) == 2:
+            src, curdir = args.extract
+        else:
+            parser.exit(1, parser.format_help())
+
+        if is_tarfile(src):
+            with TarFile.open(src, 'r:*') as tf:
+                tf.extractall(path=curdir, filter=args.filter)
+            if args.verbose:
+                if curdir == '.':
+                    msg = '{!r} file is extracted.'.format(src)
+                else:
+                    msg = ('{!r} file is extracted '
+                           'into {!r} directory.').format(src, curdir)
+                print(msg)
+        else:
+            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
+
+    elif args.create is not None:
+        tar_name = args.create.pop(0)
+        _, ext = os.path.splitext(tar_name)
+        compressions = {
+            # gz
+            '.gz': 'gz',
+            '.tgz': 'gz',
+            # xz
+            '.xz': 'xz',
+            '.txz': 'xz',
+            # bz2
+            '.bz2': 'bz2',
+            '.tbz': 'bz2',
+            '.tbz2': 'bz2',
+            '.tb2': 'bz2',
+        }
+        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
+        tar_files = args.create
+
+        with TarFile.open(tar_name, tar_mode) as tf:
+            for file_name in tar_files:
+                tf.add(file_name)
+
+        if args.verbose:
+            print('{!r} file created.'.format(tar_name))
+
+if __name__ == '__main__':
+    main()
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..886421437557e5d898f5e608ea7e9f23662f01bb
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__init__.py
@@ -0,0 +1,904 @@
+import os
+import re
+import abc
+import csv
+import sys
+from .. import zipp
+import email
+import pathlib
+import operator
+import textwrap
+import warnings
+import functools
+import itertools
+import posixpath
+import collections
+
+from . import _adapters, _meta, _py39compat
+from ._collections import FreezableDefaultDict, Pair
+from ._compat import (
+    NullFinder,
+    install,
+    pypy_partial,
+)
+from ._functools import method_cache, pass_none
+from ._itertools import always_iterable, unique_everseen
+from ._meta import PackageMetadata, SimplePath
+
+from contextlib import suppress
+from importlib import import_module
+from importlib.abc import MetaPathFinder
+from itertools import starmap
+from typing import List, Mapping, Optional
+
+
+__all__ = [
+    'Distribution',
+    'DistributionFinder',
+    'PackageMetadata',
+    'PackageNotFoundError',
+    'distribution',
+    'distributions',
+    'entry_points',
+    'files',
+    'metadata',
+    'packages_distributions',
+    'requires',
+    'version',
+]
+
+
+class PackageNotFoundError(ModuleNotFoundError):
+    """The package was not found."""
+
+    def __str__(self):
+        return f"No package metadata was found for {self.name}"
+
+    @property
+    def name(self):
+        (name,) = self.args
+        return name
+
+
+class Sectioned:
+    """
+    A simple entry point config parser for performance
+
+    >>> for item in Sectioned.read(Sectioned._sample):
+    ...     print(item)
+    Pair(name='sec1', value='# comments ignored')
+    Pair(name='sec1', value='a = 1')
+    Pair(name='sec1', value='b = 2')
+    Pair(name='sec2', value='a = 2')
+
+    >>> res = Sectioned.section_pairs(Sectioned._sample)
+    >>> item = next(res)
+    >>> item.name
+    'sec1'
+    >>> item.value
+    Pair(name='a', value='1')
+    >>> item = next(res)
+    >>> item.value
+    Pair(name='b', value='2')
+    >>> item = next(res)
+    >>> item.name
+    'sec2'
+    >>> item.value
+    Pair(name='a', value='2')
+    >>> list(res)
+    []
+    """
+
+    _sample = textwrap.dedent(
+        """
+        [sec1]
+        # comments ignored
+        a = 1
+        b = 2
+
+        [sec2]
+        a = 2
+        """
+    ).lstrip()
+
+    @classmethod
+    def section_pairs(cls, text):
+        return (
+            section._replace(value=Pair.parse(section.value))
+            for section in cls.read(text, filter_=cls.valid)
+            if section.name is not None
+        )
+
+    @staticmethod
+    def read(text, filter_=None):
+        lines = filter(filter_, map(str.strip, text.splitlines()))
+        name = None
+        for value in lines:
+            section_match = value.startswith('[') and value.endswith(']')
+            if section_match:
+                name = value.strip('[]')
+                continue
+            yield Pair(name, value)
+
+    @staticmethod
+    def valid(line):
+        return line and not line.startswith('#')
+
+
+class DeprecatedTuple:
+    """
+    Provide subscript item access for backward compatibility.
+
+    >>> recwarn = getfixture('recwarn')
+    >>> ep = EntryPoint(name='name', value='value', group='group')
+    >>> ep[:]
+    ('name', 'value', 'group')
+    >>> ep[0]
+    'name'
+    >>> len(recwarn)
+    1
+    """
+
+    # Do not remove prior to 2023-05-01 or Python 3.13
+    _warn = functools.partial(
+        warnings.warn,
+        "EntryPoint tuple interface is deprecated. Access members by name.",
+        DeprecationWarning,
+        stacklevel=pypy_partial(2),
+    )
+
+    def __getitem__(self, item):
+        self._warn()
+        return self._key()[item]
+
+
+class EntryPoint(DeprecatedTuple):
+    """An entry point as defined by Python packaging conventions.
+
+    See `the packaging docs on entry points
+    `_
+    for more information.
+
+    >>> ep = EntryPoint(
+    ...     name=None, group=None, value='package.module:attr [extra1, extra2]')
+    >>> ep.module
+    'package.module'
+    >>> ep.attr
+    'attr'
+    >>> ep.extras
+    ['extra1', 'extra2']
+    """
+
+    pattern = re.compile(
+        r'(?P[\w.]+)\s*'
+        r'(:\s*(?P[\w.]+)\s*)?'
+        r'((?P\[.*\])\s*)?$'
+    )
+    """
+    A regular expression describing the syntax for an entry point,
+    which might look like:
+
+        - module
+        - package.module
+        - package.module:attribute
+        - package.module:object.attribute
+        - package.module:attr [extra1, extra2]
+
+    Other combinations are possible as well.
+
+    The expression is lenient about whitespace around the ':',
+    following the attr, and following any extras.
+    """
+
+    name: str
+    value: str
+    group: str
+
+    dist: Optional['Distribution'] = None
+
+    def __init__(self, name, value, group):
+        vars(self).update(name=name, value=value, group=group)
+
+    def load(self):
+        """Load the entry point from its definition. If only a module
+        is indicated by the value, return that module. Otherwise,
+        return the named object.
+        """
+        match = self.pattern.match(self.value)
+        module = import_module(match.group('module'))
+        attrs = filter(None, (match.group('attr') or '').split('.'))
+        return functools.reduce(getattr, attrs, module)
+
+    @property
+    def module(self):
+        match = self.pattern.match(self.value)
+        return match.group('module')
+
+    @property
+    def attr(self):
+        match = self.pattern.match(self.value)
+        return match.group('attr')
+
+    @property
+    def extras(self):
+        match = self.pattern.match(self.value)
+        return re.findall(r'\w+', match.group('extras') or '')
+
+    def _for(self, dist):
+        vars(self).update(dist=dist)
+        return self
+
+    def matches(self, **params):
+        """
+        EntryPoint matches the given parameters.
+
+        >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]')
+        >>> ep.matches(group='foo')
+        True
+        >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]')
+        True
+        >>> ep.matches(group='foo', name='other')
+        False
+        >>> ep.matches()
+        True
+        >>> ep.matches(extras=['extra1', 'extra2'])
+        True
+        >>> ep.matches(module='bing')
+        True
+        >>> ep.matches(attr='bong')
+        True
+        """
+        attrs = (getattr(self, param) for param in params)
+        return all(map(operator.eq, params.values(), attrs))
+
+    def _key(self):
+        return self.name, self.value, self.group
+
+    def __lt__(self, other):
+        return self._key() < other._key()
+
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __setattr__(self, name, value):
+        raise AttributeError("EntryPoint objects are immutable.")
+
+    def __repr__(self):
+        return (
+            f'EntryPoint(name={self.name!r}, value={self.value!r}, '
+            f'group={self.group!r})'
+        )
+
+    def __hash__(self):
+        return hash(self._key())
+
+
+class EntryPoints(tuple):
+    """
+    An immutable collection of selectable EntryPoint objects.
+    """
+
+    __slots__ = ()
+
+    def __getitem__(self, name):  # -> EntryPoint:
+        """
+        Get the EntryPoint in self matching name.
+        """
+        try:
+            return next(iter(self.select(name=name)))
+        except StopIteration:
+            raise KeyError(name)
+
+    def select(self, **params):
+        """
+        Select entry points from self that match the
+        given parameters (typically group and/or name).
+        """
+        return EntryPoints(ep for ep in self if _py39compat.ep_matches(ep, **params))
+
+    @property
+    def names(self):
+        """
+        Return the set of all names of all entry points.
+        """
+        return {ep.name for ep in self}
+
+    @property
+    def groups(self):
+        """
+        Return the set of all groups of all entry points.
+        """
+        return {ep.group for ep in self}
+
+    @classmethod
+    def _from_text_for(cls, text, dist):
+        return cls(ep._for(dist) for ep in cls._from_text(text))
+
+    @staticmethod
+    def _from_text(text):
+        return (
+            EntryPoint(name=item.value.name, value=item.value.value, group=item.name)
+            for item in Sectioned.section_pairs(text or '')
+        )
+
+
+class PackagePath(pathlib.PurePosixPath):
+    """A reference to a path in a package"""
+
+    def read_text(self, encoding='utf-8'):
+        with self.locate().open(encoding=encoding) as stream:
+            return stream.read()
+
+    def read_binary(self):
+        with self.locate().open('rb') as stream:
+            return stream.read()
+
+    def locate(self):
+        """Return a path-like object for this path"""
+        return self.dist.locate_file(self)
+
+
+class FileHash:
+    def __init__(self, spec):
+        self.mode, _, self.value = spec.partition('=')
+
+    def __repr__(self):
+        return f''
+
+
+class Distribution(metaclass=abc.ABCMeta):
+    """A Python distribution package."""
+
+    @abc.abstractmethod
+    def read_text(self, filename):
+        """Attempt to load metadata file given by the name.
+
+        :param filename: The name of the file in the distribution info.
+        :return: The text if found, otherwise None.
+        """
+
+    @abc.abstractmethod
+    def locate_file(self, path):
+        """
+        Given a path to a file in this distribution, return a path
+        to it.
+        """
+
+    @classmethod
+    def from_name(cls, name: str):
+        """Return the Distribution for the given package name.
+
+        :param name: The name of the distribution package to search for.
+        :return: The Distribution instance (or subclass thereof) for the named
+            package, if found.
+        :raises PackageNotFoundError: When the named package's distribution
+            metadata cannot be found.
+        :raises ValueError: When an invalid value is supplied for name.
+        """
+        if not name:
+            raise ValueError("A distribution name is required.")
+        try:
+            return next(cls.discover(name=name))
+        except StopIteration:
+            raise PackageNotFoundError(name)
+
+    @classmethod
+    def discover(cls, **kwargs):
+        """Return an iterable of Distribution objects for all packages.
+
+        Pass a ``context`` or pass keyword arguments for constructing
+        a context.
+
+        :context: A ``DistributionFinder.Context`` object.
+        :return: Iterable of Distribution objects for all packages.
+        """
+        context = kwargs.pop('context', None)
+        if context and kwargs:
+            raise ValueError("cannot accept context and kwargs")
+        context = context or DistributionFinder.Context(**kwargs)
+        return itertools.chain.from_iterable(
+            resolver(context) for resolver in cls._discover_resolvers()
+        )
+
+    @staticmethod
+    def at(path):
+        """Return a Distribution for the indicated metadata path
+
+        :param path: a string or path-like object
+        :return: a concrete Distribution instance for the path
+        """
+        return PathDistribution(pathlib.Path(path))
+
+    @staticmethod
+    def _discover_resolvers():
+        """Search the meta_path for resolvers."""
+        declared = (
+            getattr(finder, 'find_distributions', None) for finder in sys.meta_path
+        )
+        return filter(None, declared)
+
+    @property
+    def metadata(self) -> _meta.PackageMetadata:
+        """Return the parsed metadata for this Distribution.
+
+        The returned object will have keys that name the various bits of
+        metadata.  See PEP 566 for details.
+        """
+        text = (
+            self.read_text('METADATA')
+            or self.read_text('PKG-INFO')
+            # This last clause is here to support old egg-info files.  Its
+            # effect is to just end up using the PathDistribution's self._path
+            # (which points to the egg-info file) attribute unchanged.
+            or self.read_text('')
+        )
+        return _adapters.Message(email.message_from_string(text))
+
+    @property
+    def name(self):
+        """Return the 'Name' metadata for the distribution package."""
+        return self.metadata['Name']
+
+    @property
+    def _normalized_name(self):
+        """Return a normalized version of the name."""
+        return Prepared.normalize(self.name)
+
+    @property
+    def version(self):
+        """Return the 'Version' metadata for the distribution package."""
+        return self.metadata['Version']
+
+    @property
+    def entry_points(self):
+        return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self)
+
+    @property
+    def files(self):
+        """Files in this distribution.
+
+        :return: List of PackagePath for this distribution or None
+
+        Result is `None` if the metadata file that enumerates files
+        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
+        missing.
+        Result may be empty if the metadata exists but is empty.
+        """
+
+        def make_file(name, hash=None, size_str=None):
+            result = PackagePath(name)
+            result.hash = FileHash(hash) if hash else None
+            result.size = int(size_str) if size_str else None
+            result.dist = self
+            return result
+
+        @pass_none
+        def make_files(lines):
+            return list(starmap(make_file, csv.reader(lines)))
+
+        return make_files(self._read_files_distinfo() or self._read_files_egginfo())
+
+    def _read_files_distinfo(self):
+        """
+        Read the lines of RECORD
+        """
+        text = self.read_text('RECORD')
+        return text and text.splitlines()
+
+    def _read_files_egginfo(self):
+        """
+        SOURCES.txt might contain literal commas, so wrap each line
+        in quotes.
+        """
+        text = self.read_text('SOURCES.txt')
+        return text and map('"{}"'.format, text.splitlines())
+
+    @property
+    def requires(self):
+        """Generated requirements specified for this Distribution"""
+        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
+        return reqs and list(reqs)
+
+    def _read_dist_info_reqs(self):
+        return self.metadata.get_all('Requires-Dist')
+
+    def _read_egg_info_reqs(self):
+        source = self.read_text('requires.txt')
+        return pass_none(self._deps_from_requires_text)(source)
+
+    @classmethod
+    def _deps_from_requires_text(cls, source):
+        return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source))
+
+    @staticmethod
+    def _convert_egg_info_reqs_to_simple_reqs(sections):
+        """
+        Historically, setuptools would solicit and store 'extra'
+        requirements, including those with environment markers,
+        in separate sections. More modern tools expect each
+        dependency to be defined separately, with any relevant
+        extras and environment markers attached directly to that
+        requirement. This method converts the former to the
+        latter. See _test_deps_from_requires_text for an example.
+        """
+
+        def make_condition(name):
+            return name and f'extra == "{name}"'
+
+        def quoted_marker(section):
+            section = section or ''
+            extra, sep, markers = section.partition(':')
+            if extra and markers:
+                markers = f'({markers})'
+            conditions = list(filter(None, [markers, make_condition(extra)]))
+            return '; ' + ' and '.join(conditions) if conditions else ''
+
+        def url_req_space(req):
+            """
+            PEP 508 requires a space between the url_spec and the quoted_marker.
+            Ref python/importlib_metadata#357.
+            """
+            # '@' is uniquely indicative of a url_req.
+            return ' ' * ('@' in req)
+
+        for section in sections:
+            space = url_req_space(section.value)
+            yield section.value + space + quoted_marker(section.name)
+
+
+class DistributionFinder(MetaPathFinder):
+    """
+    A MetaPathFinder capable of discovering installed distributions.
+    """
+
+    class Context:
+        """
+        Keyword arguments presented by the caller to
+        ``distributions()`` or ``Distribution.discover()``
+        to narrow the scope of a search for distributions
+        in all DistributionFinders.
+
+        Each DistributionFinder may expect any parameters
+        and should attempt to honor the canonical
+        parameters defined below when appropriate.
+        """
+
+        name = None
+        """
+        Specific name for which a distribution finder should match.
+        A name of ``None`` matches all distributions.
+        """
+
+        def __init__(self, **kwargs):
+            vars(self).update(kwargs)
+
+        @property
+        def path(self):
+            """
+            The sequence of directory path that a distribution finder
+            should search.
+
+            Typically refers to Python installed package paths such as
+            "site-packages" directories and defaults to ``sys.path``.
+            """
+            return vars(self).get('path', sys.path)
+
+    @abc.abstractmethod
+    def find_distributions(self, context=Context()):
+        """
+        Find distributions.
+
+        Return an iterable of all Distribution instances capable of
+        loading the metadata for packages matching the ``context``,
+        a DistributionFinder.Context instance.
+        """
+
+
+class FastPath:
+    """
+    Micro-optimized class for searching a path for
+    children.
+
+    >>> FastPath('').children()
+    ['...']
+    """
+
+    @functools.lru_cache()  # type: ignore
+    def __new__(cls, root):
+        return super().__new__(cls)
+
+    def __init__(self, root):
+        self.root = root
+
+    def joinpath(self, child):
+        return pathlib.Path(self.root, child)
+
+    def children(self):
+        with suppress(Exception):
+            return os.listdir(self.root or '.')
+        with suppress(Exception):
+            return self.zip_children()
+        return []
+
+    def zip_children(self):
+        zip_path = zipp.Path(self.root)
+        names = zip_path.root.namelist()
+        self.joinpath = zip_path.joinpath
+
+        return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names)
+
+    def search(self, name):
+        return self.lookup(self.mtime).search(name)
+
+    @property
+    def mtime(self):
+        with suppress(OSError):
+            return os.stat(self.root).st_mtime
+        self.lookup.cache_clear()
+
+    @method_cache
+    def lookup(self, mtime):
+        return Lookup(self)
+
+
+class Lookup:
+    def __init__(self, path: FastPath):
+        base = os.path.basename(path.root).lower()
+        base_is_egg = base.endswith(".egg")
+        self.infos = FreezableDefaultDict(list)
+        self.eggs = FreezableDefaultDict(list)
+
+        for child in path.children():
+            low = child.lower()
+            if low.endswith((".dist-info", ".egg-info")):
+                # rpartition is faster than splitext and suitable for this purpose.
+                name = low.rpartition(".")[0].partition("-")[0]
+                normalized = Prepared.normalize(name)
+                self.infos[normalized].append(path.joinpath(child))
+            elif base_is_egg and low == "egg-info":
+                name = base.rpartition(".")[0].partition("-")[0]
+                legacy_normalized = Prepared.legacy_normalize(name)
+                self.eggs[legacy_normalized].append(path.joinpath(child))
+
+        self.infos.freeze()
+        self.eggs.freeze()
+
+    def search(self, prepared):
+        infos = (
+            self.infos[prepared.normalized]
+            if prepared
+            else itertools.chain.from_iterable(self.infos.values())
+        )
+        eggs = (
+            self.eggs[prepared.legacy_normalized]
+            if prepared
+            else itertools.chain.from_iterable(self.eggs.values())
+        )
+        return itertools.chain(infos, eggs)
+
+
+class Prepared:
+    """
+    A prepared search for metadata on a possibly-named package.
+    """
+
+    normalized = None
+    legacy_normalized = None
+
+    def __init__(self, name):
+        self.name = name
+        if name is None:
+            return
+        self.normalized = self.normalize(name)
+        self.legacy_normalized = self.legacy_normalize(name)
+
+    @staticmethod
+    def normalize(name):
+        """
+        PEP 503 normalization plus dashes as underscores.
+        """
+        return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')
+
+    @staticmethod
+    def legacy_normalize(name):
+        """
+        Normalize the package name as found in the convention in
+        older packaging tools versions and specs.
+        """
+        return name.lower().replace('-', '_')
+
+    def __bool__(self):
+        return bool(self.name)
+
+
+@install
+class MetadataPathFinder(NullFinder, DistributionFinder):
+    """A degenerate finder for distribution packages on the file system.
+
+    This finder supplies only a find_distributions() method for versions
+    of Python that do not have a PathFinder find_distributions().
+    """
+
+    def find_distributions(self, context=DistributionFinder.Context()):
+        """
+        Find distributions.
+
+        Return an iterable of all Distribution instances capable of
+        loading the metadata for packages matching ``context.name``
+        (or all names if ``None`` indicated) along the paths in the list
+        of directories ``context.path``.
+        """
+        found = self._search_paths(context.name, context.path)
+        return map(PathDistribution, found)
+
+    @classmethod
+    def _search_paths(cls, name, paths):
+        """Find metadata directories in paths heuristically."""
+        prepared = Prepared(name)
+        return itertools.chain.from_iterable(
+            path.search(prepared) for path in map(FastPath, paths)
+        )
+
+    def invalidate_caches(cls):
+        FastPath.__new__.cache_clear()
+
+
+class PathDistribution(Distribution):
+    def __init__(self, path: SimplePath):
+        """Construct a distribution.
+
+        :param path: SimplePath indicating the metadata directory.
+        """
+        self._path = path
+
+    def read_text(self, filename):
+        with suppress(
+            FileNotFoundError,
+            IsADirectoryError,
+            KeyError,
+            NotADirectoryError,
+            PermissionError,
+        ):
+            return self._path.joinpath(filename).read_text(encoding='utf-8')
+
+    read_text.__doc__ = Distribution.read_text.__doc__
+
+    def locate_file(self, path):
+        return self._path.parent / path
+
+    @property
+    def _normalized_name(self):
+        """
+        Performance optimization: where possible, resolve the
+        normalized name from the file system path.
+        """
+        stem = os.path.basename(str(self._path))
+        return (
+            pass_none(Prepared.normalize)(self._name_from_stem(stem))
+            or super()._normalized_name
+        )
+
+    @staticmethod
+    def _name_from_stem(stem):
+        """
+        >>> PathDistribution._name_from_stem('foo-3.0.egg-info')
+        'foo'
+        >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info')
+        'CherryPy'
+        >>> PathDistribution._name_from_stem('face.egg-info')
+        'face'
+        >>> PathDistribution._name_from_stem('foo.bar')
+        """
+        filename, ext = os.path.splitext(stem)
+        if ext not in ('.dist-info', '.egg-info'):
+            return
+        name, sep, rest = filename.partition('-')
+        return name
+
+
+def distribution(distribution_name):
+    """Get the ``Distribution`` instance for the named package.
+
+    :param distribution_name: The name of the distribution package as a string.
+    :return: A ``Distribution`` instance (or subclass thereof).
+    """
+    return Distribution.from_name(distribution_name)
+
+
+def distributions(**kwargs):
+    """Get all ``Distribution`` instances in the current environment.
+
+    :return: An iterable of ``Distribution`` instances.
+    """
+    return Distribution.discover(**kwargs)
+
+
+def metadata(distribution_name) -> _meta.PackageMetadata:
+    """Get the metadata for the named package.
+
+    :param distribution_name: The name of the distribution package to query.
+    :return: A PackageMetadata containing the parsed metadata.
+    """
+    return Distribution.from_name(distribution_name).metadata
+
+
+def version(distribution_name):
+    """Get the version string for the named package.
+
+    :param distribution_name: The name of the distribution package to query.
+    :return: The version string for the package as defined in the package's
+        "Version" metadata key.
+    """
+    return distribution(distribution_name).version
+
+
+_unique = functools.partial(
+    unique_everseen,
+    key=_py39compat.normalized_name,
+)
+"""
+Wrapper for ``distributions`` to return unique distributions by name.
+"""
+
+
+def entry_points(**params) -> EntryPoints:
+    """Return EntryPoint objects for all installed packages.
+
+    Pass selection parameters (group or name) to filter the
+    result to entry points matching those properties (see
+    EntryPoints.select()).
+
+    :return: EntryPoints for all installed packages.
+    """
+    eps = itertools.chain.from_iterable(
+        dist.entry_points for dist in _unique(distributions())
+    )
+    return EntryPoints(eps).select(**params)
+
+
+def files(distribution_name):
+    """Return a list of files for the named package.
+
+    :param distribution_name: The name of the distribution package to query.
+    :return: List of files composing the distribution.
+    """
+    return distribution(distribution_name).files
+
+
+def requires(distribution_name):
+    """
+    Return a list of requirements for the named package.
+
+    :return: An iterator of requirements, suitable for
+        packaging.requirement.Requirement.
+    """
+    return distribution(distribution_name).requires
+
+
+def packages_distributions() -> Mapping[str, List[str]]:
+    """
+    Return a mapping of top-level packages to their
+    distributions.
+
+    >>> import collections.abc
+    >>> pkgs = packages_distributions()
+    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
+    True
+    """
+    pkg_to_dist = collections.defaultdict(list)
+    for dist in distributions():
+        for pkg in _top_level_declared(dist) or _top_level_inferred(dist):
+            pkg_to_dist[pkg].append(dist.metadata['Name'])
+    return dict(pkg_to_dist)
+
+
+def _top_level_declared(dist):
+    return (dist.read_text('top_level.txt') or '').split()
+
+
+def _top_level_inferred(dist):
+    return {
+        f.parts[0] if len(f.parts) > 1 else f.with_suffix('').name
+        for f in always_iterable(dist.files)
+        if f.suffix == ".py"
+    }
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..166dc891ca5e5ae073a34dfd29623debec664fcf
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_adapters.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_adapters.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cecaebb632280fc03cfd3bdd83445d293855144b
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_adapters.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_collections.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_collections.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..031ce5b5f1e2263690599eb9656f629519e074f0
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_collections.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_compat.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_compat.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..68558ad8c5ab4cde3a2cdbbe9caadf58647ba9c5
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_compat.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_functools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_functools.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab8a16e64554105f37cd71d503f2fb990ba148d6
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_functools.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_itertools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_itertools.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c12c7a87d19d2c8eedef4b9335747c03960bdfeb
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_itertools.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_meta.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_meta.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a50ca06fdc5a78f78b504657edeef51dbf950a93
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_meta.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_py39compat.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_py39compat.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2457c3cdfd8511cbc0c69a6c4d52607395a96886
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_py39compat.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_text.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_text.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9fb872a4306d5766882e70ffdf78587ae99ac155
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/__pycache__/_text.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_adapters.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_adapters.py
new file mode 100644
index 0000000000000000000000000000000000000000..e33cba5e44d4f7c62a479db7ea215f7a06ad6efa
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_adapters.py
@@ -0,0 +1,90 @@
+import functools
+import warnings
+import re
+import textwrap
+import email.message
+
+from ._text import FoldedCase
+from ._compat import pypy_partial
+
+
+# Do not remove prior to 2024-01-01 or Python 3.14
+_warn = functools.partial(
+    warnings.warn,
+    "Implicit None on return values is deprecated and will raise KeyErrors.",
+    DeprecationWarning,
+    stacklevel=pypy_partial(2),
+)
+
+
+class Message(email.message.Message):
+    multiple_use_keys = set(
+        map(
+            FoldedCase,
+            [
+                'Classifier',
+                'Obsoletes-Dist',
+                'Platform',
+                'Project-URL',
+                'Provides-Dist',
+                'Provides-Extra',
+                'Requires-Dist',
+                'Requires-External',
+                'Supported-Platform',
+                'Dynamic',
+            ],
+        )
+    )
+    """
+    Keys that may be indicated multiple times per PEP 566.
+    """
+
+    def __new__(cls, orig: email.message.Message):
+        res = super().__new__(cls)
+        vars(res).update(vars(orig))
+        return res
+
+    def __init__(self, *args, **kwargs):
+        self._headers = self._repair_headers()
+
+    # suppress spurious error from mypy
+    def __iter__(self):
+        return super().__iter__()
+
+    def __getitem__(self, item):
+        """
+        Warn users that a ``KeyError`` can be expected when a
+        mising key is supplied. Ref python/importlib_metadata#371.
+        """
+        res = super().__getitem__(item)
+        if res is None:
+            _warn()
+        return res
+
+    def _repair_headers(self):
+        def redent(value):
+            "Correct for RFC822 indentation"
+            if not value or '\n' not in value:
+                return value
+            return textwrap.dedent(' ' * 8 + value)
+
+        headers = [(key, redent(value)) for key, value in vars(self)['_headers']]
+        if self._payload:
+            headers.append(('Description', self.get_payload()))
+        return headers
+
+    @property
+    def json(self):
+        """
+        Convert PackageMetadata to a JSON-compatible format
+        per PEP 0566.
+        """
+
+        def transform(key):
+            value = self.get_all(key) if key in self.multiple_use_keys else self[key]
+            if key == 'Keywords':
+                value = re.split(r'\s+', value)
+            tk = key.lower().replace('-', '_')
+            return tk, value
+
+        return dict(map(transform, map(FoldedCase, self)))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_collections.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_collections.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf0954e1a30546d781bf25781ec716ef92a77e32
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_collections.py
@@ -0,0 +1,30 @@
+import collections
+
+
+# from jaraco.collections 3.3
+class FreezableDefaultDict(collections.defaultdict):
+    """
+    Often it is desirable to prevent the mutation of
+    a default dict after its initial construction, such
+    as to prevent mutation during iteration.
+
+    >>> dd = FreezableDefaultDict(list)
+    >>> dd[0].append('1')
+    >>> dd.freeze()
+    >>> dd[1]
+    []
+    >>> len(dd)
+    1
+    """
+
+    def __missing__(self, key):
+        return getattr(self, '_frozen', super().__missing__)(key)
+
+    def freeze(self):
+        self._frozen = lambda key: self.default_factory()
+
+
+class Pair(collections.namedtuple('Pair', 'name value')):
+    @classmethod
+    def parse(cls, text):
+        return cls(*map(str.strip, text.split("=", 1)))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_compat.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..84f9eea4f3c4e588f5358c586275f2ce8a647630
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_compat.py
@@ -0,0 +1,72 @@
+import sys
+import platform
+
+
+__all__ = ['install', 'NullFinder', 'Protocol']
+
+
+try:
+    from typing import Protocol
+except ImportError:  # pragma: no cover
+    # Python 3.7 compatibility
+    from ..typing_extensions import Protocol  # type: ignore
+
+
+def install(cls):
+    """
+    Class decorator for installation on sys.meta_path.
+
+    Adds the backport DistributionFinder to sys.meta_path and
+    attempts to disable the finder functionality of the stdlib
+    DistributionFinder.
+    """
+    sys.meta_path.append(cls())
+    disable_stdlib_finder()
+    return cls
+
+
+def disable_stdlib_finder():
+    """
+    Give the backport primacy for discovering path-based distributions
+    by monkey-patching the stdlib O_O.
+
+    See #91 for more background for rationale on this sketchy
+    behavior.
+    """
+
+    def matches(finder):
+        return getattr(
+            finder, '__module__', None
+        ) == '_frozen_importlib_external' and hasattr(finder, 'find_distributions')
+
+    for finder in filter(matches, sys.meta_path):  # pragma: nocover
+        del finder.find_distributions
+
+
+class NullFinder:
+    """
+    A "Finder" (aka "MetaClassFinder") that never finds any modules,
+    but may find distributions.
+    """
+
+    @staticmethod
+    def find_spec(*args, **kwargs):
+        return None
+
+    # In Python 2, the import system requires finders
+    # to have a find_module() method, but this usage
+    # is deprecated in Python 3 in favor of find_spec().
+    # For the purposes of this finder (i.e. being present
+    # on sys.meta_path but having no other import
+    # system functionality), the two methods are identical.
+    find_module = find_spec
+
+
+def pypy_partial(val):
+    """
+    Adjust for variable stacklevel on partial under PyPy.
+
+    Workaround for #327.
+    """
+    is_pypy = platform.python_implementation() == 'PyPy'
+    return val + is_pypy
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_functools.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_functools.py
new file mode 100644
index 0000000000000000000000000000000000000000..71f66bd03cb713a2190853bdf7170c4ea80d2425
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_functools.py
@@ -0,0 +1,104 @@
+import types
+import functools
+
+
+# from jaraco.functools 3.3
+def method_cache(method, cache_wrapper=None):
+    """
+    Wrap lru_cache to support storing the cache data in the object instances.
+
+    Abstracts the common paradigm where the method explicitly saves an
+    underscore-prefixed protected property on first call and returns that
+    subsequently.
+
+    >>> class MyClass:
+    ...     calls = 0
+    ...
+    ...     @method_cache
+    ...     def method(self, value):
+    ...         self.calls += 1
+    ...         return value
+
+    >>> a = MyClass()
+    >>> a.method(3)
+    3
+    >>> for x in range(75):
+    ...     res = a.method(x)
+    >>> a.calls
+    75
+
+    Note that the apparent behavior will be exactly like that of lru_cache
+    except that the cache is stored on each instance, so values in one
+    instance will not flush values from another, and when an instance is
+    deleted, so are the cached values for that instance.
+
+    >>> b = MyClass()
+    >>> for x in range(35):
+    ...     res = b.method(x)
+    >>> b.calls
+    35
+    >>> a.method(0)
+    0
+    >>> a.calls
+    75
+
+    Note that if method had been decorated with ``functools.lru_cache()``,
+    a.calls would have been 76 (due to the cached value of 0 having been
+    flushed by the 'b' instance).
+
+    Clear the cache with ``.cache_clear()``
+
+    >>> a.method.cache_clear()
+
+    Same for a method that hasn't yet been called.
+
+    >>> c = MyClass()
+    >>> c.method.cache_clear()
+
+    Another cache wrapper may be supplied:
+
+    >>> cache = functools.lru_cache(maxsize=2)
+    >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
+    >>> a = MyClass()
+    >>> a.method2()
+    3
+
+    Caution - do not subsequently wrap the method with another decorator, such
+    as ``@property``, which changes the semantics of the function.
+
+    See also
+    http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
+    for another implementation and additional justification.
+    """
+    cache_wrapper = cache_wrapper or functools.lru_cache()
+
+    def wrapper(self, *args, **kwargs):
+        # it's the first call, replace the method with a cached, bound method
+        bound_method = types.MethodType(method, self)
+        cached_method = cache_wrapper(bound_method)
+        setattr(self, method.__name__, cached_method)
+        return cached_method(*args, **kwargs)
+
+    # Support cache clear even before cache has been created.
+    wrapper.cache_clear = lambda: None
+
+    return wrapper
+
+
+# From jaraco.functools 3.3
+def pass_none(func):
+    """
+    Wrap func so it's not called if its first param is None
+
+    >>> print_text = pass_none(print)
+    >>> print_text('text')
+    text
+    >>> print_text(None)
+    """
+
+    @functools.wraps(func)
+    def wrapper(param, *args, **kwargs):
+        if param is not None:
+            return func(param, *args, **kwargs)
+
+    return wrapper
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_itertools.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_itertools.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4ca9b9140e3f085b36609bb8dfdaea79c78e144
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_itertools.py
@@ -0,0 +1,73 @@
+from itertools import filterfalse
+
+
+def unique_everseen(iterable, key=None):
+    "List unique elements, preserving order. Remember all elements ever seen."
+    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+    # unique_everseen('ABBCcAD', str.lower) --> A B C D
+    seen = set()
+    seen_add = seen.add
+    if key is None:
+        for element in filterfalse(seen.__contains__, iterable):
+            seen_add(element)
+            yield element
+    else:
+        for element in iterable:
+            k = key(element)
+            if k not in seen:
+                seen_add(k)
+                yield element
+
+
+# copied from more_itertools 8.8
+def always_iterable(obj, base_type=(str, bytes)):
+    """If *obj* is iterable, return an iterator over its items::
+
+        >>> obj = (1, 2, 3)
+        >>> list(always_iterable(obj))
+        [1, 2, 3]
+
+    If *obj* is not iterable, return a one-item iterable containing *obj*::
+
+        >>> obj = 1
+        >>> list(always_iterable(obj))
+        [1]
+
+    If *obj* is ``None``, return an empty iterable:
+
+        >>> obj = None
+        >>> list(always_iterable(None))
+        []
+
+    By default, binary and text strings are not considered iterable::
+
+        >>> obj = 'foo'
+        >>> list(always_iterable(obj))
+        ['foo']
+
+    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
+    returns ``True`` won't be considered iterable.
+
+        >>> obj = {'a': 1}
+        >>> list(always_iterable(obj))  # Iterate over the dict's keys
+        ['a']
+        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
+        [{'a': 1}]
+
+    Set *base_type* to ``None`` to avoid any special handling and treat objects
+    Python considers iterable as iterable:
+
+        >>> obj = 'foo'
+        >>> list(always_iterable(obj, base_type=None))
+        ['f', 'o', 'o']
+    """
+    if obj is None:
+        return iter(())
+
+    if (base_type is not None) and isinstance(obj, base_type):
+        return iter((obj,))
+
+    try:
+        return iter(obj)
+    except TypeError:
+        return iter((obj,))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_meta.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_meta.py
new file mode 100644
index 0000000000000000000000000000000000000000..259b15ba194db7c02fbcbf170d522230b4418933
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_meta.py
@@ -0,0 +1,49 @@
+from ._compat import Protocol
+from typing import Any, Dict, Iterator, List, TypeVar, Union
+
+
+_T = TypeVar("_T")
+
+
+class PackageMetadata(Protocol):
+    def __len__(self) -> int:
+        ...  # pragma: no cover
+
+    def __contains__(self, item: str) -> bool:
+        ...  # pragma: no cover
+
+    def __getitem__(self, key: str) -> str:
+        ...  # pragma: no cover
+
+    def __iter__(self) -> Iterator[str]:
+        ...  # pragma: no cover
+
+    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
+        """
+        Return all values associated with a possibly multi-valued key.
+        """
+
+    @property
+    def json(self) -> Dict[str, Union[str, List[str]]]:
+        """
+        A JSON-compatible form of the metadata.
+        """
+
+
+class SimplePath(Protocol[_T]):
+    """
+    A minimal subset of pathlib.Path required by PathDistribution.
+    """
+
+    def joinpath(self) -> _T:
+        ...  # pragma: no cover
+
+    def __truediv__(self, other: Union[str, _T]) -> _T:
+        ...  # pragma: no cover
+
+    @property
+    def parent(self) -> _T:
+        ...  # pragma: no cover
+
+    def read_text(self) -> str:
+        ...  # pragma: no cover
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_py39compat.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_py39compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..cde4558fbbeb938de8a5aa8ee165465c23180ffb
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_py39compat.py
@@ -0,0 +1,35 @@
+"""
+Compatibility layer with Python 3.8/3.9
+"""
+from typing import TYPE_CHECKING, Any, Optional
+
+if TYPE_CHECKING:  # pragma: no cover
+    # Prevent circular imports on runtime.
+    from . import Distribution, EntryPoint
+else:
+    Distribution = EntryPoint = Any
+
+
+def normalized_name(dist: Distribution) -> Optional[str]:
+    """
+    Honor name normalization for distributions that don't provide ``_normalized_name``.
+    """
+    try:
+        return dist._normalized_name
+    except AttributeError:
+        from . import Prepared  # -> delay to prevent circular imports.
+
+        return Prepared.normalize(getattr(dist, "name", None) or dist.metadata['Name'])
+
+
+def ep_matches(ep: EntryPoint, **params) -> bool:
+    """
+    Workaround for ``EntryPoint`` objects without the ``matches`` method.
+    """
+    try:
+        return ep.matches(**params)
+    except AttributeError:
+        from . import EntryPoint  # -> delay to prevent circular imports.
+
+        # Reconstruct the EntryPoint object to make sure it is compatible.
+        return EntryPoint(ep.name, ep.value, ep.group).matches(**params)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_text.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_text.py
new file mode 100644
index 0000000000000000000000000000000000000000..c88cfbb2349c6401336bc5ba6623f51afd1eb59d
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/_text.py
@@ -0,0 +1,99 @@
+import re
+
+from ._functools import method_cache
+
+
+# from jaraco.text 3.5
+class FoldedCase(str):
+    """
+    A case insensitive string class; behaves just like str
+    except compares equal when the only variation is case.
+
+    >>> s = FoldedCase('hello world')
+
+    >>> s == 'Hello World'
+    True
+
+    >>> 'Hello World' == s
+    True
+
+    >>> s != 'Hello World'
+    False
+
+    >>> s.index('O')
+    4
+
+    >>> s.split('O')
+    ['hell', ' w', 'rld']
+
+    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
+    ['alpha', 'Beta', 'GAMMA']
+
+    Sequence membership is straightforward.
+
+    >>> "Hello World" in [s]
+    True
+    >>> s in ["Hello World"]
+    True
+
+    You may test for set inclusion, but candidate and elements
+    must both be folded.
+
+    >>> FoldedCase("Hello World") in {s}
+    True
+    >>> s in {FoldedCase("Hello World")}
+    True
+
+    String inclusion works as long as the FoldedCase object
+    is on the right.
+
+    >>> "hello" in FoldedCase("Hello World")
+    True
+
+    But not if the FoldedCase object is on the left:
+
+    >>> FoldedCase('hello') in 'Hello World'
+    False
+
+    In that case, use in_:
+
+    >>> FoldedCase('hello').in_('Hello World')
+    True
+
+    >>> FoldedCase('hello') > FoldedCase('Hello')
+    False
+    """
+
+    def __lt__(self, other):
+        return self.lower() < other.lower()
+
+    def __gt__(self, other):
+        return self.lower() > other.lower()
+
+    def __eq__(self, other):
+        return self.lower() == other.lower()
+
+    def __ne__(self, other):
+        return self.lower() != other.lower()
+
+    def __hash__(self):
+        return hash(self.lower())
+
+    def __contains__(self, other):
+        return super().lower().__contains__(other.lower())
+
+    def in_(self, other):
+        "Does self appear in other?"
+        return self in FoldedCase(other)
+
+    # cache lower since it's likely to be called frequently.
+    @method_cache
+    def lower(self):
+        return super().lower()
+
+    def index(self, sub):
+        return self.lower().index(sub.lower())
+
+    def split(self, splitter=' ', maxsplit=0):
+        pattern = re.compile(re.escape(splitter), re.I)
+        return pattern.split(self, maxsplit)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_metadata/py.typed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..34e3a9950cc557879af8d797f9382b18a870fb56
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__init__.py
@@ -0,0 +1,36 @@
+"""Read resources contained within a package."""
+
+from ._common import (
+    as_file,
+    files,
+    Package,
+)
+
+from ._legacy import (
+    contents,
+    open_binary,
+    read_binary,
+    open_text,
+    read_text,
+    is_resource,
+    path,
+    Resource,
+)
+
+from .abc import ResourceReader
+
+
+__all__ = [
+    'Package',
+    'Resource',
+    'ResourceReader',
+    'as_file',
+    'contents',
+    'files',
+    'is_resource',
+    'open_binary',
+    'open_text',
+    'path',
+    'read_binary',
+    'read_text',
+]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..18b07c954fd475befc462521b5d7f916ce562f66
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_adapters.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_adapters.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e1d39576d908947927c99b5b79909eab7178dce2
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_adapters.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_common.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_common.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..01e6fb8269817a14dc4c8c68953d8676eeb2732e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_common.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_compat.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_compat.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1c6e5c1acfd20561077545460e31034a9466b2cc
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_compat.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_itertools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_itertools.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8664a50200d2f3014f0fa7bc7f9a693f21de180a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_itertools.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_legacy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_legacy.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b9a160fa00eea0f1233ad459af0aa0fecc0dea7
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/_legacy.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/abc.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/abc.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..156dd997269a9df034806038ec5db4a4e4525acd
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/abc.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/readers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/readers.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4c540441361a1fe4a8068ac1848ddb22742fbcf4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/readers.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/simple.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/simple.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2c179602cb9324424452cc9c088639e8d199d0f6
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/__pycache__/simple.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_adapters.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_adapters.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea363d86a564b5450666aa00aecd46353326a75a
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_adapters.py
@@ -0,0 +1,170 @@
+from contextlib import suppress
+from io import TextIOWrapper
+
+from . import abc
+
+
+class SpecLoaderAdapter:
+    """
+    Adapt a package spec to adapt the underlying loader.
+    """
+
+    def __init__(self, spec, adapter=lambda spec: spec.loader):
+        self.spec = spec
+        self.loader = adapter(spec)
+
+    def __getattr__(self, name):
+        return getattr(self.spec, name)
+
+
+class TraversableResourcesLoader:
+    """
+    Adapt a loader to provide TraversableResources.
+    """
+
+    def __init__(self, spec):
+        self.spec = spec
+
+    def get_resource_reader(self, name):
+        return CompatibilityFiles(self.spec)._native()
+
+
+def _io_wrapper(file, mode='r', *args, **kwargs):
+    if mode == 'r':
+        return TextIOWrapper(file, *args, **kwargs)
+    elif mode == 'rb':
+        return file
+    raise ValueError(
+        "Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode)
+    )
+
+
+class CompatibilityFiles:
+    """
+    Adapter for an existing or non-existent resource reader
+    to provide a compatibility .files().
+    """
+
+    class SpecPath(abc.Traversable):
+        """
+        Path tied to a module spec.
+        Can be read and exposes the resource reader children.
+        """
+
+        def __init__(self, spec, reader):
+            self._spec = spec
+            self._reader = reader
+
+        def iterdir(self):
+            if not self._reader:
+                return iter(())
+            return iter(
+                CompatibilityFiles.ChildPath(self._reader, path)
+                for path in self._reader.contents()
+            )
+
+        def is_file(self):
+            return False
+
+        is_dir = is_file
+
+        def joinpath(self, other):
+            if not self._reader:
+                return CompatibilityFiles.OrphanPath(other)
+            return CompatibilityFiles.ChildPath(self._reader, other)
+
+        @property
+        def name(self):
+            return self._spec.name
+
+        def open(self, mode='r', *args, **kwargs):
+            return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs)
+
+    class ChildPath(abc.Traversable):
+        """
+        Path tied to a resource reader child.
+        Can be read but doesn't expose any meaningful children.
+        """
+
+        def __init__(self, reader, name):
+            self._reader = reader
+            self._name = name
+
+        def iterdir(self):
+            return iter(())
+
+        def is_file(self):
+            return self._reader.is_resource(self.name)
+
+        def is_dir(self):
+            return not self.is_file()
+
+        def joinpath(self, other):
+            return CompatibilityFiles.OrphanPath(self.name, other)
+
+        @property
+        def name(self):
+            return self._name
+
+        def open(self, mode='r', *args, **kwargs):
+            return _io_wrapper(
+                self._reader.open_resource(self.name), mode, *args, **kwargs
+            )
+
+    class OrphanPath(abc.Traversable):
+        """
+        Orphan path, not tied to a module spec or resource reader.
+        Can't be read and doesn't expose any meaningful children.
+        """
+
+        def __init__(self, *path_parts):
+            if len(path_parts) < 1:
+                raise ValueError('Need at least one path part to construct a path')
+            self._path = path_parts
+
+        def iterdir(self):
+            return iter(())
+
+        def is_file(self):
+            return False
+
+        is_dir = is_file
+
+        def joinpath(self, other):
+            return CompatibilityFiles.OrphanPath(*self._path, other)
+
+        @property
+        def name(self):
+            return self._path[-1]
+
+        def open(self, mode='r', *args, **kwargs):
+            raise FileNotFoundError("Can't open orphan path")
+
+    def __init__(self, spec):
+        self.spec = spec
+
+    @property
+    def _reader(self):
+        with suppress(AttributeError):
+            return self.spec.loader.get_resource_reader(self.spec.name)
+
+    def _native(self):
+        """
+        Return the native reader if it supports files().
+        """
+        reader = self._reader
+        return reader if hasattr(reader, 'files') else self
+
+    def __getattr__(self, attr):
+        return getattr(self._reader, attr)
+
+    def files(self):
+        return CompatibilityFiles.SpecPath(self.spec, self._reader)
+
+
+def wrap_spec(package):
+    """
+    Construct a package spec with traversable compatibility
+    on the spec/loader/reader.
+    """
+    return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_common.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_common.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c6de1cfb2e7b8f4ae95100589c4eaa84fb99926
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_common.py
@@ -0,0 +1,207 @@
+import os
+import pathlib
+import tempfile
+import functools
+import contextlib
+import types
+import importlib
+import inspect
+import warnings
+import itertools
+
+from typing import Union, Optional, cast
+from .abc import ResourceReader, Traversable
+
+from ._compat import wrap_spec
+
+Package = Union[types.ModuleType, str]
+Anchor = Package
+
+
+def package_to_anchor(func):
+    """
+    Replace 'package' parameter as 'anchor' and warn about the change.
+
+    Other errors should fall through.
+
+    >>> files('a', 'b')
+    Traceback (most recent call last):
+    TypeError: files() takes from 0 to 1 positional arguments but 2 were given
+    """
+    undefined = object()
+
+    @functools.wraps(func)
+    def wrapper(anchor=undefined, package=undefined):
+        if package is not undefined:
+            if anchor is not undefined:
+                return func(anchor, package)
+            warnings.warn(
+                "First parameter to files is renamed to 'anchor'",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            return func(package)
+        elif anchor is undefined:
+            return func()
+        return func(anchor)
+
+    return wrapper
+
+
+@package_to_anchor
+def files(anchor: Optional[Anchor] = None) -> Traversable:
+    """
+    Get a Traversable resource for an anchor.
+    """
+    return from_package(resolve(anchor))
+
+
+def get_resource_reader(package: types.ModuleType) -> Optional[ResourceReader]:
+    """
+    Return the package's loader if it's a ResourceReader.
+    """
+    # We can't use
+    # a issubclass() check here because apparently abc.'s __subclasscheck__()
+    # hook wants to create a weak reference to the object, but
+    # zipimport.zipimporter does not support weak references, resulting in a
+    # TypeError.  That seems terrible.
+    spec = package.__spec__
+    reader = getattr(spec.loader, 'get_resource_reader', None)  # type: ignore
+    if reader is None:
+        return None
+    return reader(spec.name)  # type: ignore
+
+
+@functools.singledispatch
+def resolve(cand: Optional[Anchor]) -> types.ModuleType:
+    return cast(types.ModuleType, cand)
+
+
+@resolve.register
+def _(cand: str) -> types.ModuleType:
+    return importlib.import_module(cand)
+
+
+@resolve.register
+def _(cand: None) -> types.ModuleType:
+    return resolve(_infer_caller().f_globals['__name__'])
+
+
+def _infer_caller():
+    """
+    Walk the stack and find the frame of the first caller not in this module.
+    """
+
+    def is_this_file(frame_info):
+        return frame_info.filename == __file__
+
+    def is_wrapper(frame_info):
+        return frame_info.function == 'wrapper'
+
+    not_this_file = itertools.filterfalse(is_this_file, inspect.stack())
+    # also exclude 'wrapper' due to singledispatch in the call stack
+    callers = itertools.filterfalse(is_wrapper, not_this_file)
+    return next(callers).frame
+
+
+def from_package(package: types.ModuleType):
+    """
+    Return a Traversable object for the given package.
+
+    """
+    spec = wrap_spec(package)
+    reader = spec.loader.get_resource_reader(spec.name)
+    return reader.files()
+
+
+@contextlib.contextmanager
+def _tempfile(
+    reader,
+    suffix='',
+    # gh-93353: Keep a reference to call os.remove() in late Python
+    # finalization.
+    *,
+    _os_remove=os.remove,
+):
+    # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try'
+    # blocks due to the need to close the temporary file to work on Windows
+    # properly.
+    fd, raw_path = tempfile.mkstemp(suffix=suffix)
+    try:
+        try:
+            os.write(fd, reader())
+        finally:
+            os.close(fd)
+        del reader
+        yield pathlib.Path(raw_path)
+    finally:
+        try:
+            _os_remove(raw_path)
+        except FileNotFoundError:
+            pass
+
+
+def _temp_file(path):
+    return _tempfile(path.read_bytes, suffix=path.name)
+
+
+def _is_present_dir(path: Traversable) -> bool:
+    """
+    Some Traversables implement ``is_dir()`` to raise an
+    exception (i.e. ``FileNotFoundError``) when the
+    directory doesn't exist. This function wraps that call
+    to always return a boolean and only return True
+    if there's a dir and it exists.
+    """
+    with contextlib.suppress(FileNotFoundError):
+        return path.is_dir()
+    return False
+
+
+@functools.singledispatch
+def as_file(path):
+    """
+    Given a Traversable object, return that object as a
+    path on the local file system in a context manager.
+    """
+    return _temp_dir(path) if _is_present_dir(path) else _temp_file(path)
+
+
+@as_file.register(pathlib.Path)
+@contextlib.contextmanager
+def _(path):
+    """
+    Degenerate behavior for pathlib.Path objects.
+    """
+    yield path
+
+
+@contextlib.contextmanager
+def _temp_path(dir: tempfile.TemporaryDirectory):
+    """
+    Wrap tempfile.TemporyDirectory to return a pathlib object.
+    """
+    with dir as result:
+        yield pathlib.Path(result)
+
+
+@contextlib.contextmanager
+def _temp_dir(path):
+    """
+    Given a traversable dir, recursively replicate the whole tree
+    to the file system in a context manager.
+    """
+    assert path.is_dir()
+    with _temp_path(tempfile.TemporaryDirectory()) as temp_dir:
+        yield _write_contents(temp_dir, path)
+
+
+def _write_contents(target, source):
+    child = target.joinpath(source.name)
+    if source.is_dir():
+        child.mkdir()
+        for item in source.iterdir():
+            _write_contents(child, item)
+    else:
+        child.write_bytes(source.read_bytes())
+    return child
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_compat.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b5b1d280f3c7b45cee54338f60d5271a7510c2e
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_compat.py
@@ -0,0 +1,108 @@
+# flake8: noqa
+
+import abc
+import os
+import sys
+import pathlib
+from contextlib import suppress
+from typing import Union
+
+
+if sys.version_info >= (3, 10):
+    from zipfile import Path as ZipPath  # type: ignore
+else:
+    from ..zipp import Path as ZipPath  # type: ignore
+
+
+try:
+    from typing import runtime_checkable  # type: ignore
+except ImportError:
+
+    def runtime_checkable(cls):  # type: ignore
+        return cls
+
+
+try:
+    from typing import Protocol  # type: ignore
+except ImportError:
+    Protocol = abc.ABC  # type: ignore
+
+
+class TraversableResourcesLoader:
+    """
+    Adapt loaders to provide TraversableResources and other
+    compatibility.
+
+    Used primarily for Python 3.9 and earlier where the native
+    loaders do not yet implement TraversableResources.
+    """
+
+    def __init__(self, spec):
+        self.spec = spec
+
+    @property
+    def path(self):
+        return self.spec.origin
+
+    def get_resource_reader(self, name):
+        from . import readers, _adapters
+
+        def _zip_reader(spec):
+            with suppress(AttributeError):
+                return readers.ZipReader(spec.loader, spec.name)
+
+        def _namespace_reader(spec):
+            with suppress(AttributeError, ValueError):
+                return readers.NamespaceReader(spec.submodule_search_locations)
+
+        def _available_reader(spec):
+            with suppress(AttributeError):
+                return spec.loader.get_resource_reader(spec.name)
+
+        def _native_reader(spec):
+            reader = _available_reader(spec)
+            return reader if hasattr(reader, 'files') else None
+
+        def _file_reader(spec):
+            try:
+                path = pathlib.Path(self.path)
+            except TypeError:
+                return None
+            if path.exists():
+                return readers.FileReader(self)
+
+        return (
+            # native reader if it supplies 'files'
+            _native_reader(self.spec)
+            or
+            # local ZipReader if a zip module
+            _zip_reader(self.spec)
+            or
+            # local NamespaceReader if a namespace module
+            _namespace_reader(self.spec)
+            or
+            # local FileReader
+            _file_reader(self.spec)
+            # fallback - adapt the spec ResourceReader to TraversableReader
+            or _adapters.CompatibilityFiles(self.spec)
+        )
+
+
+def wrap_spec(package):
+    """
+    Construct a package spec with traversable compatibility
+    on the spec/loader/reader.
+
+    Supersedes _adapters.wrap_spec to use TraversableResourcesLoader
+    from above for older Python compatibility (<3.10).
+    """
+    from . import _adapters
+
+    return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader)
+
+
+if sys.version_info >= (3, 9):
+    StrPath = Union[str, os.PathLike[str]]
+else:
+    # PathLike is only subscriptable at runtime in 3.9+
+    StrPath = Union[str, "os.PathLike[str]"]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_itertools.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_itertools.py
new file mode 100644
index 0000000000000000000000000000000000000000..cce05582ffc6fe6d72027194f4ccc44ee42f1fcd
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_itertools.py
@@ -0,0 +1,35 @@
+from itertools import filterfalse
+
+from typing import (
+    Callable,
+    Iterable,
+    Iterator,
+    Optional,
+    Set,
+    TypeVar,
+    Union,
+)
+
+# Type and type variable definitions
+_T = TypeVar('_T')
+_U = TypeVar('_U')
+
+
+def unique_everseen(
+    iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None
+) -> Iterator[_T]:
+    "List unique elements, preserving order. Remember all elements ever seen."
+    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+    # unique_everseen('ABBCcAD', str.lower) --> A B C D
+    seen: Set[Union[_T, _U]] = set()
+    seen_add = seen.add
+    if key is None:
+        for element in filterfalse(seen.__contains__, iterable):
+            seen_add(element)
+            yield element
+    else:
+        for element in iterable:
+            k = key(element)
+            if k not in seen:
+                seen_add(k)
+                yield element
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_legacy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_legacy.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1ea8105dad6e27eefd5a34f64dfee974a5c4f71
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/_legacy.py
@@ -0,0 +1,120 @@
+import functools
+import os
+import pathlib
+import types
+import warnings
+
+from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any
+
+from . import _common
+
+Package = Union[types.ModuleType, str]
+Resource = str
+
+
+def deprecated(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        warnings.warn(
+            f"{func.__name__} is deprecated. Use files() instead. "
+            "Refer to https://importlib-resources.readthedocs.io"
+            "/en/latest/using.html#migrating-from-legacy for migration advice.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+def normalize_path(path: Any) -> str:
+    """Normalize a path by ensuring it is a string.
+
+    If the resulting string contains path separators, an exception is raised.
+    """
+    str_path = str(path)
+    parent, file_name = os.path.split(str_path)
+    if parent:
+        raise ValueError(f'{path!r} must be only a file name')
+    return file_name
+
+
+@deprecated
+def open_binary(package: Package, resource: Resource) -> BinaryIO:
+    """Return a file-like object opened for binary reading of the resource."""
+    return (_common.files(package) / normalize_path(resource)).open('rb')
+
+
+@deprecated
+def read_binary(package: Package, resource: Resource) -> bytes:
+    """Return the binary contents of the resource."""
+    return (_common.files(package) / normalize_path(resource)).read_bytes()
+
+
+@deprecated
+def open_text(
+    package: Package,
+    resource: Resource,
+    encoding: str = 'utf-8',
+    errors: str = 'strict',
+) -> TextIO:
+    """Return a file-like object opened for text reading of the resource."""
+    return (_common.files(package) / normalize_path(resource)).open(
+        'r', encoding=encoding, errors=errors
+    )
+
+
+@deprecated
+def read_text(
+    package: Package,
+    resource: Resource,
+    encoding: str = 'utf-8',
+    errors: str = 'strict',
+) -> str:
+    """Return the decoded string of the resource.
+
+    The decoding-related arguments have the same semantics as those of
+    bytes.decode().
+    """
+    with open_text(package, resource, encoding, errors) as fp:
+        return fp.read()
+
+
+@deprecated
+def contents(package: Package) -> Iterable[str]:
+    """Return an iterable of entries in `package`.
+
+    Note that not all entries are resources.  Specifically, directories are
+    not considered resources.  Use `is_resource()` on each entry returned here
+    to check if it is a resource or not.
+    """
+    return [path.name for path in _common.files(package).iterdir()]
+
+
+@deprecated
+def is_resource(package: Package, name: str) -> bool:
+    """True if `name` is a resource inside `package`.
+
+    Directories are *not* resources.
+    """
+    resource = normalize_path(name)
+    return any(
+        traversable.name == resource and traversable.is_file()
+        for traversable in _common.files(package).iterdir()
+    )
+
+
+@deprecated
+def path(
+    package: Package,
+    resource: Resource,
+) -> ContextManager[pathlib.Path]:
+    """A context manager providing a file path object to the resource.
+
+    If the resource does not already exist on its own on the file system,
+    a temporary file will be created. If the file was created, the file
+    will be deleted upon exiting the context manager (no exception is
+    raised if the file was deleted prior to the context manager
+    exiting).
+    """
+    return _common.as_file(_common.files(package) / normalize_path(resource))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/abc.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/abc.py
new file mode 100644
index 0000000000000000000000000000000000000000..23b6aeafe4f43d097734e186907232513ad27a3c
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/abc.py
@@ -0,0 +1,170 @@
+import abc
+import io
+import itertools
+import pathlib
+from typing import Any, BinaryIO, Iterable, Iterator, NoReturn, Text, Optional
+
+from ._compat import runtime_checkable, Protocol, StrPath
+
+
+__all__ = ["ResourceReader", "Traversable", "TraversableResources"]
+
+
+class ResourceReader(metaclass=abc.ABCMeta):
+    """Abstract base class for loaders to provide resource reading support."""
+
+    @abc.abstractmethod
+    def open_resource(self, resource: Text) -> BinaryIO:
+        """Return an opened, file-like object for binary reading.
+
+        The 'resource' argument is expected to represent only a file name.
+        If the resource cannot be found, FileNotFoundError is raised.
+        """
+        # This deliberately raises FileNotFoundError instead of
+        # NotImplementedError so that if this method is accidentally called,
+        # it'll still do the right thing.
+        raise FileNotFoundError
+
+    @abc.abstractmethod
+    def resource_path(self, resource: Text) -> Text:
+        """Return the file system path to the specified resource.
+
+        The 'resource' argument is expected to represent only a file name.
+        If the resource does not exist on the file system, raise
+        FileNotFoundError.
+        """
+        # This deliberately raises FileNotFoundError instead of
+        # NotImplementedError so that if this method is accidentally called,
+        # it'll still do the right thing.
+        raise FileNotFoundError
+
+    @abc.abstractmethod
+    def is_resource(self, path: Text) -> bool:
+        """Return True if the named 'path' is a resource.
+
+        Files are resources, directories are not.
+        """
+        raise FileNotFoundError
+
+    @abc.abstractmethod
+    def contents(self) -> Iterable[str]:
+        """Return an iterable of entries in `package`."""
+        raise FileNotFoundError
+
+
+class TraversalError(Exception):
+    pass
+
+
+@runtime_checkable
+class Traversable(Protocol):
+    """
+    An object with a subset of pathlib.Path methods suitable for
+    traversing directories and opening files.
+
+    Any exceptions that occur when accessing the backing resource
+    may propagate unaltered.
+    """
+
+    @abc.abstractmethod
+    def iterdir(self) -> Iterator["Traversable"]:
+        """
+        Yield Traversable objects in self
+        """
+
+    def read_bytes(self) -> bytes:
+        """
+        Read contents of self as bytes
+        """
+        with self.open('rb') as strm:
+            return strm.read()
+
+    def read_text(self, encoding: Optional[str] = None) -> str:
+        """
+        Read contents of self as text
+        """
+        with self.open(encoding=encoding) as strm:
+            return strm.read()
+
+    @abc.abstractmethod
+    def is_dir(self) -> bool:
+        """
+        Return True if self is a directory
+        """
+
+    @abc.abstractmethod
+    def is_file(self) -> bool:
+        """
+        Return True if self is a file
+        """
+
+    def joinpath(self, *descendants: StrPath) -> "Traversable":
+        """
+        Return Traversable resolved with any descendants applied.
+
+        Each descendant should be a path segment relative to self
+        and each may contain multiple levels separated by
+        ``posixpath.sep`` (``/``).
+        """
+        if not descendants:
+            return self
+        names = itertools.chain.from_iterable(
+            path.parts for path in map(pathlib.PurePosixPath, descendants)
+        )
+        target = next(names)
+        matches = (
+            traversable for traversable in self.iterdir() if traversable.name == target
+        )
+        try:
+            match = next(matches)
+        except StopIteration:
+            raise TraversalError(
+                "Target not found during traversal.", target, list(names)
+            )
+        return match.joinpath(*names)
+
+    def __truediv__(self, child: StrPath) -> "Traversable":
+        """
+        Return Traversable child in self
+        """
+        return self.joinpath(child)
+
+    @abc.abstractmethod
+    def open(self, mode='r', *args, **kwargs):
+        """
+        mode may be 'r' or 'rb' to open as text or binary. Return a handle
+        suitable for reading (same as pathlib.Path.open).
+
+        When opening as text, accepts encoding parameters such as those
+        accepted by io.TextIOWrapper.
+        """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """
+        The base name of this object without any parent references.
+        """
+
+
+class TraversableResources(ResourceReader):
+    """
+    The required interface for providing traversable
+    resources.
+    """
+
+    @abc.abstractmethod
+    def files(self) -> "Traversable":
+        """Return a Traversable object for the loaded package."""
+
+    def open_resource(self, resource: StrPath) -> io.BufferedReader:
+        return self.files().joinpath(resource).open('rb')
+
+    def resource_path(self, resource: Any) -> NoReturn:
+        raise FileNotFoundError(resource)
+
+    def is_resource(self, path: StrPath) -> bool:
+        return self.files().joinpath(path).is_file()
+
+    def contents(self) -> Iterator[str]:
+        return (item.name for item in self.files().iterdir())
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/py.typed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/readers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/readers.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab34db74091c8a04ee9004ce9a786de3146ec917
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/readers.py
@@ -0,0 +1,120 @@
+import collections
+import pathlib
+import operator
+
+from . import abc
+
+from ._itertools import unique_everseen
+from ._compat import ZipPath
+
+
+def remove_duplicates(items):
+    return iter(collections.OrderedDict.fromkeys(items))
+
+
+class FileReader(abc.TraversableResources):
+    def __init__(self, loader):
+        self.path = pathlib.Path(loader.path).parent
+
+    def resource_path(self, resource):
+        """
+        Return the file system path to prevent
+        `resources.path()` from creating a temporary
+        copy.
+        """
+        return str(self.path.joinpath(resource))
+
+    def files(self):
+        return self.path
+
+
+class ZipReader(abc.TraversableResources):
+    def __init__(self, loader, module):
+        _, _, name = module.rpartition('.')
+        self.prefix = loader.prefix.replace('\\', '/') + name + '/'
+        self.archive = loader.archive
+
+    def open_resource(self, resource):
+        try:
+            return super().open_resource(resource)
+        except KeyError as exc:
+            raise FileNotFoundError(exc.args[0])
+
+    def is_resource(self, path):
+        # workaround for `zipfile.Path.is_file` returning true
+        # for non-existent paths.
+        target = self.files().joinpath(path)
+        return target.is_file() and target.exists()
+
+    def files(self):
+        return ZipPath(self.archive, self.prefix)
+
+
+class MultiplexedPath(abc.Traversable):
+    """
+    Given a series of Traversable objects, implement a merged
+    version of the interface across all objects. Useful for
+    namespace packages which may be multihomed at a single
+    name.
+    """
+
+    def __init__(self, *paths):
+        self._paths = list(map(pathlib.Path, remove_duplicates(paths)))
+        if not self._paths:
+            message = 'MultiplexedPath must contain at least one path'
+            raise FileNotFoundError(message)
+        if not all(path.is_dir() for path in self._paths):
+            raise NotADirectoryError('MultiplexedPath only supports directories')
+
+    def iterdir(self):
+        files = (file for path in self._paths for file in path.iterdir())
+        return unique_everseen(files, key=operator.attrgetter('name'))
+
+    def read_bytes(self):
+        raise FileNotFoundError(f'{self} is not a file')
+
+    def read_text(self, *args, **kwargs):
+        raise FileNotFoundError(f'{self} is not a file')
+
+    def is_dir(self):
+        return True
+
+    def is_file(self):
+        return False
+
+    def joinpath(self, *descendants):
+        try:
+            return super().joinpath(*descendants)
+        except abc.TraversalError:
+            # One of the paths did not resolve (a directory does not exist).
+            # Just return something that will not exist.
+            return self._paths[0].joinpath(*descendants)
+
+    def open(self, *args, **kwargs):
+        raise FileNotFoundError(f'{self} is not a file')
+
+    @property
+    def name(self):
+        return self._paths[0].name
+
+    def __repr__(self):
+        paths = ', '.join(f"'{path}'" for path in self._paths)
+        return f'MultiplexedPath({paths})'
+
+
+class NamespaceReader(abc.TraversableResources):
+    def __init__(self, namespace_path):
+        if 'NamespacePath' not in str(namespace_path):
+            raise ValueError('Invalid path')
+        self.path = MultiplexedPath(*list(namespace_path))
+
+    def resource_path(self, resource):
+        """
+        Return the file system path to prevent
+        `resources.path()` from creating a temporary
+        copy.
+        """
+        return str(self.path.joinpath(resource))
+
+    def files(self):
+        return self.path
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/simple.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..7770c922c84fabe0031333a4de305dd6d6852911
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/importlib_resources/simple.py
@@ -0,0 +1,106 @@
+"""
+Interface adapters for low-level readers.
+"""
+
+import abc
+import io
+import itertools
+from typing import BinaryIO, List
+
+from .abc import Traversable, TraversableResources
+
+
+class SimpleReader(abc.ABC):
+    """
+    The minimum, low-level interface required from a resource
+    provider.
+    """
+
+    @property
+    @abc.abstractmethod
+    def package(self) -> str:
+        """
+        The name of the package for which this reader loads resources.
+        """
+
+    @abc.abstractmethod
+    def children(self) -> List['SimpleReader']:
+        """
+        Obtain an iterable of SimpleReader for available
+        child containers (e.g. directories).
+        """
+
+    @abc.abstractmethod
+    def resources(self) -> List[str]:
+        """
+        Obtain available named resources for this virtual package.
+        """
+
+    @abc.abstractmethod
+    def open_binary(self, resource: str) -> BinaryIO:
+        """
+        Obtain a File-like for a named resource.
+        """
+
+    @property
+    def name(self):
+        return self.package.split('.')[-1]
+
+
+class ResourceContainer(Traversable):
+    """
+    Traversable container for a package's resources via its reader.
+    """
+
+    def __init__(self, reader: SimpleReader):
+        self.reader = reader
+
+    def is_dir(self):
+        return True
+
+    def is_file(self):
+        return False
+
+    def iterdir(self):
+        files = (ResourceHandle(self, name) for name in self.reader.resources)
+        dirs = map(ResourceContainer, self.reader.children())
+        return itertools.chain(files, dirs)
+
+    def open(self, *args, **kwargs):
+        raise IsADirectoryError()
+
+
+class ResourceHandle(Traversable):
+    """
+    Handle to a named resource in a ResourceReader.
+    """
+
+    def __init__(self, parent: ResourceContainer, name: str):
+        self.parent = parent
+        self.name = name  # type: ignore
+
+    def is_file(self):
+        return True
+
+    def is_dir(self):
+        return False
+
+    def open(self, mode='r', *args, **kwargs):
+        stream = self.parent.reader.open_binary(self.name)
+        if 'b' not in mode:
+            stream = io.TextIOWrapper(*args, **kwargs)
+        return stream
+
+    def joinpath(self, name):
+        raise RuntimeError("Cannot traverse into a resource")
+
+
+class TraversableReader(TraversableResources, SimpleReader):
+    """
+    A TraversableResources based on SimpleReader. Resource providers
+    may derive from this class to provide the TraversableResources
+    interface by supplying the SimpleReader interface.
+    """
+
+    def files(self):
+        return ResourceContainer(self)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3af63b2a70bacc096648186b728ee5b58de73bcd
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__pycache__/context.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__pycache__/context.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..90205645f0f194116e6c8ddabf4c782fb4e18b4a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/__pycache__/context.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/context.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/context.py
new file mode 100644
index 0000000000000000000000000000000000000000..0322c45d4ab552cb09a48214bf117b5d2eaea895
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/context.py
@@ -0,0 +1,361 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import operator
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import urllib.request
+import warnings
+from typing import Iterator
+
+
+if sys.version_info < (3, 12):
+    from setuptools.extern.backports import tarfile
+else:
+    import tarfile
+
+
+@contextlib.contextmanager
+def pushd(dir: str | os.PathLike) -> Iterator[str | os.PathLike]:
+    """
+    >>> tmp_path = getfixture('tmp_path')
+    >>> with pushd(tmp_path):
+    ...     assert os.getcwd() == os.fspath(tmp_path)
+    >>> assert os.getcwd() != os.fspath(tmp_path)
+    """
+
+    orig = os.getcwd()
+    os.chdir(dir)
+    try:
+        yield dir
+    finally:
+        os.chdir(orig)
+
+
+@contextlib.contextmanager
+def tarball(
+    url, target_dir: str | os.PathLike | None = None
+) -> Iterator[str | os.PathLike]:
+    """
+    Get a tarball, extract it, yield, then clean up.
+
+    >>> import urllib.request
+    >>> url = getfixture('tarfile_served')
+    >>> target = getfixture('tmp_path') / 'out'
+    >>> tb = tarball(url, target_dir=target)
+    >>> import pathlib
+    >>> with tb as extracted:
+    ...     contents = pathlib.Path(extracted, 'contents.txt').read_text(encoding='utf-8')
+    >>> assert not os.path.exists(extracted)
+    """
+    if target_dir is None:
+        target_dir = os.path.basename(url).replace('.tar.gz', '').replace('.tgz', '')
+    # In the tar command, use --strip-components=1 to strip the first path and
+    #  then
+    #  use -C to cause the files to be extracted to {target_dir}. This ensures
+    #  that we always know where the files were extracted.
+    os.mkdir(target_dir)
+    try:
+        req = urllib.request.urlopen(url)
+        with tarfile.open(fileobj=req, mode='r|*') as tf:
+            tf.extractall(path=target_dir, filter=strip_first_component)
+        yield target_dir
+    finally:
+        shutil.rmtree(target_dir)
+
+
+def strip_first_component(
+    member: tarfile.TarInfo,
+    path,
+) -> tarfile.TarInfo:
+    _, member.name = member.name.split('/', 1)
+    return member
+
+
+def _compose(*cmgrs):
+    """
+    Compose any number of dependent context managers into a single one.
+
+    The last, innermost context manager may take arbitrary arguments, but
+    each successive context manager should accept the result from the
+    previous as a single parameter.
+
+    Like :func:`jaraco.functools.compose`, behavior works from right to
+    left, so the context manager should be indicated from outermost to
+    innermost.
+
+    Example, to create a context manager to change to a temporary
+    directory:
+
+    >>> temp_dir_as_cwd = _compose(pushd, temp_dir)
+    >>> with temp_dir_as_cwd() as dir:
+    ...     assert os.path.samefile(os.getcwd(), dir)
+    """
+
+    def compose_two(inner, outer):
+        def composed(*args, **kwargs):
+            with inner(*args, **kwargs) as saved, outer(saved) as res:
+                yield res
+
+        return contextlib.contextmanager(composed)
+
+    return functools.reduce(compose_two, reversed(cmgrs))
+
+
+tarball_cwd = _compose(pushd, tarball)
+
+
+@contextlib.contextmanager
+def tarball_context(*args, **kwargs):
+    warnings.warn(
+        "tarball_context is deprecated. Use tarball or tarball_cwd instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    pushd_ctx = kwargs.pop('pushd', pushd)
+    with tarball(*args, **kwargs) as tball, pushd_ctx(tball) as dir:
+        yield dir
+
+
+def infer_compression(url):
+    """
+    Given a URL or filename, infer the compression code for tar.
+
+    >>> infer_compression('http://foo/bar.tar.gz')
+    'z'
+    >>> infer_compression('http://foo/bar.tgz')
+    'z'
+    >>> infer_compression('file.bz')
+    'j'
+    >>> infer_compression('file.xz')
+    'J'
+    """
+    warnings.warn(
+        "infer_compression is deprecated with no replacement",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    # cheat and just assume it's the last two characters
+    compression_indicator = url[-2:]
+    mapping = dict(gz='z', bz='j', xz='J')
+    # Assume 'z' (gzip) if no match
+    return mapping.get(compression_indicator, 'z')
+
+
+@contextlib.contextmanager
+def temp_dir(remover=shutil.rmtree):
+    """
+    Create a temporary directory context. Pass a custom remover
+    to override the removal behavior.
+
+    >>> import pathlib
+    >>> with temp_dir() as the_dir:
+    ...     assert os.path.isdir(the_dir)
+    ...     _ = pathlib.Path(the_dir).joinpath('somefile').write_text('contents', encoding='utf-8')
+    >>> assert not os.path.exists(the_dir)
+    """
+    temp_dir = tempfile.mkdtemp()
+    try:
+        yield temp_dir
+    finally:
+        remover(temp_dir)
+
+
+@contextlib.contextmanager
+def repo_context(url, branch=None, quiet=True, dest_ctx=temp_dir):
+    """
+    Check out the repo indicated by url.
+
+    If dest_ctx is supplied, it should be a context manager
+    to yield the target directory for the check out.
+    """
+    exe = 'git' if 'git' in url else 'hg'
+    with dest_ctx() as repo_dir:
+        cmd = [exe, 'clone', url, repo_dir]
+        if branch:
+            cmd.extend(['--branch', branch])
+        devnull = open(os.path.devnull, 'w')
+        stdout = devnull if quiet else None
+        subprocess.check_call(cmd, stdout=stdout)
+        yield repo_dir
+
+
+def null():
+    """
+    A null context suitable to stand in for a meaningful context.
+
+    >>> with null() as value:
+    ...     assert value is None
+
+    This context is most useful when dealing with two or more code
+    branches but only some need a context. Wrap the others in a null
+    context to provide symmetry across all options.
+    """
+    warnings.warn(
+        "null is deprecated. Use contextlib.nullcontext",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return contextlib.nullcontext()
+
+
+class ExceptionTrap:
+    """
+    A context manager that will catch certain exceptions and provide an
+    indication they occurred.
+
+    >>> with ExceptionTrap() as trap:
+    ...     raise Exception()
+    >>> bool(trap)
+    True
+
+    >>> with ExceptionTrap() as trap:
+    ...     pass
+    >>> bool(trap)
+    False
+
+    >>> with ExceptionTrap(ValueError) as trap:
+    ...     raise ValueError("1 + 1 is not 3")
+    >>> bool(trap)
+    True
+    >>> trap.value
+    ValueError('1 + 1 is not 3')
+    >>> trap.tb
+    
+
+    >>> with ExceptionTrap(ValueError) as trap:
+    ...     raise Exception()
+    Traceback (most recent call last):
+    ...
+    Exception
+
+    >>> bool(trap)
+    False
+    """
+
+    exc_info = None, None, None
+
+    def __init__(self, exceptions=(Exception,)):
+        self.exceptions = exceptions
+
+    def __enter__(self):
+        return self
+
+    @property
+    def type(self):
+        return self.exc_info[0]
+
+    @property
+    def value(self):
+        return self.exc_info[1]
+
+    @property
+    def tb(self):
+        return self.exc_info[2]
+
+    def __exit__(self, *exc_info):
+        type = exc_info[0]
+        matches = type and issubclass(type, self.exceptions)
+        if matches:
+            self.exc_info = exc_info
+        return matches
+
+    def __bool__(self):
+        return bool(self.type)
+
+    def raises(self, func, *, _test=bool):
+        """
+        Wrap func and replace the result with the truth
+        value of the trap (True if an exception occurred).
+
+        First, give the decorator an alias to support Python 3.8
+        Syntax.
+
+        >>> raises = ExceptionTrap(ValueError).raises
+
+        Now decorate a function that always fails.
+
+        >>> @raises
+        ... def fail():
+        ...     raise ValueError('failed')
+        >>> fail()
+        True
+        """
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            with ExceptionTrap(self.exceptions) as trap:
+                func(*args, **kwargs)
+            return _test(trap)
+
+        return wrapper
+
+    def passes(self, func):
+        """
+        Wrap func and replace the result with the truth
+        value of the trap (True if no exception).
+
+        First, give the decorator an alias to support Python 3.8
+        Syntax.
+
+        >>> passes = ExceptionTrap(ValueError).passes
+
+        Now decorate a function that always fails.
+
+        >>> @passes
+        ... def fail():
+        ...     raise ValueError('failed')
+
+        >>> fail()
+        False
+        """
+        return self.raises(func, _test=operator.not_)
+
+
+class suppress(contextlib.suppress, contextlib.ContextDecorator):
+    """
+    A version of contextlib.suppress with decorator support.
+
+    >>> @suppress(KeyError)
+    ... def key_error():
+    ...     {}['']
+    >>> key_error()
+    """
+
+
+class on_interrupt(contextlib.ContextDecorator):
+    """
+    Replace a KeyboardInterrupt with SystemExit(1)
+
+    >>> def do_interrupt():
+    ...     raise KeyboardInterrupt()
+    >>> on_interrupt('error')(do_interrupt)()
+    Traceback (most recent call last):
+    ...
+    SystemExit: 1
+    >>> on_interrupt('error', code=255)(do_interrupt)()
+    Traceback (most recent call last):
+    ...
+    SystemExit: 255
+    >>> on_interrupt('suppress')(do_interrupt)()
+    >>> with __import__('pytest').raises(KeyboardInterrupt):
+    ...     on_interrupt('ignore')(do_interrupt)()
+    """
+
+    def __init__(self, action='error', /, code=1):
+        self.action = action
+        self.code = code
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exctype, excinst, exctb):
+        if exctype is not KeyboardInterrupt or self.action == 'ignore':
+            return
+        elif self.action == 'error':
+            raise SystemExit(self.code) from excinst
+        return self.action == 'suppress'
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..130b87a48591cd4b120f1c54966b7f78c5d88167
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__init__.py
@@ -0,0 +1,633 @@
+import collections.abc
+import functools
+import inspect
+import itertools
+import operator
+import time
+import types
+import warnings
+
+import setuptools.extern.more_itertools
+
+
+def compose(*funcs):
+    """
+    Compose any number of unary functions into a single unary function.
+
+    >>> import textwrap
+    >>> expected = str.strip(textwrap.dedent(compose.__doc__))
+    >>> strip_and_dedent = compose(str.strip, textwrap.dedent)
+    >>> strip_and_dedent(compose.__doc__) == expected
+    True
+
+    Compose also allows the innermost function to take arbitrary arguments.
+
+    >>> round_three = lambda x: round(x, ndigits=3)
+    >>> f = compose(round_three, int.__truediv__)
+    >>> [f(3*x, x+1) for x in range(1,10)]
+    [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7]
+    """
+
+    def compose_two(f1, f2):
+        return lambda *args, **kwargs: f1(f2(*args, **kwargs))
+
+    return functools.reduce(compose_two, funcs)
+
+
+def once(func):
+    """
+    Decorate func so it's only ever called the first time.
+
+    This decorator can ensure that an expensive or non-idempotent function
+    will not be expensive on subsequent calls and is idempotent.
+
+    >>> add_three = once(lambda a: a+3)
+    >>> add_three(3)
+    6
+    >>> add_three(9)
+    6
+    >>> add_three('12')
+    6
+
+    To reset the stored value, simply clear the property ``saved_result``.
+
+    >>> del add_three.saved_result
+    >>> add_three(9)
+    12
+    >>> add_three(8)
+    12
+
+    Or invoke 'reset()' on it.
+
+    >>> add_three.reset()
+    >>> add_three(-3)
+    0
+    >>> add_three(0)
+    0
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if not hasattr(wrapper, 'saved_result'):
+            wrapper.saved_result = func(*args, **kwargs)
+        return wrapper.saved_result
+
+    wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result')
+    return wrapper
+
+
+def method_cache(method, cache_wrapper=functools.lru_cache()):
+    """
+    Wrap lru_cache to support storing the cache data in the object instances.
+
+    Abstracts the common paradigm where the method explicitly saves an
+    underscore-prefixed protected property on first call and returns that
+    subsequently.
+
+    >>> class MyClass:
+    ...     calls = 0
+    ...
+    ...     @method_cache
+    ...     def method(self, value):
+    ...         self.calls += 1
+    ...         return value
+
+    >>> a = MyClass()
+    >>> a.method(3)
+    3
+    >>> for x in range(75):
+    ...     res = a.method(x)
+    >>> a.calls
+    75
+
+    Note that the apparent behavior will be exactly like that of lru_cache
+    except that the cache is stored on each instance, so values in one
+    instance will not flush values from another, and when an instance is
+    deleted, so are the cached values for that instance.
+
+    >>> b = MyClass()
+    >>> for x in range(35):
+    ...     res = b.method(x)
+    >>> b.calls
+    35
+    >>> a.method(0)
+    0
+    >>> a.calls
+    75
+
+    Note that if method had been decorated with ``functools.lru_cache()``,
+    a.calls would have been 76 (due to the cached value of 0 having been
+    flushed by the 'b' instance).
+
+    Clear the cache with ``.cache_clear()``
+
+    >>> a.method.cache_clear()
+
+    Same for a method that hasn't yet been called.
+
+    >>> c = MyClass()
+    >>> c.method.cache_clear()
+
+    Another cache wrapper may be supplied:
+
+    >>> cache = functools.lru_cache(maxsize=2)
+    >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
+    >>> a = MyClass()
+    >>> a.method2()
+    3
+
+    Caution - do not subsequently wrap the method with another decorator, such
+    as ``@property``, which changes the semantics of the function.
+
+    See also
+    http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
+    for another implementation and additional justification.
+    """
+
+    def wrapper(self, *args, **kwargs):
+        # it's the first call, replace the method with a cached, bound method
+        bound_method = types.MethodType(method, self)
+        cached_method = cache_wrapper(bound_method)
+        setattr(self, method.__name__, cached_method)
+        return cached_method(*args, **kwargs)
+
+    # Support cache clear even before cache has been created.
+    wrapper.cache_clear = lambda: None
+
+    return _special_method_cache(method, cache_wrapper) or wrapper
+
+
+def _special_method_cache(method, cache_wrapper):
+    """
+    Because Python treats special methods differently, it's not
+    possible to use instance attributes to implement the cached
+    methods.
+
+    Instead, install the wrapper method under a different name
+    and return a simple proxy to that wrapper.
+
+    https://github.com/jaraco/jaraco.functools/issues/5
+    """
+    name = method.__name__
+    special_names = '__getattr__', '__getitem__'
+
+    if name not in special_names:
+        return None
+
+    wrapper_name = '__cached' + name
+
+    def proxy(self, /, *args, **kwargs):
+        if wrapper_name not in vars(self):
+            bound = types.MethodType(method, self)
+            cache = cache_wrapper(bound)
+            setattr(self, wrapper_name, cache)
+        else:
+            cache = getattr(self, wrapper_name)
+        return cache(*args, **kwargs)
+
+    return proxy
+
+
+def apply(transform):
+    """
+    Decorate a function with a transform function that is
+    invoked on results returned from the decorated function.
+
+    >>> @apply(reversed)
+    ... def get_numbers(start):
+    ...     "doc for get_numbers"
+    ...     return range(start, start+3)
+    >>> list(get_numbers(4))
+    [6, 5, 4]
+    >>> get_numbers.__doc__
+    'doc for get_numbers'
+    """
+
+    def wrap(func):
+        return functools.wraps(func)(compose(transform, func))
+
+    return wrap
+
+
+def result_invoke(action):
+    r"""
+    Decorate a function with an action function that is
+    invoked on the results returned from the decorated
+    function (for its side effect), then return the original
+    result.
+
+    >>> @result_invoke(print)
+    ... def add_two(a, b):
+    ...     return a + b
+    >>> x = add_two(2, 3)
+    5
+    >>> x
+    5
+    """
+
+    def wrap(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            result = func(*args, **kwargs)
+            action(result)
+            return result
+
+        return wrapper
+
+    return wrap
+
+
+def invoke(f, /, *args, **kwargs):
+    """
+    Call a function for its side effect after initialization.
+
+    The benefit of using the decorator instead of simply invoking a function
+    after defining it is that it makes explicit the author's intent for the
+    function to be called immediately. Whereas if one simply calls the
+    function immediately, it's less obvious if that was intentional or
+    incidental. It also avoids repeating the name - the two actions, defining
+    the function and calling it immediately are modeled separately, but linked
+    by the decorator construct.
+
+    The benefit of having a function construct (opposed to just invoking some
+    behavior inline) is to serve as a scope in which the behavior occurs. It
+    avoids polluting the global namespace with local variables, provides an
+    anchor on which to attach documentation (docstring), keeps the behavior
+    logically separated (instead of conceptually separated or not separated at
+    all), and provides potential to re-use the behavior for testing or other
+    purposes.
+
+    This function is named as a pithy way to communicate, "call this function
+    primarily for its side effect", or "while defining this function, also
+    take it aside and call it". It exists because there's no Python construct
+    for "define and call" (nor should there be, as decorators serve this need
+    just fine). The behavior happens immediately and synchronously.
+
+    >>> @invoke
+    ... def func(): print("called")
+    called
+    >>> func()
+    called
+
+    Use functools.partial to pass parameters to the initial call
+
+    >>> @functools.partial(invoke, name='bingo')
+    ... def func(name): print('called with', name)
+    called with bingo
+    """
+    f(*args, **kwargs)
+    return f
+
+
+class Throttler:
+    """Rate-limit a function (or other callable)."""
+
+    def __init__(self, func, max_rate=float('Inf')):
+        if isinstance(func, Throttler):
+            func = func.func
+        self.func = func
+        self.max_rate = max_rate
+        self.reset()
+
+    def reset(self):
+        self.last_called = 0
+
+    def __call__(self, *args, **kwargs):
+        self._wait()
+        return self.func(*args, **kwargs)
+
+    def _wait(self):
+        """Ensure at least 1/max_rate seconds from last call."""
+        elapsed = time.time() - self.last_called
+        must_wait = 1 / self.max_rate - elapsed
+        time.sleep(max(0, must_wait))
+        self.last_called = time.time()
+
+    def __get__(self, obj, owner=None):
+        return first_invoke(self._wait, functools.partial(self.func, obj))
+
+
+def first_invoke(func1, func2):
+    """
+    Return a function that when invoked will invoke func1 without
+    any parameters (for its side effect) and then invoke func2
+    with whatever parameters were passed, returning its result.
+    """
+
+    def wrapper(*args, **kwargs):
+        func1()
+        return func2(*args, **kwargs)
+
+    return wrapper
+
+
+method_caller = first_invoke(
+    lambda: warnings.warn(
+        '`jaraco.functools.method_caller` is deprecated, '
+        'use `operator.methodcaller` instead',
+        DeprecationWarning,
+        stacklevel=3,
+    ),
+    operator.methodcaller,
+)
+
+
+def retry_call(func, cleanup=lambda: None, retries=0, trap=()):
+    """
+    Given a callable func, trap the indicated exceptions
+    for up to 'retries' times, invoking cleanup on the
+    exception. On the final attempt, allow any exceptions
+    to propagate.
+    """
+    attempts = itertools.count() if retries == float('inf') else range(retries)
+    for _ in attempts:
+        try:
+            return func()
+        except trap:
+            cleanup()
+
+    return func()
+
+
+def retry(*r_args, **r_kwargs):
+    """
+    Decorator wrapper for retry_call. Accepts arguments to retry_call
+    except func and then returns a decorator for the decorated function.
+
+    Ex:
+
+    >>> @retry(retries=3)
+    ... def my_func(a, b):
+    ...     "this is my funk"
+    ...     print(a, b)
+    >>> my_func.__doc__
+    'this is my funk'
+    """
+
+    def decorate(func):
+        @functools.wraps(func)
+        def wrapper(*f_args, **f_kwargs):
+            bound = functools.partial(func, *f_args, **f_kwargs)
+            return retry_call(bound, *r_args, **r_kwargs)
+
+        return wrapper
+
+    return decorate
+
+
+def print_yielded(func):
+    """
+    Convert a generator into a function that prints all yielded elements.
+
+    >>> @print_yielded
+    ... def x():
+    ...     yield 3; yield None
+    >>> x()
+    3
+    None
+    """
+    print_all = functools.partial(map, print)
+    print_results = compose(more_itertools.consume, print_all, func)
+    return functools.wraps(func)(print_results)
+
+
+def pass_none(func):
+    """
+    Wrap func so it's not called if its first param is None.
+
+    >>> print_text = pass_none(print)
+    >>> print_text('text')
+    text
+    >>> print_text(None)
+    """
+
+    @functools.wraps(func)
+    def wrapper(param, /, *args, **kwargs):
+        if param is not None:
+            return func(param, *args, **kwargs)
+        return None
+
+    return wrapper
+
+
+def assign_params(func, namespace):
+    """
+    Assign parameters from namespace where func solicits.
+
+    >>> def func(x, y=3):
+    ...     print(x, y)
+    >>> assigned = assign_params(func, dict(x=2, z=4))
+    >>> assigned()
+    2 3
+
+    The usual errors are raised if a function doesn't receive
+    its required parameters:
+
+    >>> assigned = assign_params(func, dict(y=3, z=4))
+    >>> assigned()
+    Traceback (most recent call last):
+    TypeError: func() ...argument...
+
+    It even works on methods:
+
+    >>> class Handler:
+    ...     def meth(self, arg):
+    ...         print(arg)
+    >>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))()
+    crystal
+    """
+    sig = inspect.signature(func)
+    params = sig.parameters.keys()
+    call_ns = {k: namespace[k] for k in params if k in namespace}
+    return functools.partial(func, **call_ns)
+
+
+def save_method_args(method):
+    """
+    Wrap a method such that when it is called, the args and kwargs are
+    saved on the method.
+
+    >>> class MyClass:
+    ...     @save_method_args
+    ...     def method(self, a, b):
+    ...         print(a, b)
+    >>> my_ob = MyClass()
+    >>> my_ob.method(1, 2)
+    1 2
+    >>> my_ob._saved_method.args
+    (1, 2)
+    >>> my_ob._saved_method.kwargs
+    {}
+    >>> my_ob.method(a=3, b='foo')
+    3 foo
+    >>> my_ob._saved_method.args
+    ()
+    >>> my_ob._saved_method.kwargs == dict(a=3, b='foo')
+    True
+
+    The arguments are stored on the instance, allowing for
+    different instance to save different args.
+
+    >>> your_ob = MyClass()
+    >>> your_ob.method({str('x'): 3}, b=[4])
+    {'x': 3} [4]
+    >>> your_ob._saved_method.args
+    ({'x': 3},)
+    >>> my_ob._saved_method.args
+    ()
+    """
+    args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs')
+
+    @functools.wraps(method)
+    def wrapper(self, /, *args, **kwargs):
+        attr_name = '_saved_' + method.__name__
+        attr = args_and_kwargs(args, kwargs)
+        setattr(self, attr_name, attr)
+        return method(self, *args, **kwargs)
+
+    return wrapper
+
+
+def except_(*exceptions, replace=None, use=None):
+    """
+    Replace the indicated exceptions, if raised, with the indicated
+    literal replacement or evaluated expression (if present).
+
+    >>> safe_int = except_(ValueError)(int)
+    >>> safe_int('five')
+    >>> safe_int('5')
+    5
+
+    Specify a literal replacement with ``replace``.
+
+    >>> safe_int_r = except_(ValueError, replace=0)(int)
+    >>> safe_int_r('five')
+    0
+
+    Provide an expression to ``use`` to pass through particular parameters.
+
+    >>> safe_int_pt = except_(ValueError, use='args[0]')(int)
+    >>> safe_int_pt('five')
+    'five'
+
+    """
+
+    def decorate(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                return func(*args, **kwargs)
+            except exceptions:
+                try:
+                    return eval(use)
+                except TypeError:
+                    return replace
+
+        return wrapper
+
+    return decorate
+
+
+def identity(x):
+    """
+    Return the argument.
+
+    >>> o = object()
+    >>> identity(o) is o
+    True
+    """
+    return x
+
+
+def bypass_when(check, *, _op=identity):
+    """
+    Decorate a function to return its parameter when ``check``.
+
+    >>> bypassed = []  # False
+
+    >>> @bypass_when(bypassed)
+    ... def double(x):
+    ...     return x * 2
+    >>> double(2)
+    4
+    >>> bypassed[:] = [object()]  # True
+    >>> double(2)
+    2
+    """
+
+    def decorate(func):
+        @functools.wraps(func)
+        def wrapper(param, /):
+            return param if _op(check) else func(param)
+
+        return wrapper
+
+    return decorate
+
+
+def bypass_unless(check):
+    """
+    Decorate a function to return its parameter unless ``check``.
+
+    >>> enabled = [object()]  # True
+
+    >>> @bypass_unless(enabled)
+    ... def double(x):
+    ...     return x * 2
+    >>> double(2)
+    4
+    >>> del enabled[:]  # False
+    >>> double(2)
+    2
+    """
+    return bypass_when(check, _op=operator.not_)
+
+
+@functools.singledispatch
+def _splat_inner(args, func):
+    """Splat args to func."""
+    return func(*args)
+
+
+@_splat_inner.register
+def _(args: collections.abc.Mapping, func):
+    """Splat kargs to func as kwargs."""
+    return func(**args)
+
+
+def splat(func):
+    """
+    Wrap func to expect its parameters to be passed positionally in a tuple.
+
+    Has a similar effect to that of ``itertools.starmap`` over
+    simple ``map``.
+
+    >>> pairs = [(-1, 1), (0, 2)]
+    >>> setuptools.extern.more_itertools.consume(itertools.starmap(print, pairs))
+    -1 1
+    0 2
+    >>> setuptools.extern.more_itertools.consume(map(splat(print), pairs))
+    -1 1
+    0 2
+
+    The approach generalizes to other iterators that don't have a "star"
+    equivalent, such as a "starfilter".
+
+    >>> list(filter(splat(operator.add), pairs))
+    [(0, 2)]
+
+    Splat also accepts a mapping argument.
+
+    >>> def is_nice(msg, code):
+    ...     return "smile" in msg or code == 0
+    >>> msgs = [
+    ...     dict(msg='smile!', code=20),
+    ...     dict(msg='error :(', code=1),
+    ...     dict(msg='unknown', code=0),
+    ... ]
+    >>> for msg in filter(splat(is_nice), msgs):
+    ...     print(msg)
+    {'msg': 'smile!', 'code': 20}
+    {'msg': 'unknown', 'code': 0}
+    """
+    return functools.wraps(func)(functools.partial(_splat_inner, func=func))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__init__.pyi b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__init__.pyi
new file mode 100644
index 0000000000000000000000000000000000000000..c2b9ab1757ec31dc7ccf13a9eaf8148ba0f31cec
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__init__.pyi
@@ -0,0 +1,128 @@
+from collections.abc import Callable, Hashable, Iterator
+from functools import partial
+from operator import methodcaller
+import sys
+from typing import (
+    Any,
+    Generic,
+    Protocol,
+    TypeVar,
+    overload,
+)
+
+if sys.version_info >= (3, 10):
+    from typing import Concatenate, ParamSpec
+else:
+    from typing_extensions import Concatenate, ParamSpec
+
+_P = ParamSpec('_P')
+_R = TypeVar('_R')
+_T = TypeVar('_T')
+_R1 = TypeVar('_R1')
+_R2 = TypeVar('_R2')
+_V = TypeVar('_V')
+_S = TypeVar('_S')
+_R_co = TypeVar('_R_co', covariant=True)
+
+class _OnceCallable(Protocol[_P, _R]):
+    saved_result: _R
+    reset: Callable[[], None]
+    def __call__(self, *args: _P.args, **kwargs: _P.kwargs) -> _R: ...
+
+class _ProxyMethodCacheWrapper(Protocol[_R_co]):
+    cache_clear: Callable[[], None]
+    def __call__(self, *args: Hashable, **kwargs: Hashable) -> _R_co: ...
+
+class _MethodCacheWrapper(Protocol[_R_co]):
+    def cache_clear(self) -> None: ...
+    def __call__(self, *args: Hashable, **kwargs: Hashable) -> _R_co: ...
+
+# `compose()` overloads below will cover most use cases.
+
+@overload
+def compose(
+    __func1: Callable[[_R], _T],
+    __func2: Callable[_P, _R],
+    /,
+) -> Callable[_P, _T]: ...
+@overload
+def compose(
+    __func1: Callable[[_R], _T],
+    __func2: Callable[[_R1], _R],
+    __func3: Callable[_P, _R1],
+    /,
+) -> Callable[_P, _T]: ...
+@overload
+def compose(
+    __func1: Callable[[_R], _T],
+    __func2: Callable[[_R2], _R],
+    __func3: Callable[[_R1], _R2],
+    __func4: Callable[_P, _R1],
+    /,
+) -> Callable[_P, _T]: ...
+def once(func: Callable[_P, _R]) -> _OnceCallable[_P, _R]: ...
+def method_cache(
+    method: Callable[..., _R],
+    cache_wrapper: Callable[[Callable[..., _R]], _MethodCacheWrapper[_R]] = ...,
+) -> _MethodCacheWrapper[_R] | _ProxyMethodCacheWrapper[_R]: ...
+def apply(
+    transform: Callable[[_R], _T]
+) -> Callable[[Callable[_P, _R]], Callable[_P, _T]]: ...
+def result_invoke(
+    action: Callable[[_R], Any]
+) -> Callable[[Callable[_P, _R]], Callable[_P, _R]]: ...
+def invoke(
+    f: Callable[_P, _R], /, *args: _P.args, **kwargs: _P.kwargs
+) -> Callable[_P, _R]: ...
+def call_aside(
+    f: Callable[_P, _R], *args: _P.args, **kwargs: _P.kwargs
+) -> Callable[_P, _R]: ...
+
+class Throttler(Generic[_R]):
+    last_called: float
+    func: Callable[..., _R]
+    max_rate: float
+    def __init__(
+        self, func: Callable[..., _R] | Throttler[_R], max_rate: float = ...
+    ) -> None: ...
+    def reset(self) -> None: ...
+    def __call__(self, *args: Any, **kwargs: Any) -> _R: ...
+    def __get__(self, obj: Any, owner: type[Any] | None = ...) -> Callable[..., _R]: ...
+
+def first_invoke(
+    func1: Callable[..., Any], func2: Callable[_P, _R]
+) -> Callable[_P, _R]: ...
+
+method_caller: Callable[..., methodcaller]
+
+def retry_call(
+    func: Callable[..., _R],
+    cleanup: Callable[..., None] = ...,
+    retries: int | float = ...,
+    trap: type[BaseException] | tuple[type[BaseException], ...] = ...,
+) -> _R: ...
+def retry(
+    cleanup: Callable[..., None] = ...,
+    retries: int | float = ...,
+    trap: type[BaseException] | tuple[type[BaseException], ...] = ...,
+) -> Callable[[Callable[..., _R]], Callable[..., _R]]: ...
+def print_yielded(func: Callable[_P, Iterator[Any]]) -> Callable[_P, None]: ...
+def pass_none(
+    func: Callable[Concatenate[_T, _P], _R]
+) -> Callable[Concatenate[_T, _P], _R]: ...
+def assign_params(
+    func: Callable[..., _R], namespace: dict[str, Any]
+) -> partial[_R]: ...
+def save_method_args(
+    method: Callable[Concatenate[_S, _P], _R]
+) -> Callable[Concatenate[_S, _P], _R]: ...
+def except_(
+    *exceptions: type[BaseException], replace: Any = ..., use: Any = ...
+) -> Callable[[Callable[_P, Any]], Callable[_P, Any]]: ...
+def identity(x: _T) -> _T: ...
+def bypass_when(
+    check: _V, *, _op: Callable[[_V], Any] = ...
+) -> Callable[[Callable[[_T], _R]], Callable[[_T], _T | _R]]: ...
+def bypass_unless(
+    check: Any,
+) -> Callable[[Callable[[_T], _R]], Callable[[_T], _T | _R]]: ...
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dd4c6a3a9d178e642515881e28b0a121e1acc561
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/functools/py.typed
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/text/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/text/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0306d5ff5cc4a2eb76458c127c462efe59a566d
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/text/__init__.py
@@ -0,0 +1,599 @@
+import re
+import itertools
+import textwrap
+import functools
+
+try:
+    from importlib.resources import files  # type: ignore
+except ImportError:  # pragma: nocover
+    from setuptools.extern.importlib_resources import files  # type: ignore
+
+from setuptools.extern.jaraco.functools import compose, method_cache
+from setuptools.extern.jaraco.context import ExceptionTrap
+
+
+def substitution(old, new):
+    """
+    Return a function that will perform a substitution on a string
+    """
+    return lambda s: s.replace(old, new)
+
+
+def multi_substitution(*substitutions):
+    """
+    Take a sequence of pairs specifying substitutions, and create
+    a function that performs those substitutions.
+
+    >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
+    'baz'
+    """
+    substitutions = itertools.starmap(substitution, substitutions)
+    # compose function applies last function first, so reverse the
+    #  substitutions to get the expected order.
+    substitutions = reversed(tuple(substitutions))
+    return compose(*substitutions)
+
+
+class FoldedCase(str):
+    """
+    A case insensitive string class; behaves just like str
+    except compares equal when the only variation is case.
+
+    >>> s = FoldedCase('hello world')
+
+    >>> s == 'Hello World'
+    True
+
+    >>> 'Hello World' == s
+    True
+
+    >>> s != 'Hello World'
+    False
+
+    >>> s.index('O')
+    4
+
+    >>> s.split('O')
+    ['hell', ' w', 'rld']
+
+    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
+    ['alpha', 'Beta', 'GAMMA']
+
+    Sequence membership is straightforward.
+
+    >>> "Hello World" in [s]
+    True
+    >>> s in ["Hello World"]
+    True
+
+    You may test for set inclusion, but candidate and elements
+    must both be folded.
+
+    >>> FoldedCase("Hello World") in {s}
+    True
+    >>> s in {FoldedCase("Hello World")}
+    True
+
+    String inclusion works as long as the FoldedCase object
+    is on the right.
+
+    >>> "hello" in FoldedCase("Hello World")
+    True
+
+    But not if the FoldedCase object is on the left:
+
+    >>> FoldedCase('hello') in 'Hello World'
+    False
+
+    In that case, use ``in_``:
+
+    >>> FoldedCase('hello').in_('Hello World')
+    True
+
+    >>> FoldedCase('hello') > FoldedCase('Hello')
+    False
+    """
+
+    def __lt__(self, other):
+        return self.lower() < other.lower()
+
+    def __gt__(self, other):
+        return self.lower() > other.lower()
+
+    def __eq__(self, other):
+        return self.lower() == other.lower()
+
+    def __ne__(self, other):
+        return self.lower() != other.lower()
+
+    def __hash__(self):
+        return hash(self.lower())
+
+    def __contains__(self, other):
+        return super().lower().__contains__(other.lower())
+
+    def in_(self, other):
+        "Does self appear in other?"
+        return self in FoldedCase(other)
+
+    # cache lower since it's likely to be called frequently.
+    @method_cache
+    def lower(self):
+        return super().lower()
+
+    def index(self, sub):
+        return self.lower().index(sub.lower())
+
+    def split(self, splitter=' ', maxsplit=0):
+        pattern = re.compile(re.escape(splitter), re.I)
+        return pattern.split(self, maxsplit)
+
+
+# Python 3.8 compatibility
+_unicode_trap = ExceptionTrap(UnicodeDecodeError)
+
+
+@_unicode_trap.passes
+def is_decodable(value):
+    r"""
+    Return True if the supplied value is decodable (using the default
+    encoding).
+
+    >>> is_decodable(b'\xff')
+    False
+    >>> is_decodable(b'\x32')
+    True
+    """
+    value.decode()
+
+
+def is_binary(value):
+    r"""
+    Return True if the value appears to be binary (that is, it's a byte
+    string and isn't decodable).
+
+    >>> is_binary(b'\xff')
+    True
+    >>> is_binary('\xff')
+    False
+    """
+    return isinstance(value, bytes) and not is_decodable(value)
+
+
+def trim(s):
+    r"""
+    Trim something like a docstring to remove the whitespace that
+    is common due to indentation and formatting.
+
+    >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
+    'foo = bar\n\tbar = baz'
+    """
+    return textwrap.dedent(s).strip()
+
+
+def wrap(s):
+    """
+    Wrap lines of text, retaining existing newlines as
+    paragraph markers.
+
+    >>> print(wrap(lorem_ipsum))
+    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
+    eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
+    minim veniam, quis nostrud exercitation ullamco laboris nisi ut
+    aliquip ex ea commodo consequat. Duis aute irure dolor in
+    reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
+    pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+    culpa qui officia deserunt mollit anim id est laborum.
+    
+    Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam
+    varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus
+    magna felis sollicitudin mauris. Integer in mauris eu nibh euismod
+    gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis
+    risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue,
+    eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas
+    fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla
+    a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis,
+    neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing
+    sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque
+    nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus
+    quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis,
+    molestie eu, feugiat in, orci. In hac habitasse platea dictumst.
+    """
+    paragraphs = s.splitlines()
+    wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs)
+    return '\n\n'.join(wrapped)
+
+
+def unwrap(s):
+    r"""
+    Given a multi-line string, return an unwrapped version.
+
+    >>> wrapped = wrap(lorem_ipsum)
+    >>> wrapped.count('\n')
+    20
+    >>> unwrapped = unwrap(wrapped)
+    >>> unwrapped.count('\n')
+    1
+    >>> print(unwrapped)
+    Lorem ipsum dolor sit amet, consectetur adipiscing ...
+    Curabitur pretium tincidunt lacus. Nulla gravida orci ...
+
+    """
+    paragraphs = re.split(r'\n\n+', s)
+    cleaned = (para.replace('\n', ' ') for para in paragraphs)
+    return '\n'.join(cleaned)
+
+
+
+
+class Splitter(object):
+    """object that will split a string with the given arguments for each call
+
+    >>> s = Splitter(',')
+    >>> s('hello, world, this is your, master calling')
+    ['hello', ' world', ' this is your', ' master calling']
+    """
+
+    def __init__(self, *args):
+        self.args = args
+
+    def __call__(self, s):
+        return s.split(*self.args)
+
+
+def indent(string, prefix=' ' * 4):
+    """
+    >>> indent('foo')
+    '    foo'
+    """
+    return prefix + string
+
+
+class WordSet(tuple):
+    """
+    Given an identifier, return the words that identifier represents,
+    whether in camel case, underscore-separated, etc.
+
+    >>> WordSet.parse("camelCase")
+    ('camel', 'Case')
+
+    >>> WordSet.parse("under_sep")
+    ('under', 'sep')
+
+    Acronyms should be retained
+
+    >>> WordSet.parse("firstSNL")
+    ('first', 'SNL')
+
+    >>> WordSet.parse("you_and_I")
+    ('you', 'and', 'I')
+
+    >>> WordSet.parse("A simple test")
+    ('A', 'simple', 'test')
+
+    Multiple caps should not interfere with the first cap of another word.
+
+    >>> WordSet.parse("myABCClass")
+    ('my', 'ABC', 'Class')
+
+    The result is a WordSet, so you can get the form you need.
+
+    >>> WordSet.parse("myABCClass").underscore_separated()
+    'my_ABC_Class'
+
+    >>> WordSet.parse('a-command').camel_case()
+    'ACommand'
+
+    >>> WordSet.parse('someIdentifier').lowered().space_separated()
+    'some identifier'
+
+    Slices of the result should return another WordSet.
+
+    >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
+    'out_of_context'
+
+    >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
+    'word set'
+
+    >>> example = WordSet.parse('figured it out')
+    >>> example.headless_camel_case()
+    'figuredItOut'
+    >>> example.dash_separated()
+    'figured-it-out'
+
+    """
+
+    _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')
+
+    def capitalized(self):
+        return WordSet(word.capitalize() for word in self)
+
+    def lowered(self):
+        return WordSet(word.lower() for word in self)
+
+    def camel_case(self):
+        return ''.join(self.capitalized())
+
+    def headless_camel_case(self):
+        words = iter(self)
+        first = next(words).lower()
+        new_words = itertools.chain((first,), WordSet(words).camel_case())
+        return ''.join(new_words)
+
+    def underscore_separated(self):
+        return '_'.join(self)
+
+    def dash_separated(self):
+        return '-'.join(self)
+
+    def space_separated(self):
+        return ' '.join(self)
+
+    def trim_right(self, item):
+        """
+        Remove the item from the end of the set.
+
+        >>> WordSet.parse('foo bar').trim_right('foo')
+        ('foo', 'bar')
+        >>> WordSet.parse('foo bar').trim_right('bar')
+        ('foo',)
+        >>> WordSet.parse('').trim_right('bar')
+        ()
+        """
+        return self[:-1] if self and self[-1] == item else self
+
+    def trim_left(self, item):
+        """
+        Remove the item from the beginning of the set.
+
+        >>> WordSet.parse('foo bar').trim_left('foo')
+        ('bar',)
+        >>> WordSet.parse('foo bar').trim_left('bar')
+        ('foo', 'bar')
+        >>> WordSet.parse('').trim_left('bar')
+        ()
+        """
+        return self[1:] if self and self[0] == item else self
+
+    def trim(self, item):
+        """
+        >>> WordSet.parse('foo bar').trim('foo')
+        ('bar',)
+        """
+        return self.trim_left(item).trim_right(item)
+
+    def __getitem__(self, item):
+        result = super(WordSet, self).__getitem__(item)
+        if isinstance(item, slice):
+            result = WordSet(result)
+        return result
+
+    @classmethod
+    def parse(cls, identifier):
+        matches = cls._pattern.finditer(identifier)
+        return WordSet(match.group(0) for match in matches)
+
+    @classmethod
+    def from_class_name(cls, subject):
+        return cls.parse(subject.__class__.__name__)
+
+
+# for backward compatibility
+words = WordSet.parse
+
+
+def simple_html_strip(s):
+    r"""
+    Remove HTML from the string `s`.
+
+    >>> str(simple_html_strip(''))
+    ''
+
+    >>> print(simple_html_strip('A stormy day in paradise'))
+    A stormy day in paradise
+
+    >>> print(simple_html_strip('Somebody  tell the truth.'))
+    Somebody  tell the truth.
+
+    >>> print(simple_html_strip('What about
\nmultiple lines?')) + What about + multiple lines? + """ + html_stripper = re.compile('()|(<[^>]*>)|([^<]+)', re.DOTALL) + texts = (match.group(3) or '' for match in html_stripper.finditer(s)) + return ''.join(texts) + + +class SeparatedValues(str): + """ + A string separated by a separator. Overrides __iter__ for getting + the values. + + >>> list(SeparatedValues('a,b,c')) + ['a', 'b', 'c'] + + Whitespace is stripped and empty values are discarded. + + >>> list(SeparatedValues(' a, b , c, ')) + ['a', 'b', 'c'] + """ + + separator = ',' + + def __iter__(self): + parts = self.split(self.separator) + return filter(None, (part.strip() for part in parts)) + + +class Stripper: + r""" + Given a series of lines, find the common prefix and strip it from them. + + >>> lines = [ + ... 'abcdefg\n', + ... 'abc\n', + ... 'abcde\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix + 'abc' + >>> list(res.lines) + ['defg\n', '\n', 'de\n'] + + If no prefix is common, nothing should be stripped. + + >>> lines = [ + ... 'abcd\n', + ... '1234\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix = '' + >>> list(res.lines) + ['abcd\n', '1234\n'] + """ + + def __init__(self, prefix, lines): + self.prefix = prefix + self.lines = map(self, lines) + + @classmethod + def strip_prefix(cls, lines): + prefix_lines, lines = itertools.tee(lines) + prefix = functools.reduce(cls.common_prefix, prefix_lines) + return cls(prefix, lines) + + def __call__(self, line): + if not self.prefix: + return line + null, prefix, rest = line.partition(self.prefix) + return rest + + @staticmethod + def common_prefix(s1, s2): + """ + Return the common prefix of two lines. + """ + index = min(len(s1), len(s2)) + while s1[:index] != s2[:index]: + index -= 1 + return s1[:index] + + +def remove_prefix(text, prefix): + """ + Remove the prefix from the text if it exists. + + >>> remove_prefix('underwhelming performance', 'underwhelming ') + 'performance' + + >>> remove_prefix('something special', 'sample') + 'something special' + """ + null, prefix, rest = text.rpartition(prefix) + return rest + + +def remove_suffix(text, suffix): + """ + Remove the suffix from the text if it exists. + + >>> remove_suffix('name.git', '.git') + 'name' + + >>> remove_suffix('something special', 'sample') + 'something special' + """ + rest, suffix, null = text.partition(suffix) + return rest + + +def normalize_newlines(text): + r""" + Replace alternate newlines with the canonical newline. + + >>> normalize_newlines('Lorem Ipsum\u2029') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\r\n') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\x85') + 'Lorem Ipsum\n' + """ + newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029'] + pattern = '|'.join(newlines) + return re.sub(pattern, '\n', text) + + +def _nonblank(str): + return str and not str.startswith('#') + + +@functools.singledispatch +def yield_lines(iterable): + r""" + Yield valid lines of a string or iterable. + + >>> list(yield_lines('')) + [] + >>> list(yield_lines(['foo', 'bar'])) + ['foo', 'bar'] + >>> list(yield_lines('foo\nbar')) + ['foo', 'bar'] + >>> list(yield_lines('\nfoo\n#bar\nbaz #comment')) + ['foo', 'baz #comment'] + >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n'])) + ['foo', 'bar', 'baz', 'bing'] + """ + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text): + return filter(_nonblank, map(str.strip, text.splitlines())) + + +def drop_comment(line): + """ + Drop comments. + + >>> drop_comment('foo # bar') + 'foo' + + A hash without a space may be in a URL. + + >>> drop_comment('http://example.com/foo#bar') + 'http://example.com/foo#bar' + """ + return line.partition(' #')[0] + + +def join_continuation(lines): + r""" + Join lines continued by a trailing backslash. + + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar \\', 'baz'])) + ['foobarbaz'] + + Not sure why, but... + The character preceeding the backslash is also elided. + + >>> list(join_continuation(['goo\\', 'dly'])) + ['godly'] + + A terrible idea, but... + If no line is available to continue, suppress the lines. + + >>> list(join_continuation(['foo', 'bar\\', 'baz\\'])) + ['foo'] + """ + lines = iter(lines) + for item in lines: + while item.endswith('\\'): + try: + item = item[:-2].strip() + next(lines) + except StopIteration: + return + yield item diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/text/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/text/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2db3af0f04604d1e49b79c6f834691f35ec9a4e4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/jaraco/text/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..19a169fc30183db91f931ad6ad04fbc0e16559b3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__init__.py @@ -0,0 +1,4 @@ +from .more import * # noqa +from .recipes import * # noqa + +__version__ = '8.8.0' diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__init__.pyi b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..96f6e36c7f4ac9ea0aebdcd9e11b8d1ff092d2ef --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__init__.pyi @@ -0,0 +1,2 @@ +from .more import * +from .recipes import * diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7d46936a7de8aa2791253391c4c4677c9da8135 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__pycache__/recipes.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__pycache__/recipes.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6897386c1ae22b7faaa6ad60340e4a316db79389 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/__pycache__/recipes.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/more.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/more.py new file mode 100644 index 0000000000000000000000000000000000000000..e6fca4d47f661ff16fdc8c2bb7ae5b86c7f347b2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/more.py @@ -0,0 +1,3824 @@ +import warnings + +from collections import Counter, defaultdict, deque, abc +from collections.abc import Sequence +from functools import partial, reduce, wraps +from heapq import merge, heapify, heapreplace, heappop +from itertools import ( + chain, + compress, + count, + cycle, + dropwhile, + groupby, + islice, + repeat, + starmap, + takewhile, + tee, + zip_longest, +) +from math import exp, factorial, floor, log +from queue import Empty, Queue +from random import random, randrange, uniform +from operator import itemgetter, mul, sub, gt, lt +from sys import hexversion, maxsize +from time import monotonic + +from .recipes import ( + consume, + flatten, + pairwise, + powerset, + take, + unique_everseen, +) + +__all__ = [ + 'AbortThread', + 'adjacent', + 'always_iterable', + 'always_reversible', + 'bucket', + 'callback_iter', + 'chunked', + 'circular_shifts', + 'collapse', + 'collate', + 'consecutive_groups', + 'consumer', + 'countable', + 'count_cycle', + 'mark_ends', + 'difference', + 'distinct_combinations', + 'distinct_permutations', + 'distribute', + 'divide', + 'exactly_n', + 'filter_except', + 'first', + 'groupby_transform', + 'ilen', + 'interleave_longest', + 'interleave', + 'intersperse', + 'islice_extended', + 'iterate', + 'ichunked', + 'is_sorted', + 'last', + 'locate', + 'lstrip', + 'make_decorator', + 'map_except', + 'map_reduce', + 'nth_or_last', + 'nth_permutation', + 'nth_product', + 'numeric_range', + 'one', + 'only', + 'padded', + 'partitions', + 'set_partitions', + 'peekable', + 'repeat_last', + 'replace', + 'rlocate', + 'rstrip', + 'run_length', + 'sample', + 'seekable', + 'SequenceView', + 'side_effect', + 'sliced', + 'sort_together', + 'split_at', + 'split_after', + 'split_before', + 'split_when', + 'split_into', + 'spy', + 'stagger', + 'strip', + 'substrings', + 'substrings_indexes', + 'time_limited', + 'unique_to_each', + 'unzip', + 'windowed', + 'with_iter', + 'UnequalIterablesError', + 'zip_equal', + 'zip_offset', + 'windowed_complete', + 'all_unique', + 'value_chain', + 'product_index', + 'combination_index', + 'permutation_index', +] + +_marker = object() + + +def chunked(iterable, n, strict=False): + """Break *iterable* into lists of length *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6], 3)) + [[1, 2, 3], [4, 5, 6]] + + By the default, the last yielded list will have fewer than *n* elements + if the length of *iterable* is not divisible by *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3)) + [[1, 2, 3], [4, 5, 6], [7, 8]] + + To use a fill-in value instead, see the :func:`grouper` recipe. + + If the length of *iterable* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + list is yielded. + + """ + iterator = iter(partial(take, n, iter(iterable)), []) + if strict: + + def ret(): + for chunk in iterator: + if len(chunk) != n: + raise ValueError('iterable is not divisible by n.') + yield chunk + + return iter(ret()) + else: + return iterator + + +def first(iterable, default=_marker): + """Return the first item of *iterable*, or *default* if *iterable* is + empty. + + >>> first([0, 1, 2, 3]) + 0 + >>> first([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + + :func:`first` is useful when you have a generator of expensive-to-retrieve + values and want any arbitrary one. It is marginally shorter than + ``next(iter(iterable), default)``. + + """ + try: + return next(iter(iterable)) + except StopIteration as e: + if default is _marker: + raise ValueError( + 'first() was called on an empty iterable, and no ' + 'default value was provided.' + ) from e + return default + + +def last(iterable, default=_marker): + """Return the last item of *iterable*, or *default* if *iterable* is + empty. + + >>> last([0, 1, 2, 3]) + 3 + >>> last([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + try: + if isinstance(iterable, Sequence): + return iterable[-1] + # Work around https://bugs.python.org/issue38525 + elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + return next(reversed(iterable)) + else: + return deque(iterable, maxlen=1)[-1] + except (IndexError, TypeError, StopIteration): + if default is _marker: + raise ValueError( + 'last() was called on an empty iterable, and no default was ' + 'provided.' + ) + return default + + +def nth_or_last(iterable, n, default=_marker): + """Return the nth or the last item of *iterable*, + or *default* if *iterable* is empty. + + >>> nth_or_last([0, 1, 2, 3], 2) + 2 + >>> nth_or_last([0, 1], 2) + 1 + >>> nth_or_last([], 0, 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + return last(islice(iterable, n + 1), default=default) + + +class peekable: + """Wrap an iterator to allow lookahead and prepending elements. + + Call :meth:`peek` on the result to get the value that will be returned + by :func:`next`. This won't advance the iterator: + + >>> p = peekable(['a', 'b']) + >>> p.peek() + 'a' + >>> next(p) + 'a' + + Pass :meth:`peek` a default value to return that instead of raising + ``StopIteration`` when the iterator is exhausted. + + >>> p = peekable([]) + >>> p.peek('hi') + 'hi' + + peekables also offer a :meth:`prepend` method, which "inserts" items + at the head of the iterable: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> p.peek() + 11 + >>> list(p) + [11, 12, 1, 2, 3] + + peekables can be indexed. Index 0 is the item that will be returned by + :func:`next`, index 1 is the item after that, and so on: + The values up to the given index will be cached. + + >>> p = peekable(['a', 'b', 'c', 'd']) + >>> p[0] + 'a' + >>> p[1] + 'b' + >>> next(p) + 'a' + + Negative indexes are supported, but be aware that they will cache the + remaining items in the source iterator, which may require significant + storage. + + To check whether a peekable is exhausted, check its truth value: + + >>> p = peekable(['a', 'b']) + >>> if p: # peekable has items + ... list(p) + ['a', 'b'] + >>> if not p: # peekable is exhausted + ... list(p) + [] + + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self._cache = deque() + + def __iter__(self): + return self + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + """Return the item that will be next returned from ``next()``. + + Return ``default`` if there are no items left. If ``default`` is not + provided, raise ``StopIteration``. + + """ + if not self._cache: + try: + self._cache.append(next(self._it)) + except StopIteration: + if default is _marker: + raise + return default + return self._cache[0] + + def prepend(self, *items): + """Stack up items to be the next ones returned from ``next()`` or + ``self.peek()``. The items will be returned in + first in, first out order:: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> list(p) + [11, 12, 1, 2, 3] + + It is possible, by prepending items, to "resurrect" a peekable that + previously raised ``StopIteration``. + + >>> p = peekable([]) + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + >>> p.prepend(1) + >>> next(p) + 1 + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + + """ + self._cache.extendleft(reversed(items)) + + def __next__(self): + if self._cache: + return self._cache.popleft() + + return next(self._it) + + def _get_slice(self, index): + # Normalize the slice's arguments + step = 1 if (index.step is None) else index.step + if step > 0: + start = 0 if (index.start is None) else index.start + stop = maxsize if (index.stop is None) else index.stop + elif step < 0: + start = -1 if (index.start is None) else index.start + stop = (-maxsize - 1) if (index.stop is None) else index.stop + else: + raise ValueError('slice step cannot be zero') + + # If either the start or stop index is negative, we'll need to cache + # the rest of the iterable in order to slice from the right side. + if (start < 0) or (stop < 0): + self._cache.extend(self._it) + # Otherwise we'll need to find the rightmost index and cache to that + # point. + else: + n = min(max(start, stop) + 1, maxsize) + cache_len = len(self._cache) + if n >= cache_len: + self._cache.extend(islice(self._it, n - cache_len)) + + return list(self._cache)[index] + + def __getitem__(self, index): + if isinstance(index, slice): + return self._get_slice(index) + + cache_len = len(self._cache) + if index < 0: + self._cache.extend(self._it) + elif index >= cache_len: + self._cache.extend(islice(self._it, index + 1 - cache_len)) + + return self._cache[index] + + +def collate(*iterables, **kwargs): + """Return a sorted merge of the items from each of several already-sorted + *iterables*. + + >>> list(collate('ACDZ', 'AZ', 'JKL')) + ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z'] + + Works lazily, keeping only the next value from each iterable in memory. Use + :func:`collate` to, for example, perform a n-way mergesort of items that + don't fit in memory. + + If a *key* function is specified, the iterables will be sorted according + to its result: + + >>> key = lambda s: int(s) # Sort by numeric value, not by string + >>> list(collate(['1', '10'], ['2', '11'], key=key)) + ['1', '2', '10', '11'] + + + If the *iterables* are sorted in descending order, set *reverse* to + ``True``: + + >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True)) + [5, 4, 3, 2, 1, 0] + + If the elements of the passed-in iterables are out of order, you might get + unexpected results. + + On Python 3.5+, this function is an alias for :func:`heapq.merge`. + + """ + warnings.warn( + "collate is no longer part of more_itertools, use heapq.merge", + DeprecationWarning, + ) + return merge(*iterables, **kwargs) + + +def consumer(func): + """Decorator that automatically advances a PEP-342-style "reverse iterator" + to its first yield point so you don't have to call ``next()`` on it + manually. + + >>> @consumer + ... def tally(): + ... i = 0 + ... while True: + ... print('Thing number %s is %s.' % (i, (yield))) + ... i += 1 + ... + >>> t = tally() + >>> t.send('red') + Thing number 0 is red. + >>> t.send('fish') + Thing number 1 is fish. + + Without the decorator, you would have to call ``next(t)`` before + ``t.send()`` could be used. + + """ + + @wraps(func) + def wrapper(*args, **kwargs): + gen = func(*args, **kwargs) + next(gen) + return gen + + return wrapper + + +def ilen(iterable): + """Return the number of items in *iterable*. + + >>> ilen(x for x in range(1000000) if x % 3 == 0) + 333334 + + This consumes the iterable, so handle with care. + + """ + # This approach was selected because benchmarks showed it's likely the + # fastest of the known implementations at the time of writing. + # See GitHub tracker: #236, #230. + counter = count() + deque(zip(iterable, counter), maxlen=0) + return next(counter) + + +def iterate(func, start): + """Return ``start``, ``func(start)``, ``func(func(start))``, ... + + >>> from itertools import islice + >>> list(islice(iterate(lambda x: 2*x, 1), 10)) + [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] + + """ + while True: + yield start + start = func(start) + + +def with_iter(context_manager): + """Wrap an iterable in a ``with`` statement, so it closes once exhausted. + + For example, this will close the file when the iterator is exhausted:: + + upper_lines = (line.upper() for line in with_iter(open('foo'))) + + Any context manager which returns an iterable is a candidate for + ``with_iter``. + + """ + with context_manager as iterable: + yield from iterable + + +def one(iterable, too_short=None, too_long=None): + """Return the first item from *iterable*, which is expected to contain only + that item. Raise an exception if *iterable* is empty or has more than one + item. + + :func:`one` is useful for ensuring that an iterable contains only one item. + For example, it can be used to retrieve the result of a database query + that is expected to return a single row. + + If *iterable* is empty, ``ValueError`` will be raised. You may specify a + different exception with the *too_short* keyword: + + >>> it = [] + >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too many items in iterable (expected 1)' + >>> too_short = IndexError('too few items') + >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + IndexError: too few items + + Similarly, if *iterable* contains more than one item, ``ValueError`` will + be raised. You may specify a different exception with the *too_long* + keyword: + + >>> it = ['too', 'many'] + >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 'too', + 'many', and perhaps more. + >>> too_long = RuntimeError + >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + RuntimeError + + Note that :func:`one` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check iterable + contents less destructively. + + """ + it = iter(iterable) + + try: + first_value = next(it) + except StopIteration as e: + raise ( + too_short or ValueError('too few items in iterable (expected 1)') + ) from e + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def distinct_permutations(iterable, r=None): + """Yield successive distinct permutations of the elements in *iterable*. + + >>> sorted(distinct_permutations([1, 0, 1])) + [(0, 1, 1), (1, 0, 1), (1, 1, 0)] + + Equivalent to ``set(permutations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + Duplicate permutations arise when there are duplicated elements in the + input iterable. The number of items returned is + `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of + items input, and each `x_i` is the count of a distinct item in the input + sequence. + + If *r* is given, only the *r*-length permutations are yielded. + + >>> sorted(distinct_permutations([1, 0, 1], r=2)) + [(0, 1), (1, 0), (1, 1)] + >>> sorted(distinct_permutations(range(3), r=2)) + [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + + """ + # Algorithm: https://w.wiki/Qai + def _full(A): + while True: + # Yield the permutation we have + yield tuple(A) + + # Find the largest index i such that A[i] < A[i + 1] + for i in range(size - 2, -1, -1): + if A[i] < A[i + 1]: + break + # If no such index exists, this permutation is the last one + else: + return + + # Find the largest index j greater than j such that A[i] < A[j] + for j in range(size - 1, i, -1): + if A[i] < A[j]: + break + + # Swap the value of A[i] with that of A[j], then reverse the + # sequence from A[i + 1] to form the new permutation + A[i], A[j] = A[j], A[i] + A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1] + + # Algorithm: modified from the above + def _partial(A, r): + # Split A into the first r items and the last r items + head, tail = A[:r], A[r:] + right_head_indexes = range(r - 1, -1, -1) + left_tail_indexes = range(len(tail)) + + while True: + # Yield the permutation we have + yield tuple(head) + + # Starting from the right, find the first index of the head with + # value smaller than the maximum value of the tail - call it i. + pivot = tail[-1] + for i in right_head_indexes: + if head[i] < pivot: + break + pivot = head[i] + else: + return + + # Starting from the left, find the first value of the tail + # with a value greater than head[i] and swap. + for j in left_tail_indexes: + if tail[j] > head[i]: + head[i], tail[j] = tail[j], head[i] + break + # If we didn't find one, start from the right and find the first + # index of the head with a value greater than head[i] and swap. + else: + for j in right_head_indexes: + if head[j] > head[i]: + head[i], head[j] = head[j], head[i] + break + + # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] + tail += head[: i - r : -1] # head[i + 1:][::-1] + i += 1 + head[i:], tail[:] = tail[: r - i], tail[r - i :] + + items = sorted(iterable) + + size = len(items) + if r is None: + r = size + + if 0 < r <= size: + return _full(items) if (r == size) else _partial(items, r) + + return iter(() if r else ((),)) + + +def intersperse(e, iterable, n=1): + """Intersperse filler element *e* among the items in *iterable*, leaving + *n* items between each filler element. + + >>> list(intersperse('!', [1, 2, 3, 4, 5])) + [1, '!', 2, '!', 3, '!', 4, '!', 5] + + >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2)) + [1, 2, None, 3, 4, None, 5] + + """ + if n == 0: + raise ValueError('n must be > 0') + elif n == 1: + # interleave(repeat(e), iterable) -> e, x_0, e, e, x_1, e, x_2... + # islice(..., 1, None) -> x_0, e, e, x_1, e, x_2... + return islice(interleave(repeat(e), iterable), 1, None) + else: + # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]... + # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]... + # flatten(...) -> x_0, x_1, e, x_2, x_3... + filler = repeat([e]) + chunks = chunked(iterable, n) + return flatten(islice(interleave(filler, chunks), 1, None)) + + +def unique_to_each(*iterables): + """Return the elements from each of the input iterables that aren't in the + other input iterables. + + For example, suppose you have a set of packages, each with a set of + dependencies:: + + {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}} + + If you remove one package, which dependencies can also be removed? + + If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not + associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for + ``pkg_2``, and ``D`` is only needed for ``pkg_3``:: + + >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'}) + [['A'], ['C'], ['D']] + + If there are duplicates in one input iterable that aren't in the others + they will be duplicated in the output. Input order is preserved:: + + >>> unique_to_each("mississippi", "missouri") + [['p', 'p'], ['o', 'u', 'r']] + + It is assumed that the elements of each iterable are hashable. + + """ + pool = [list(it) for it in iterables] + counts = Counter(chain.from_iterable(map(set, pool))) + uniques = {element for element in counts if counts[element] == 1} + return [list(filter(uniques.__contains__, it)) for it in pool] + + +def windowed(seq, n, fillvalue=None, step=1): + """Return a sliding window of width *n* over the given iterable. + + >>> all_windows = windowed([1, 2, 3, 4, 5], 3) + >>> list(all_windows) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + When the window is larger than the iterable, *fillvalue* is used in place + of missing values: + + >>> list(windowed([1, 2, 3], 4)) + [(1, 2, 3, None)] + + Each window will advance in increments of *step*: + + >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) + [(1, 2, 3), (3, 4, 5), (5, 6, '!')] + + To slide into the iterable's items, use :func:`chain` to add filler items + to the left: + + >>> iterable = [1, 2, 3, 4] + >>> n = 3 + >>> padding = [None] * (n - 1) + >>> list(windowed(chain(padding, iterable), 3)) + [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] + """ + if n < 0: + raise ValueError('n must be >= 0') + if n == 0: + yield tuple() + return + if step < 1: + raise ValueError('step must be >= 1') + + window = deque(maxlen=n) + i = n + for _ in map(window.append, seq): + i -= 1 + if not i: + i = step + yield tuple(window) + + size = len(window) + if size < n: + yield tuple(chain(window, repeat(fillvalue, n - size))) + elif 0 < i < min(step, n): + window += (fillvalue,) * i + yield tuple(window) + + +def substrings(iterable): + """Yield all of the substrings of *iterable*. + + >>> [''.join(s) for s in substrings('more')] + ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more'] + + Note that non-string iterables can also be subdivided. + + >>> list(substrings([0, 1, 2])) + [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)] + + """ + # The length-1 substrings + seq = [] + for item in iter(iterable): + seq.append(item) + yield (item,) + seq = tuple(seq) + item_count = len(seq) + + # And the rest + for n in range(2, item_count + 1): + for i in range(item_count - n + 1): + yield seq[i : i + n] + + +def substrings_indexes(seq, reverse=False): + """Yield all substrings and their positions in *seq* + + The items yielded will be a tuple of the form ``(substr, i, j)``, where + ``substr == seq[i:j]``. + + This function only works for iterables that support slicing, such as + ``str`` objects. + + >>> for item in substrings_indexes('more'): + ... print(item) + ('m', 0, 1) + ('o', 1, 2) + ('r', 2, 3) + ('e', 3, 4) + ('mo', 0, 2) + ('or', 1, 3) + ('re', 2, 4) + ('mor', 0, 3) + ('ore', 1, 4) + ('more', 0, 4) + + Set *reverse* to ``True`` to yield the same items in the opposite order. + + + """ + r = range(1, len(seq) + 1) + if reverse: + r = reversed(r) + return ( + (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) + ) + + +class bucket: + """Wrap *iterable* and return an object that buckets it iterable into + child iterables based on a *key* function. + + >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] + >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character + >>> sorted(list(s)) # Get the keys + ['a', 'b', 'c'] + >>> a_iterable = s['a'] + >>> next(a_iterable) + 'a1' + >>> next(a_iterable) + 'a2' + >>> list(s['b']) + ['b1', 'b2', 'b3'] + + The original iterable will be advanced and its items will be cached until + they are used by the child iterables. This may require significant storage. + + By default, attempting to select a bucket to which no items belong will + exhaust the iterable and cache all values. + If you specify a *validator* function, selected buckets will instead be + checked against it. + + >>> from itertools import count + >>> it = count(1, 2) # Infinite sequence of odd numbers + >>> key = lambda x: x % 10 # Bucket by last digit + >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only + >>> s = bucket(it, key=key, validator=validator) + >>> 2 in s + False + >>> list(s[2]) + [] + + """ + + def __init__(self, iterable, key, validator=None): + self._it = iter(iterable) + self._key = key + self._cache = defaultdict(deque) + self._validator = validator or (lambda x: True) + + def __contains__(self, value): + if not self._validator(value): + return False + + try: + item = next(self[value]) + except StopIteration: + return False + else: + self._cache[value].appendleft(item) + + return True + + def _get_values(self, value): + """ + Helper to yield items from the parent iterator that match *value*. + Items that don't match are stored in the local cache as they + are encountered. + """ + while True: + # If we've cached some items that match the target value, emit + # the first one and evict it from the cache. + if self._cache[value]: + yield self._cache[value].popleft() + # Otherwise we need to advance the parent iterator to search for + # a matching item, caching the rest. + else: + while True: + try: + item = next(self._it) + except StopIteration: + return + item_value = self._key(item) + if item_value == value: + yield item + break + elif self._validator(item_value): + self._cache[item_value].append(item) + + def __iter__(self): + for item in self._it: + item_value = self._key(item) + if self._validator(item_value): + self._cache[item_value].append(item) + + yield from self._cache.keys() + + def __getitem__(self, value): + if not self._validator(value): + return iter(()) + + return self._get_values(value) + + +def spy(iterable, n=1): + """Return a 2-tuple with a list containing the first *n* elements of + *iterable*, and an iterator with the same items as *iterable*. + This allows you to "look ahead" at the items in the iterable without + advancing it. + + There is one item in the list by default: + + >>> iterable = 'abcdefg' + >>> head, iterable = spy(iterable) + >>> head + ['a'] + >>> list(iterable) + ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + + You may use unpacking to retrieve items instead of lists: + + >>> (head,), iterable = spy('abcdefg') + >>> head + 'a' + >>> (first, second), iterable = spy('abcdefg', 2) + >>> first + 'a' + >>> second + 'b' + + The number of items requested can be larger than the number of items in + the iterable: + + >>> iterable = [1, 2, 3, 4, 5] + >>> head, iterable = spy(iterable, 10) + >>> head + [1, 2, 3, 4, 5] + >>> list(iterable) + [1, 2, 3, 4, 5] + + """ + it = iter(iterable) + head = take(n, it) + + return head.copy(), chain(head, it) + + +def interleave(*iterables): + """Return a new iterable yielding from each iterable in turn, + until the shortest is exhausted. + + >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7] + + For a version that doesn't terminate after the shortest iterable is + exhausted, see :func:`interleave_longest`. + + """ + return chain.from_iterable(zip(*iterables)) + + +def interleave_longest(*iterables): + """Return a new iterable yielding from each iterable in turn, + skipping any that are exhausted. + + >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7, 3, 8] + + This function produces the same output as :func:`roundrobin`, but may + perform better for some inputs (in particular when the number of iterables + is large). + + """ + i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker)) + return (x for x in i if x is not _marker) + + +def collapse(iterable, base_type=None, levels=None): + """Flatten an iterable with multiple levels of nesting (e.g., a list of + lists of tuples) into non-iterable types. + + >>> iterable = [(1, 2), ([3, 4], [[5], [6]])] + >>> list(collapse(iterable)) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and + will not be collapsed. + + To avoid collapsing other types, specify *base_type*: + + >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] + >>> list(collapse(iterable, base_type=tuple)) + ['ab', ('cd', 'ef'), 'gh', 'ij'] + + Specify *levels* to stop flattening after a certain level: + + >>> iterable = [('a', ['b']), ('c', ['d'])] + >>> list(collapse(iterable)) # Fully flattened + ['a', 'b', 'c', 'd'] + >>> list(collapse(iterable, levels=1)) # Only one level flattened + ['a', ['b'], 'c', ['d']] + + """ + + def walk(node, level): + if ( + ((levels is not None) and (level > levels)) + or isinstance(node, (str, bytes)) + or ((base_type is not None) and isinstance(node, base_type)) + ): + yield node + return + + try: + tree = iter(node) + except TypeError: + yield node + return + else: + for child in tree: + yield from walk(child, level + 1) + + yield from walk(iterable, 0) + + +def side_effect(func, iterable, chunk_size=None, before=None, after=None): + """Invoke *func* on each item in *iterable* (or on each *chunk_size* group + of items) before yielding the item. + + `func` must be a function that takes a single argument. Its return value + will be discarded. + + *before* and *after* are optional functions that take no arguments. They + will be executed before iteration starts and after it ends, respectively. + + `side_effect` can be used for logging, updating progress bars, or anything + that is not functionally "pure." + + Emitting a status message: + + >>> from more_itertools import consume + >>> func = lambda item: print('Received {}'.format(item)) + >>> consume(side_effect(func, range(2))) + Received 0 + Received 1 + + Operating on chunks of items: + + >>> pair_sums = [] + >>> func = lambda chunk: pair_sums.append(sum(chunk)) + >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2)) + [0, 1, 2, 3, 4, 5] + >>> list(pair_sums) + [1, 5, 9] + + Writing to a file-like object: + + >>> from io import StringIO + >>> from more_itertools import consume + >>> f = StringIO() + >>> func = lambda x: print(x, file=f) + >>> before = lambda: print(u'HEADER', file=f) + >>> after = f.close + >>> it = [u'a', u'b', u'c'] + >>> consume(side_effect(func, it, before=before, after=after)) + >>> f.closed + True + + """ + try: + if before is not None: + before() + + if chunk_size is None: + for item in iterable: + func(item) + yield item + else: + for chunk in chunked(iterable, chunk_size): + func(chunk) + yield from chunk + finally: + if after is not None: + after() + + +def sliced(seq, n, strict=False): + """Yield slices of length *n* from the sequence *seq*. + + >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) + [(1, 2, 3), (4, 5, 6)] + + By the default, the last yielded slice will have fewer than *n* elements + if the length of *seq* is not divisible by *n*: + + >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) + [(1, 2, 3), (4, 5, 6), (7, 8)] + + If the length of *seq* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + slice is yielded. + + This function will only work for iterables that support slicing. + For non-sliceable iterables, see :func:`chunked`. + + """ + iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) + if strict: + + def ret(): + for _slice in iterator: + if len(_slice) != n: + raise ValueError("seq is not divisible by n.") + yield _slice + + return iter(ret()) + else: + return iterator + + +def split_at(iterable, pred, maxsplit=-1, keep_separator=False): + """Yield lists of items from *iterable*, where each list is delimited by + an item where callable *pred* returns ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b')) + [['a'], ['c', 'd', 'c'], ['a']] + + >>> list(split_at(range(10), lambda n: n % 2 == 1)) + [[0], [2], [4], [6], [8], []] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) + [[0], [2], [4, 5, 6, 7, 8, 9]] + + By default, the delimiting items are not included in the output. + The include them, set *keep_separator* to ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True)) + [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item): + yield buf + if keep_separator: + yield [item] + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + else: + buf.append(item) + yield buf + + +def split_before(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends just before + an item for which callable *pred* returns ``True``: + + >>> list(split_before('OneTwo', lambda s: s.isupper())) + [['O', 'n', 'e'], ['T', 'w', 'o']] + + >>> list(split_before(range(10), lambda n: n % 3 == 0)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield [item] + list(it) + return + buf = [] + maxsplit -= 1 + buf.append(item) + if buf: + yield buf + + +def split_after(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends with an + item where callable *pred* returns ``True``: + + >>> list(split_after('one1two2', lambda s: s.isdigit())) + [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']] + + >>> list(split_after(range(10), lambda n: n % 3 == 0)) + [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + buf.append(item) + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + if buf: + yield buf + + +def split_when(iterable, pred, maxsplit=-1): + """Split *iterable* into pieces based on the output of *pred*. + *pred* should be a function that takes successive pairs of items and + returns ``True`` if the iterable should be split in between them. + + For example, to find runs of increasing numbers, split the iterable when + element ``i`` is larger than element ``i + 1``: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y)) + [[1, 2, 3, 3], [2, 5], [2, 4], [2]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], + ... lambda x, y: x > y, maxsplit=2)) + [[1, 2, 3, 3], [2, 5], [2, 4, 2]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + it = iter(iterable) + try: + cur_item = next(it) + except StopIteration: + return + + buf = [cur_item] + for next_item in it: + if pred(cur_item, next_item): + yield buf + if maxsplit == 1: + yield [next_item] + list(it) + return + buf = [] + maxsplit -= 1 + + buf.append(next_item) + cur_item = next_item + + yield buf + + +def split_into(iterable, sizes): + """Yield a list of sequential items from *iterable* of length 'n' for each + integer 'n' in *sizes*. + + >>> list(split_into([1,2,3,4,5,6], [1,2,3])) + [[1], [2, 3], [4, 5, 6]] + + If the sum of *sizes* is smaller than the length of *iterable*, then the + remaining items of *iterable* will not be returned. + + >>> list(split_into([1,2,3,4,5,6], [2,3])) + [[1, 2], [3, 4, 5]] + + If the sum of *sizes* is larger than the length of *iterable*, fewer items + will be returned in the iteration that overruns *iterable* and further + lists will be empty: + + >>> list(split_into([1,2,3,4], [1,2,3,4])) + [[1], [2, 3], [4], []] + + When a ``None`` object is encountered in *sizes*, the returned list will + contain items up to the end of *iterable* the same way that itertools.slice + does: + + >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None])) + [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]] + + :func:`split_into` can be useful for grouping a series of items where the + sizes of the groups are not uniform. An example would be where in a row + from a table, multiple columns represent elements of the same feature + (e.g. a point represented by x,y,z) but, the format is not the same for + all columns. + """ + # convert the iterable argument into an iterator so its contents can + # be consumed by islice in case it is a generator + it = iter(iterable) + + for size in sizes: + if size is None: + yield list(it) + return + else: + yield list(islice(it, size)) + + +def padded(iterable, fillvalue=None, n=None, next_multiple=False): + """Yield the elements from *iterable*, followed by *fillvalue*, such that + at least *n* items are emitted. + + >>> list(padded([1, 2, 3], '?', 5)) + [1, 2, 3, '?', '?'] + + If *next_multiple* is ``True``, *fillvalue* will be emitted until the + number of items emitted is a multiple of *n*:: + + >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True)) + [1, 2, 3, 4, None, None] + + If *n* is ``None``, *fillvalue* will be emitted indefinitely. + + """ + it = iter(iterable) + if n is None: + yield from chain(it, repeat(fillvalue)) + elif n < 1: + raise ValueError('n must be at least 1') + else: + item_count = 0 + for item in it: + yield item + item_count += 1 + + remaining = (n - item_count) % n if next_multiple else n - item_count + for _ in range(remaining): + yield fillvalue + + +def repeat_last(iterable, default=None): + """After the *iterable* is exhausted, keep yielding its last element. + + >>> list(islice(repeat_last(range(3)), 5)) + [0, 1, 2, 2, 2] + + If the iterable is empty, yield *default* forever:: + + >>> list(islice(repeat_last(range(0), 42), 5)) + [42, 42, 42, 42, 42] + + """ + item = _marker + for item in iterable: + yield item + final = default if item is _marker else item + yield from repeat(final) + + +def distribute(n, iterable): + """Distribute the items from *iterable* among *n* smaller iterables. + + >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 3, 5] + >>> list(group_2) + [2, 4, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 4, 7], [2, 5], [3, 6]] + + If the length of *iterable* is smaller than *n*, then the last returned + iterables will be empty: + + >>> children = distribute(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function uses :func:`itertools.tee` and may require significant + storage. If you need the order items in the smaller iterables to match the + original iterable, see :func:`divide`. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + children = tee(iterable, n) + return [islice(it, index, None, n) for index, it in enumerate(children)] + + +def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None): + """Yield tuples whose elements are offset from *iterable*. + The amount by which the `i`-th item in each tuple is offset is given by + the `i`-th item in *offsets*. + + >>> list(stagger([0, 1, 2, 3])) + [(None, 0, 1), (0, 1, 2), (1, 2, 3)] + >>> list(stagger(range(8), offsets=(0, 2, 4))) + [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)] + + By default, the sequence will end when the final element of a tuple is the + last item in the iterable. To continue until the first element of a tuple + is the last item in the iterable, set *longest* to ``True``:: + + >>> list(stagger([0, 1, 2, 3], longest=True)) + [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + children = tee(iterable, len(offsets)) + + return zip_offset( + *children, offsets=offsets, longest=longest, fillvalue=fillvalue + ) + + +class UnequalIterablesError(ValueError): + def __init__(self, details=None): + msg = 'Iterables have different lengths' + if details is not None: + msg += (': index 0 has length {}; index {} has length {}').format( + *details + ) + + super().__init__(msg) + + +def _zip_equal_generator(iterables): + for combo in zip_longest(*iterables, fillvalue=_marker): + for val in combo: + if val is _marker: + raise UnequalIterablesError() + yield combo + + +def zip_equal(*iterables): + """``zip`` the input *iterables* together, but raise + ``UnequalIterablesError`` if they aren't all the same length. + + >>> it_1 = range(3) + >>> it_2 = iter('abc') + >>> list(zip_equal(it_1, it_2)) + [(0, 'a'), (1, 'b'), (2, 'c')] + + >>> it_1 = range(3) + >>> it_2 = iter('abcd') + >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + more_itertools.more.UnequalIterablesError: Iterables have different + lengths + + """ + if hexversion >= 0x30A00A6: + warnings.warn( + ( + 'zip_equal will be removed in a future version of ' + 'more-itertools. Use the builtin zip function with ' + 'strict=True instead.' + ), + DeprecationWarning, + ) + # Check whether the iterables are all the same size. + try: + first_size = len(iterables[0]) + for i, it in enumerate(iterables[1:], 1): + size = len(it) + if size != first_size: + break + else: + # If we didn't break out, we can use the built-in zip. + return zip(*iterables) + + # If we did break out, there was a mismatch. + raise UnequalIterablesError(details=(first_size, i, size)) + # If any one of the iterables didn't have a length, start reading + # them until one runs out. + except TypeError: + return _zip_equal_generator(iterables) + + +def zip_offset(*iterables, offsets, longest=False, fillvalue=None): + """``zip`` the input *iterables* together, but offset the `i`-th iterable + by the `i`-th item in *offsets*. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1))) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')] + + This can be used as a lightweight alternative to SciPy or pandas to analyze + data sets in which some series have a lead or lag relationship. + + By default, the sequence will end when the shortest iterable is exhausted. + To continue until the longest iterable is exhausted, set *longest* to + ``True``. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True)) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + if len(iterables) != len(offsets): + raise ValueError("Number of iterables and offsets didn't match") + + staggered = [] + for it, n in zip(iterables, offsets): + if n < 0: + staggered.append(chain(repeat(fillvalue, -n), it)) + elif n > 0: + staggered.append(islice(it, n, None)) + else: + staggered.append(it) + + if longest: + return zip_longest(*staggered, fillvalue=fillvalue) + + return zip(*staggered) + + +def sort_together(iterables, key_list=(0,), key=None, reverse=False): + """Return the input iterables sorted together, with *key_list* as the + priority for sorting. All iterables are trimmed to the length of the + shortest one. + + This can be used like the sorting function in a spreadsheet. If each + iterable represents a column of data, the key list determines which + columns are used for sorting. + + By default, all iterables are sorted using the ``0``-th iterable:: + + >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] + >>> sort_together(iterables) + [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] + + Set a different key list to sort according to another iterable. + Specifying multiple keys dictates how ties are broken:: + + >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] + >>> sort_together(iterables, key_list=(1, 2)) + [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + + Set *reverse* to ``True`` to sort in descending order. + + >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) + [(3, 2, 1), ('a', 'b', 'c')] + + """ + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) + + +def unzip(iterable): + """The inverse of :func:`zip`, this function disaggregates the elements + of the zipped *iterable*. + + The ``i``-th iterable contains the ``i``-th element from each element + of the zipped iterable. The first element is used to to determine the + length of the remaining elements. + + >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> letters, numbers = unzip(iterable) + >>> list(letters) + ['a', 'b', 'c', 'd'] + >>> list(numbers) + [1, 2, 3, 4] + + This is similar to using ``zip(*iterable)``, but it avoids reading + *iterable* into memory. Note, however, that this function uses + :func:`itertools.tee` and thus may require significant storage. + + """ + head, iterable = spy(iter(iterable)) + if not head: + # empty iterable, e.g. zip([], [], []) + return () + # spy returns a one-length iterable as head + head = head[0] + iterables = tee(iterable, len(head)) + + def itemgetter(i): + def getter(obj): + try: + return obj[i] + except IndexError: + # basically if we have an iterable like + # iter([(1, 2, 3), (4, 5), (6,)]) + # the second unzipped iterable would fail at the third tuple + # since it would try to access tup[1] + # same with the third unzipped iterable and the second tuple + # to support these "improperly zipped" iterables, + # we create a custom itemgetter + # which just stops the unzipped iterables + # at first length mismatch + raise StopIteration + + return getter + + return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) + + +def divide(n, iterable): + """Divide the elements from *iterable* into *n* parts, maintaining + order. + + >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 2, 3] + >>> list(group_2) + [4, 5, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 2, 3], [4, 5], [6, 7]] + + If the length of the iterable is smaller than n, then the last returned + iterables will be empty: + + >>> children = divide(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function will exhaust the iterable before returning and may require + significant storage. If order is not important, see :func:`distribute`, + which does not first pull the iterable into memory. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + + q, r = divmod(len(seq), n) + + ret = [] + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q + ret.append(iter(seq[start:stop])) + + return ret + + +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +def adjacent(predicate, iterable, distance=1): + """Return an iterable over `(bool, item)` tuples where the `item` is + drawn from *iterable* and the `bool` indicates whether + that item satisfies the *predicate* or is adjacent to an item that does. + + For example, to find whether items are adjacent to a ``3``:: + + >>> list(adjacent(lambda x: x == 3, range(6))) + [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] + + Set *distance* to change what counts as adjacent. For example, to find + whether items are two places away from a ``3``: + + >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) + [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] + + This is useful for contextualizing the results of a search function. + For example, a code comparison tool might want to identify lines that + have changed, but also surrounding lines to give the viewer of the diff + context. + + The predicate function will only be called once for each item in the + iterable. + + See also :func:`groupby_transform`, which can be used with this function + to group ranges of items with the same `bool` value. + + """ + # Allow distance=0 mainly for testing that it reproduces results with map() + if distance < 0: + raise ValueError('distance must be at least 0') + + i1, i2 = tee(iterable) + padding = [False] * distance + selected = chain(padding, map(predicate, i1), padding) + adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) + return zip(adjacent_to_selected, i2) + + +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. + + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items + + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. + + :func:`groupby_transform` is useful when grouping elements of an iterable + using a separate iterable as the key. To do this, :func:`zip` the iterables + and pass a *keyfunc* that extracts the first element and a *valuefunc* + that extracts the second element:: + + >>> from operator import itemgetter + >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] + >>> values = 'abcdefghi' + >>> iterable = zip(keys, values) + >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) + >>> [(k, ''.join(g)) for k, g in grouper] + [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] + + Note that the order of items in the iterable is significant. + Only adjacent items are grouped together, so if you don't want any + duplicate groups, you should sort the iterable by the key function. + + """ + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret + + +class numeric_range(abc.Sequence, abc.Hashable): + """An extension of the built-in ``range()`` function whose arguments can + be any orderable numeric type. + + With only *stop* specified, *start* defaults to ``0`` and *step* + defaults to ``1``. The output items will match the type of *stop*: + + >>> list(numeric_range(3.5)) + [0.0, 1.0, 2.0, 3.0] + + With only *start* and *stop* specified, *step* defaults to ``1``. The + output items will match the type of *start*: + + >>> from decimal import Decimal + >>> start = Decimal('2.1') + >>> stop = Decimal('5.1') + >>> list(numeric_range(start, stop)) + [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] + + With *start*, *stop*, and *step* specified the output items will match + the type of ``start + step``: + + >>> from fractions import Fraction + >>> start = Fraction(1, 2) # Start at 1/2 + >>> stop = Fraction(5, 2) # End at 5/2 + >>> step = Fraction(1, 2) # Count by 1/2 + >>> list(numeric_range(start, stop, step)) + [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] + + If *step* is zero, ``ValueError`` is raised. Negative steps are supported: + + >>> list(numeric_range(3, -1, -1.0)) + [3.0, 2.0, 1.0, 0.0] + + Be aware of the limitations of floating point numbers; the representation + of the yielded numbers may be surprising. + + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: + + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step + + +def count_cycle(iterable, n=None): + """Cycle through the items from *iterable* up to *n* times, yielding + the number of completed cycles along with each item. If *n* is omitted the + process repeats indefinitely. + + >>> list(count_cycle('AB', 3)) + [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] + + """ + iterable = tuple(iterable) + if not iterable: + return iter(()) + counter = count() if n is None else range(n) + return ((i, item) for i in counter for item in iterable) + + +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + +def locate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(locate([0, 1, 1, 0, 1, 0, 0])) + [1, 2, 4] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item. + + >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) + [1, 3] + + If *window_size* is given, then the *pred* function will be called with + that many items. This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(locate(iterable, pred=pred, window_size=3)) + [1, 5, 9] + + Use with :func:`seekable` to find indexes and then retrieve the associated + items: + + >>> from itertools import count + >>> from more_itertools import seekable + >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count()) + >>> it = seekable(source) + >>> pred = lambda x: x > 100 + >>> indexes = locate(it, pred=pred) + >>> i = next(indexes) + >>> it.seek(i) + >>> next(it) + 106 + + """ + if window_size is None: + return compress(count(), map(pred, iterable)) + + if window_size < 1: + raise ValueError('window size must be at least 1') + + it = windowed(iterable, window_size, fillvalue=_marker) + return compress(count(), starmap(pred, it)) + + +def lstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the beginning + for which *pred* returns ``True``. + + For example, to remove a set of items from the start of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(lstrip(iterable, pred)) + [1, 2, None, 3, False, None] + + This function is analogous to to :func:`str.lstrip`, and is essentially + an wrapper for :func:`itertools.dropwhile`. + + """ + return dropwhile(pred, iterable) + + +def rstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the end + for which *pred* returns ``True``. + + For example, to remove a set of items from the end of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(rstrip(iterable, pred)) + [None, False, None, 1, 2, None, 3] + + This function is analogous to :func:`str.rstrip`. + + """ + cache = [] + cache_append = cache.append + cache_clear = cache.clear + for x in iterable: + if pred(x): + cache_append(x) + else: + yield from cache + cache_clear() + yield x + + +def strip(iterable, pred): + """Yield the items from *iterable*, but strip any from the + beginning and end for which *pred* returns ``True``. + + For example, to remove a set of items from both ends of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(strip(iterable, pred)) + [1, 2, None, 3] + + This function is analogous to :func:`str.strip`. + + """ + return rstrip(lstrip(iterable, pred), pred) + + +class islice_extended: + """An extension of :func:`itertools.islice` that supports negative values + for *stop*, *start*, and *step*. + + >>> iterable = iter('abcdefgh') + >>> list(islice_extended(iterable, -4, -1)) + ['e', 'f', 'g'] + + Slices with negative values require some caching of *iterable*, but this + function takes care to minimize the amount of memory required. + + For example, you can use a negative step with an infinite iterator: + + >>> from itertools import count + >>> list(islice_extended(count(), 110, 99, -2)) + [110, 108, 106, 104, 102, 100] + + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + + """ + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): + start = s.start + stop = s.stop + if s.step == 0: + raise ValueError('step argument must be a non-zero integer or None.') + step = s.step or 1 + + if step > 0: + start = 0 if (start is None) else start + + if start < 0: + # Consume all but the last -start items + cache = deque(enumerate(it, 1), maxlen=-start) + len_iter = cache[-1][0] if cache else 0 + + # Adjust start to be positive + i = max(len_iter + start, 0) + + # Adjust stop to be positive + if stop is None: + j = len_iter + elif stop >= 0: + j = min(stop, len_iter) + else: + j = max(len_iter + stop, 0) + + # Slice the cache + n = j - i + if n <= 0: + return + + for index, item in islice(cache, 0, n, step): + yield item + elif (stop is not None) and (stop < 0): + # Advance to the start position + next(islice(it, start, start), None) + + # When stop is negative, we have to carry -stop items while + # iterating + cache = deque(islice(it, -stop), maxlen=-stop) + + for index, item in enumerate(it): + cached_item = cache.popleft() + if index % step == 0: + yield cached_item + cache.append(item) + else: + # When both start and stop are positive we have the normal case + yield from islice(it, start, stop, step) + else: + start = -1 if (start is None) else start + + if (stop is not None) and (stop < 0): + # Consume all but the last items + n = -stop - 1 + cache = deque(enumerate(it, 1), maxlen=n) + len_iter = cache[-1][0] if cache else 0 + + # If start and stop are both negative they are comparable and + # we can just slice. Otherwise we can adjust start to be negative + # and then slice. + if start < 0: + i, j = start, stop + else: + i, j = min(start - len_iter, -1), None + + for index, item in list(cache)[i:j:step]: + yield item + else: + # Advance to the stop position + if stop is not None: + m = stop + 1 + next(islice(it, m, m), None) + + # stop is positive, so if start is negative they are not comparable + # and we need the rest of the items. + if start < 0: + i = start + n = None + # stop is None and start is positive, so we just need items up to + # the start index. + elif stop is None: + i = None + n = start + 1 + # Both stop and start are positive, so they are comparable. + else: + i = None + n = start - stop + if n <= 0: + return + + cache = list(islice(it, n)) + + yield from cache[i::step] + + +def always_reversible(iterable): + """An extension of :func:`reversed` that supports all iterables, not + just those which implement the ``Reversible`` or ``Sequence`` protocols. + + >>> print(*always_reversible(x for x in range(3))) + 2 1 0 + + If the iterable is already reversible, this function returns the + result of :func:`reversed()`. If the iterable is not reversible, + this function will cache the remaining items in the iterable and + yield them in reverse order, which may require significant storage. + """ + try: + return reversed(iterable) + except TypeError: + return reversed(list(iterable)) + + +def consecutive_groups(iterable, ordering=lambda x: x): + """Yield groups of consecutive items using :func:`itertools.groupby`. + The *ordering* function determines whether two items are adjacent by + returning their position. + + By default, the ordering function is the identity function. This is + suitable for finding runs of numbers: + + >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40] + >>> for group in consecutive_groups(iterable): + ... print(list(group)) + [1] + [10, 11, 12] + [20] + [30, 31, 32, 33] + [40] + + For finding runs of adjacent letters, try using the :meth:`index` method + of a string of letters: + + >>> from string import ascii_lowercase + >>> iterable = 'abcdfgilmnop' + >>> ordering = ascii_lowercase.index + >>> for group in consecutive_groups(iterable, ordering): + ... print(list(group)) + ['a', 'b', 'c', 'd'] + ['f', 'g'] + ['i'] + ['l', 'm', 'n', 'o', 'p'] + + Each group of consecutive items is an iterator that shares it source with + *iterable*. When an an output group is advanced, the previous group is + no longer available unless its elements are copied (e.g., into a ``list``). + + >>> iterable = [1, 2, 11, 12, 21, 22] + >>> saved_groups = [] + >>> for group in consecutive_groups(iterable): + ... saved_groups.append(list(group)) # Copy group elements + >>> saved_groups + [[1, 2], [11, 12], [21, 22]] + + """ + for k, g in groupby( + enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) + ): + yield map(itemgetter(1), g) + + +def difference(iterable, func=sub, *, initial=None): + """This function is the inverse of :func:`itertools.accumulate`. By default + it will compute the first difference of *iterable* using + :func:`operator.sub`: + + >>> from itertools import accumulate + >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 + >>> list(difference(iterable)) + [0, 1, 2, 3, 4] + + *func* defaults to :func:`operator.sub`, but other functions can be + specified. They will be applied as follows:: + + A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... + + For example, to do progressive division: + + >>> iterable = [1, 2, 6, 24, 120] + >>> func = lambda x, y: x // y + >>> list(difference(iterable, func)) + [1, 2, 3, 4, 5] + + If the *initial* keyword is set, the first element will be skipped when + computing successive differences. + + >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) + >>> list(difference(it, initial=10)) + [1, 2, 3] + + """ + a, b = tee(iterable) + try: + first = [next(b)] + except StopIteration: + return iter([]) + + if initial is not None: + first = [] + + return chain(first, starmap(func, zip(b, a))) + + +class SequenceView(Sequence): + """Return a read-only view of the sequence object *target*. + + :class:`SequenceView` objects are analogous to Python's built-in + "dictionary view" types. They provide a dynamic view of a sequence's items, + meaning that when the sequence updates, so does the view. + + >>> seq = ['0', '1', '2'] + >>> view = SequenceView(seq) + >>> view + SequenceView(['0', '1', '2']) + >>> seq.append('3') + >>> view + SequenceView(['0', '1', '2', '3']) + + Sequence views support indexing, slicing, and length queries. They act + like the underlying sequence, except they don't allow assignment: + + >>> view[1] + '1' + >>> view[1:-1] + ['1', '2'] + >>> len(view) + 4 + + Sequence views are useful as an alternative to copying, as they don't + require (much) extra storage. + + """ + + def __init__(self, target): + if not isinstance(target, Sequence): + raise TypeError + self._target = target + + def __getitem__(self, index): + return self._target[index] + + def __len__(self): + return len(self._target) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self._target)) + + +class seekable: + """Wrap an iterator to allow for seeking backward and forward. This + progressively caches the items in the source iterable so they can be + re-visited. + + Call :meth:`seek` with an index to seek to that position in the source + iterable. + + To "reset" an iterator, seek to ``0``: + + >>> from itertools import count + >>> it = seekable((str(n) for n in count())) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> it.seek(0) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> next(it) + '3' + + You can also seek forward: + + >>> it = seekable((str(n) for n in range(20))) + >>> it.seek(10) + >>> next(it) + '10' + >>> it.seek(20) # Seeking past the end of the source isn't a problem + >>> list(it) + [] + >>> it.seek(0) # Resetting works even after hitting the end + >>> next(it), next(it), next(it) + ('0', '1', '2') + + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False + + You may view the contents of the cache with the :meth:`elements` method. + That returns a :class:`SequenceView`, a view that updates automatically: + + >>> it = seekable((str(n) for n in range(10))) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> elements = it.elements() + >>> elements + SequenceView(['0', '1', '2']) + >>> next(it) + '3' + >>> elements + SequenceView(['0', '1', '2', '3']) + + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). + + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + + """ + + def __init__(self, iterable, maxlen=None): + self._source = iter(iterable) + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) + self._index = None + + def __iter__(self): + return self + + def __next__(self): + if self._index is not None: + try: + item = self._cache[self._index] + except IndexError: + self._index = None + else: + self._index += 1 + return item + + item = next(self._source) + self._cache.append(item) + return item + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked + + def elements(self): + return SequenceView(self._cache) + + def seek(self, index): + self._index = index + remainder = index - len(self._cache) + if remainder > 0: + consume(self, remainder) + + +class run_length: + """ + :func:`run_length.encode` compresses an iterable with run-length encoding. + It yields groups of repeated items with the count of how many times they + were repeated: + + >>> uncompressed = 'abbcccdddd' + >>> list(run_length.encode(uncompressed)) + [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + + :func:`run_length.decode` decompresses an iterable that was previously + compressed with run-length encoding. It yields the items of the + decompressed iterable: + + >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> list(run_length.decode(compressed)) + ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] + + """ + + @staticmethod + def encode(iterable): + return ((k, ilen(g)) for k, g in groupby(iterable)) + + @staticmethod + def decode(iterable): + return chain.from_iterable(repeat(k, n) for k, n in iterable) + + +def exactly_n(iterable, n, predicate=bool): + """Return ``True`` if exactly ``n`` items in the iterable are ``True`` + according to the *predicate* function. + + >>> exactly_n([True, True, False], 2) + True + >>> exactly_n([True, True, False], 1) + False + >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) + True + + The iterable will be advanced until ``n + 1`` truthy items are encountered, + so avoid calling it on infinite iterables. + + """ + return len(take(n + 1, filter(predicate, iterable))) == n + + +def circular_shifts(iterable): + """Return a list of circular shifts of *iterable*. + + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + """ + lst = list(iterable) + return take(len(lst), windowed(cycle(lst), len(lst))) + + +def make_decorator(wrapping_func, result_index=0): + """Return a decorator version of *wrapping_func*, which is a function that + modifies an iterable. *result_index* is the position in that function's + signature where the iterable goes. + + This lets you use itertools on the "production end," i.e. at function + definition. This can augment what the function returns without changing the + function's code. + + For example, to produce a decorator version of :func:`chunked`: + + >>> from more_itertools import chunked + >>> chunker = make_decorator(chunked, result_index=0) + >>> @chunker(3) + ... def iter_range(n): + ... return iter(range(n)) + ... + >>> list(iter_range(9)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + + To only allow truthy items to be returned: + + >>> truth_serum = make_decorator(filter, result_index=1) + >>> @truth_serum(bool) + ... def boolean_test(): + ... return [0, 1, '', ' ', False, True] + ... + >>> list(boolean_test()) + [1, ' ', True] + + The :func:`peekable` and :func:`seekable` wrappers make for practical + decorators: + + >>> from more_itertools import peekable + >>> peekable_function = make_decorator(peekable) + >>> @peekable_function() + ... def str_range(*args): + ... return (str(x) for x in range(*args)) + ... + >>> it = str_range(1, 20, 2) + >>> next(it), next(it), next(it) + ('1', '3', '5') + >>> it.peek() + '7' + >>> next(it) + '7' + + """ + # See https://sites.google.com/site/bbayles/index/decorator_factory for + # notes on how this works. + def decorator(*wrapping_args, **wrapping_kwargs): + def outer_wrapper(f): + def inner_wrapper(*args, **kwargs): + result = f(*args, **kwargs) + wrapping_args_ = list(wrapping_args) + wrapping_args_.insert(result_index, result) + return wrapping_func(*wrapping_args_, **wrapping_kwargs) + + return inner_wrapper + + return outer_wrapper + + return decorator + + +def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None): + """Return a dictionary that maps the items in *iterable* to categories + defined by *keyfunc*, transforms them with *valuefunc*, and + then summarizes them by category with *reducefunc*. + + *valuefunc* defaults to the identity function if it is unspecified. + If *reducefunc* is unspecified, no summarization takes place: + + >>> keyfunc = lambda x: x.upper() + >>> result = map_reduce('abbccc', keyfunc) + >>> sorted(result.items()) + [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])] + + Specifying *valuefunc* transforms the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> result = map_reduce('abbccc', keyfunc, valuefunc) + >>> sorted(result.items()) + [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])] + + Specifying *reducefunc* summarizes the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> reducefunc = sum + >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc) + >>> sorted(result.items()) + [('A', 1), ('B', 2), ('C', 3)] + + You may want to filter the input iterable before applying the map/reduce + procedure: + + >>> all_items = range(30) + >>> items = [x for x in all_items if 10 <= x <= 20] # Filter + >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1 + >>> categories = map_reduce(items, keyfunc=keyfunc) + >>> sorted(categories.items()) + [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])] + >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum) + >>> sorted(summaries.items()) + [(0, 90), (1, 75)] + + Note that all items in the iterable are gathered into a list before the + summarization step, which may require significant storage. + + The returned object is a :obj:`collections.defaultdict` with the + ``default_factory`` set to ``None``, such that it behaves like a normal + dictionary. + + """ + valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc + + ret = defaultdict(list) + for item in iterable: + key = keyfunc(item) + value = valuefunc(item) + ret[key].append(value) + + if reducefunc is not None: + for key, value_list in ret.items(): + ret[key] = reducefunc(value_list) + + ret.default_factory = None + return ret + + +def rlocate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``, starting from the right and moving left. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(rlocate([0, 1, 1, 0, 1, 0, 0])) # Truthy at 1, 2, and 4 + [4, 2, 1] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item: + + >>> iterable = iter('abcb') + >>> pred = lambda x: x == 'b' + >>> list(rlocate(iterable, pred)) + [3, 1] + + If *window_size* is given, then the *pred* function will be called with + that many items. This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(rlocate(iterable, pred=pred, window_size=3)) + [9, 5, 1] + + Beware, this function won't return anything for infinite iterables. + If *iterable* is reversible, ``rlocate`` will reverse it and search from + the right. Otherwise, it will search from the left and return the results + in reverse order. + + See :func:`locate` to for other example applications. + + """ + if window_size is None: + try: + len_iter = len(iterable) + return (len_iter - i - 1 for i in locate(reversed(iterable), pred)) + except TypeError: + pass + + return reversed(list(locate(iterable, pred, window_size))) + + +def replace(iterable, pred, substitutes, count=None, window_size=1): + """Yield the items from *iterable*, replacing the items for which *pred* + returns ``True`` with the items from the iterable *substitutes*. + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1] + >>> pred = lambda x: x == 0 + >>> substitutes = (2, 3) + >>> list(replace(iterable, pred, substitutes)) + [1, 1, 2, 3, 1, 1, 2, 3, 1, 1] + + If *count* is given, the number of replacements will be limited: + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0] + >>> pred = lambda x: x == 0 + >>> substitutes = [None] + >>> list(replace(iterable, pred, substitutes, count=2)) + [1, 1, None, 1, 1, None, 1, 1, 0] + + Use *window_size* to control the number of items passed as arguments to + *pred*. This allows for locating and replacing subsequences. + + >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5] + >>> window_size = 3 + >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred + >>> substitutes = [3, 4] # Splice in these items + >>> list(replace(iterable, pred, substitutes, window_size=window_size)) + [3, 4, 5, 3, 4, 5] + + """ + if window_size < 1: + raise ValueError('window_size must be at least 1') + + # Save the substitutes iterable, since it's used more than once + substitutes = tuple(substitutes) + + # Add padding such that the number of windows matches the length of the + # iterable + it = chain(iterable, [_marker] * (window_size - 1)) + windows = windowed(it, window_size) + + n = 0 + for w in windows: + # If the current window matches our predicate (and we haven't hit + # our maximum number of replacements), splice in the substitutes + # and then consume the following windows that overlap with this one. + # For example, if the iterable is (0, 1, 2, 3, 4...) + # and the window size is 2, we have (0, 1), (1, 2), (2, 3)... + # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2) + if pred(*w): + if (count is None) or (n < count): + n += 1 + yield from substitutes + consume(windows, window_size - 1) + continue + + # If there was no match (or we've reached the replacement limit), + # yield the first item from the window. + if w and (w[0] is not _marker): + yield w[0] + + +def partitions(iterable): + """Yield all possible order-preserving partitions of *iterable*. + + >>> iterable = 'abc' + >>> for part in partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['a', 'b', 'c'] + + This is unrelated to :func:`partition`. + + """ + sequence = list(iterable) + n = len(sequence) + for i in powerset(range(1, n)): + yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))] + + +def set_partitions(iterable, k=None): + """ + Yield the set partitions of *iterable* into *k* parts. Set partitions are + not order-preserving. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable, 2): + ... print([''.join(p) for p in part]) + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + + + If *k* is not given, every set partition is generated. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + ['a', 'b', 'c'] + + """ + L = list(iterable) + n = len(L) + if k is not None: + if k < 1: + raise ValueError( + "Can't partition in a negative or zero number of groups" + ) + elif k > n: + return + + def set_partitions_helper(L, k): + n = len(L) + if k == 1: + yield [L] + elif n == k: + yield [[s] for s in L] + else: + e, *M = L + for p in set_partitions_helper(M, k - 1): + yield [[e], *p] + for p in set_partitions_helper(M, k): + for i in range(len(p)): + yield p[:i] + [[e] + p[i]] + p[i + 1 :] + + if k is None: + for k in range(1, n + 1): + yield from set_partitions_helper(L, k) + else: + yield from set_partitions_helper(L, k) + + +class time_limited: + """ + Yield items from *iterable* until *limit_seconds* have passed. + If the time limit expires before all items have been yielded, the + ``timed_out`` parameter will be set to ``True``. + + >>> from time import sleep + >>> def generator(): + ... yield 1 + ... yield 2 + ... sleep(0.2) + ... yield 3 + >>> iterable = time_limited(0.1, generator()) + >>> list(iterable) + [1, 2] + >>> iterable.timed_out + True + + Note that the time is checked before each item is yielded, and iteration + stops if the time elapsed is greater than *limit_seconds*. If your time + limit is 1 second, but it takes 2 seconds to generate the first item from + the iterable, the function will run for 2 seconds and not yield anything. + + """ + + def __init__(self, limit_seconds, iterable): + if limit_seconds < 0: + raise ValueError('limit_seconds must be positive') + self.limit_seconds = limit_seconds + self._iterable = iter(iterable) + self._start_time = monotonic() + self.timed_out = False + + def __iter__(self): + return self + + def __next__(self): + item = next(self._iterable) + if monotonic() - self._start_time > self.limit_seconds: + self.timed_out = True + raise StopIteration + + return item + + +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. + + >>> only([], default='missing') + 'missing' + >>> only([1]) + 1 + >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 1, 2, + and perhaps more.' + >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError + + Note that :func:`only` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check + iterable contents less destructively. + """ + it = iter(iterable) + first_value = next(it, default) + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def ichunked(iterable, n): + """Break *iterable* into sub-iterables with *n* elements each. + :func:`ichunked` is like :func:`chunked`, but it yields iterables + instead of lists. + + If the sub-iterables are read in order, the elements of *iterable* + won't be stored in memory. + If they are read out of order, :func:`itertools.tee` is used to cache + elements as necessary. + + >>> from itertools import count + >>> all_chunks = ichunked(count(), 4) + >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks) + >>> list(c_2) # c_1's elements have been cached; c_3's haven't been + [4, 5, 6, 7] + >>> list(c_1) + [0, 1, 2, 3] + >>> list(c_3) + [8, 9, 10, 11] + + """ + source = iter(iterable) + + while True: + # Check to see whether we're at the end of the source iterable + item = next(source, _marker) + if item is _marker: + return + + # Clone the source and yield an n-length slice + source, it = tee(chain([item], source)) + yield islice(it, n) + + # Advance the source iterable + consume(source, n) + + +def distinct_combinations(iterable, r): + """Yield the distinct combinations of *r* items taken from *iterable*. + + >>> list(distinct_combinations([0, 0, 1], 2)) + [(0, 0), (0, 1)] + + Equivalent to ``set(combinations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + """ + if r < 0: + raise ValueError('r must be non-negative') + elif r == 0: + yield () + return + pool = tuple(iterable) + generators = [unique_everseen(enumerate(pool), key=itemgetter(1))] + current_combo = [None] * r + level = 0 + while generators: + try: + cur_idx, p = next(generators[-1]) + except StopIteration: + generators.pop() + level -= 1 + continue + current_combo[level] = p + if level + 1 == r: + yield tuple(current_combo) + else: + generators.append( + unique_everseen( + enumerate(pool[cur_idx + 1 :], cur_idx + 1), + key=itemgetter(1), + ) + ) + level += 1 + + +def filter_except(validator, iterable, *exceptions): + """Yield the items from *iterable* for which the *validator* function does + not raise one of the specified *exceptions*. + + *validator* is called for each item in *iterable*. + It should be a function that accepts one argument and raises an exception + if that item is not valid. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(filter_except(int, iterable, ValueError, TypeError)) + ['1', '2', '4'] + + If an exception other than one given by *exceptions* is raised by + *validator*, it is raised like normal. + """ + for item in iterable: + try: + validator(item) + except exceptions: + pass + else: + yield item + + +def map_except(function, iterable, *exceptions): + """Transform each item from *iterable* with *function* and yield the + result, unless *function* raises one of the specified *exceptions*. + + *function* is called to transform each item in *iterable*. + It should be a accept one argument. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(map_except(int, iterable, ValueError, TypeError)) + [1, 2, 4] + + If an exception other than one given by *exceptions* is raised by + *function*, it is raised like normal. + """ + for item in iterable: + try: + yield function(item) + except exceptions: + pass + + +def _sample_unweighted(iterable, k): + # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li: + # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))". + + # Fill up the reservoir (collection of samples) with the first `k` samples + reservoir = take(k, iterable) + + # Generate random number that's the largest in a sample of k U(0,1) numbers + # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic + W = exp(log(random()) / k) + + # The number of elements to skip before changing the reservoir is a random + # number with a geometric distribution. Sample it using random() and logs. + next_index = k + floor(log(random()) / log(1 - W)) + + for index, element in enumerate(iterable, k): + + if index == next_index: + reservoir[randrange(k)] = element + # The new W is the largest in a sample of k U(0, `old_W`) numbers + W *= exp(log(random()) / k) + next_index += floor(log(random()) / log(1 - W)) + 1 + + return reservoir + + +def _sample_weighted(iterable, k, weights): + # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. : + # "Weighted random sampling with a reservoir". + + # Log-transform for numerical stability for weights that are small/large + weight_keys = (log(random()) / weight for weight in weights) + + # Fill up the reservoir (collection of samples) with the first `k` + # weight-keys and elements, then heapify the list. + reservoir = take(k, zip(weight_keys, iterable)) + heapify(reservoir) + + # The number of jumps before changing the reservoir is a random variable + # with an exponential distribution. Sample it using random() and logs. + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + + for weight, element in zip(weights, iterable): + if weight >= weights_to_skip: + # The notation here is consistent with the paper, but we store + # the weight-keys in log-space for better numerical stability. + smallest_weight_key, _ = reservoir[0] + t_w = exp(weight * smallest_weight_key) + r_2 = uniform(t_w, 1) # generate U(t_w, 1) + weight_key = log(r_2) / weight + heapreplace(reservoir, (weight_key, element)) + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + else: + weights_to_skip -= weight + + # Equivalent to [element for weight_key, element in sorted(reservoir)] + return [heappop(reservoir)[1] for _ in range(k)] + + +def sample(iterable, k, weights=None): + """Return a *k*-length list of elements chosen (without replacement) + from the *iterable*. Like :func:`random.sample`, but works on iterables + of unknown length. + + >>> iterable = range(100) + >>> sample(iterable, 5) # doctest: +SKIP + [81, 60, 96, 16, 4] + + An iterable with *weights* may also be given: + + >>> iterable = range(100) + >>> weights = (i * i + 1 for i in range(100)) + >>> sampled = sample(iterable, 5, weights=weights) # doctest: +SKIP + [79, 67, 74, 66, 78] + + The algorithm can also be used to generate weighted random permutations. + The relative weight of each item determines the probability that it + appears late in the permutation. + + >>> data = "abcdefgh" + >>> weights = range(1, len(data) + 1) + >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP + ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f'] + """ + if k == 0: + return [] + + iterable = iter(iterable) + if weights is None: + return _sample_unweighted(iterable, k) + else: + weights = iter(weights) + return _sample_weighted(iterable, k, weights) + + +def is_sorted(iterable, key=None, reverse=False): + """Returns ``True`` if the items of iterable are in sorted order, and + ``False`` otherwise. *key* and *reverse* have the same meaning that they do + in the built-in :func:`sorted` function. + + >>> is_sorted(['1', '2', '3', '4', '5'], key=int) + True + >>> is_sorted([5, 4, 3, 1, 2], reverse=True) + False + + The function returns ``False`` after encountering the first out-of-order + item. If there are no out-of-order items, the iterable is exhausted. + """ + + compare = lt if reverse else gt + it = iterable if (key is None) else map(key, iterable) + return not any(starmap(compare, pairwise(it))) + + +class AbortThread(BaseException): + pass + + +class callback_iter: + """Convert a function that uses callbacks to an iterator. + + Let *func* be a function that takes a `callback` keyword argument. + For example: + + >>> def func(callback=None): + ... for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]: + ... if callback: + ... callback(i, c) + ... return 4 + + + Use ``with callback_iter(func)`` to get an iterator over the parameters + that are delivered to the callback. + + >>> with callback_iter(func) as it: + ... for args, kwargs in it: + ... print(args) + (1, 'a') + (2, 'b') + (3, 'c') + + The function will be called in a background thread. The ``done`` property + indicates whether it has completed execution. + + >>> it.done + True + + If it completes successfully, its return value will be available + in the ``result`` property. + + >>> it.result + 4 + + Notes: + + * If the function uses some keyword argument besides ``callback``, supply + *callback_kwd*. + * If it finished executing, but raised an exception, accessing the + ``result`` property will raise the same exception. + * If it hasn't finished executing, accessing the ``result`` + property from within the ``with`` block will raise ``RuntimeError``. + * If it hasn't finished executing, accessing the ``result`` property from + outside the ``with`` block will raise a + ``more_itertools.AbortThread`` exception. + * Provide *wait_seconds* to adjust how frequently the it is polled for + output. + + """ + + def __init__(self, func, callback_kwd='callback', wait_seconds=0.1): + self._func = func + self._callback_kwd = callback_kwd + self._aborted = False + self._future = None + self._wait_seconds = wait_seconds + self._executor = __import__("concurrent.futures").futures.ThreadPoolExecutor(max_workers=1) + self._iterator = self._reader() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._aborted = True + self._executor.shutdown() + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterator) + + @property + def done(self): + if self._future is None: + return False + return self._future.done() + + @property + def result(self): + if not self.done: + raise RuntimeError('Function has not yet completed') + + return self._future.result() + + def _reader(self): + q = Queue() + + def callback(*args, **kwargs): + if self._aborted: + raise AbortThread('canceled by user') + + q.put((args, kwargs)) + + self._future = self._executor.submit( + self._func, **{self._callback_kwd: callback} + ) + + while True: + try: + item = q.get(timeout=self._wait_seconds) + except Empty: + pass + else: + q.task_done() + yield item + + if self._future.done(): + break + + remaining = [] + while True: + try: + item = q.get_nowait() + except Empty: + break + else: + q.task_done() + remaining.append(item) + q.join() + yield from remaining + + +def windowed_complete(iterable, n): + """ + Yield ``(beginning, middle, end)`` tuples, where: + + * Each ``middle`` has *n* items from *iterable* + * Each ``beginning`` has the items before the ones in ``middle`` + * Each ``end`` has the items after the ones in ``middle`` + + >>> iterable = range(7) + >>> n = 3 + >>> for beginning, middle, end in windowed_complete(iterable, n): + ... print(beginning, middle, end) + () (0, 1, 2) (3, 4, 5, 6) + (0,) (1, 2, 3) (4, 5, 6) + (0, 1) (2, 3, 4) (5, 6) + (0, 1, 2) (3, 4, 5) (6,) + (0, 1, 2, 3) (4, 5, 6) () + + Note that *n* must be at least 0 and most equal to the length of + *iterable*. + + This function will exhaust the iterable and may require significant + storage. + """ + if n < 0: + raise ValueError('n must be >= 0') + + seq = tuple(iterable) + size = len(seq) + + if n > size: + raise ValueError('n must be <= len(seq)') + + for i in range(size - n + 1): + beginning = seq[:i] + middle = seq[i : i + n] + end = seq[i + n :] + yield beginning, middle, end + + +def all_unique(iterable, key=None): + """ + Returns ``True`` if all the elements of *iterable* are unique (no two + elements are equal). + + >>> all_unique('ABCB') + False + + If a *key* function is specified, it will be used to make comparisons. + + >>> all_unique('ABCb') + True + >>> all_unique('ABCb', str.lower) + False + + The function returns as soon as the first non-unique element is + encountered. Iterables with a mix of hashable and unhashable items can + be used, but the function will be slower for unhashable items. + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + for element in map(key, iterable) if key else iterable: + try: + if element in seenset: + return False + seenset_add(element) + except TypeError: + if element in seenlist: + return False + seenlist_add(element) + return True + + +def nth_product(index, *args): + """Equivalent to ``list(product(*args))[index]``. + + The products of *args* can be ordered lexicographically. + :func:`nth_product` computes the product at sort position *index* without + computing the previous products. + + >>> nth_product(8, range(2), range(2), range(2), range(2)) + (1, 0, 0, 0) + + ``IndexError`` will be raised if the given *index* is invalid. + """ + pools = list(map(tuple, reversed(args))) + ns = list(map(len, pools)) + + c = reduce(mul, ns) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + result = [] + for pool, n in zip(pools, ns): + result.append(pool[index % n]) + index //= n + + return tuple(reversed(result)) + + +def nth_permutation(iterable, r, index): + """Equivalent to ``list(permutations(iterable, r))[index]``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`nth_permutation` + computes the subsequence at sort position *index* directly, without + computing the previous subsequences. + + >>> nth_permutation('ghijk', 2, 5) + ('h', 'i') + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = list(iterable) + n = len(pool) + + if r is None or r == n: + r, c = n, factorial(n) + elif not 0 <= r < n: + raise ValueError + else: + c = factorial(n) // factorial(n - r) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + if c == 0: + return tuple() + + result = [0] * r + q = index * factorial(n) // c if r < n else index + for d in range(1, n + 1): + q, i = divmod(q, d) + if 0 <= n - d < r: + result[n - d] = i + if q == 0: + break + + return tuple(map(pool.pop, result)) + + +def value_chain(*args): + """Yield all arguments passed to the function in the same order in which + they were passed. If an argument itself is iterable then iterate over its + values. + + >>> list(value_chain(1, 2, 3, [4, 5, 6])) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and are emitted + as-is: + + >>> list(value_chain('12', '34', ['56', '78'])) + ['12', '34', '56', '78'] + + + Multiple levels of nesting are not flattened. + + """ + for value in args: + if isinstance(value, (str, bytes)): + yield value + continue + try: + yield from value + except TypeError: + yield value + + +def product_index(element, *args): + """Equivalent to ``list(product(*args)).index(element)`` + + The products of *args* can be ordered lexicographically. + :func:`product_index` computes the first index of *element* without + computing the previous products. + + >>> product_index([8, 2], range(10), range(5)) + 42 + + ``ValueError`` will be raised if the given *element* isn't in the product + of *args*. + """ + index = 0 + + for x, pool in zip_longest(element, args, fillvalue=_marker): + if x is _marker or pool is _marker: + raise ValueError('element is not a product of args') + + pool = tuple(pool) + index = index * len(pool) + pool.index(x) + + return index + + +def combination_index(element, iterable): + """Equivalent to ``list(combinations(iterable, r)).index(element)`` + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`combination_index` computes the index of the + first *element*, without computing the previous combinations. + + >>> combination_index('adf', 'abcdefg') + 10 + + ``ValueError`` will be raised if the given *element* isn't one of the + combinations of *iterable*. + """ + element = enumerate(element) + k, y = next(element, (None, None)) + if k is None: + return 0 + + indexes = [] + pool = enumerate(iterable) + for n, x in pool: + if x == y: + indexes.append(n) + tmp, y = next(element, (None, None)) + if tmp is None: + break + else: + k = tmp + else: + raise ValueError('element is not a combination of iterable') + + n, _ = last(pool, default=(n, None)) + + # Python versiosn below 3.8 don't have math.comb + index = 1 + for i, j in enumerate(reversed(indexes), start=1): + j = n - j + if i <= j: + index += factorial(j) // (factorial(i) * factorial(j - i)) + + return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index + + +def permutation_index(element, iterable): + """Equivalent to ``list(permutations(iterable, r)).index(element)``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`permutation_index` + computes the index of the first *element* directly, without computing + the previous permutations. + + >>> permutation_index([1, 3, 2], range(5)) + 19 + + ``ValueError`` will be raised if the given *element* isn't one of the + permutations of *iterable*. + """ + index = 0 + pool = list(iterable) + for i, x in zip(range(len(pool), -1, -1), element): + r = pool.index(x) + index = index * i + r + del pool[r] + + return index + + +class countable: + """Wrap *iterable* and keep a count of how many items have been consumed. + + The ``items_seen`` attribute starts at ``0`` and increments as the iterable + is consumed: + + >>> iterable = map(str, range(10)) + >>> it = countable(iterable) + >>> it.items_seen + 0 + >>> next(it), next(it) + ('0', '1') + >>> list(it) + ['2', '3', '4', '5', '6', '7', '8', '9'] + >>> it.items_seen + 10 + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self.items_seen = 0 + + def __iter__(self): + return self + + def __next__(self): + item = next(self._it) + self.items_seen += 1 + + return item diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/more.pyi b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/more.pyi new file mode 100644 index 0000000000000000000000000000000000000000..2fba9cb300b24ec67d6fbfeedc20ca74025b11a2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/more.pyi @@ -0,0 +1,480 @@ +"""Stubs for more_itertools.more""" + +from typing import ( + Any, + Callable, + Container, + Dict, + Generic, + Hashable, + Iterable, + Iterator, + List, + Optional, + Reversible, + Sequence, + Sized, + Tuple, + Union, + TypeVar, + type_check_only, +) +from types import TracebackType +from typing_extensions import ContextManager, Protocol, Type, overload + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') +_V = TypeVar('_V') +_W = TypeVar('_W') +_T_co = TypeVar('_T_co', covariant=True) +_GenFn = TypeVar('_GenFn', bound=Callable[..., Iterator[object]]) +_Raisable = Union[BaseException, 'Type[BaseException]'] + +@type_check_only +class _SizedIterable(Protocol[_T_co], Sized, Iterable[_T_co]): ... + +@type_check_only +class _SizedReversible(Protocol[_T_co], Sized, Reversible[_T_co]): ... + +def chunked( + iterable: Iterable[_T], n: int, strict: bool = ... +) -> Iterator[List[_T]]: ... +@overload +def first(iterable: Iterable[_T]) -> _T: ... +@overload +def first(iterable: Iterable[_T], default: _U) -> Union[_T, _U]: ... +@overload +def last(iterable: Iterable[_T]) -> _T: ... +@overload +def last(iterable: Iterable[_T], default: _U) -> Union[_T, _U]: ... +@overload +def nth_or_last(iterable: Iterable[_T], n: int) -> _T: ... +@overload +def nth_or_last( + iterable: Iterable[_T], n: int, default: _U +) -> Union[_T, _U]: ... + +class peekable(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T]) -> None: ... + def __iter__(self) -> peekable[_T]: ... + def __bool__(self) -> bool: ... + @overload + def peek(self) -> _T: ... + @overload + def peek(self, default: _U) -> Union[_T, _U]: ... + def prepend(self, *items: _T) -> None: ... + def __next__(self) -> _T: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> List[_T]: ... + +def collate(*iterables: Iterable[_T], **kwargs: Any) -> Iterable[_T]: ... +def consumer(func: _GenFn) -> _GenFn: ... +def ilen(iterable: Iterable[object]) -> int: ... +def iterate(func: Callable[[_T], _T], start: _T) -> Iterator[_T]: ... +def with_iter( + context_manager: ContextManager[Iterable[_T]], +) -> Iterator[_T]: ... +def one( + iterable: Iterable[_T], + too_short: Optional[_Raisable] = ..., + too_long: Optional[_Raisable] = ..., +) -> _T: ... +def distinct_permutations( + iterable: Iterable[_T], r: Optional[int] = ... +) -> Iterator[Tuple[_T, ...]]: ... +def intersperse( + e: _U, iterable: Iterable[_T], n: int = ... +) -> Iterator[Union[_T, _U]]: ... +def unique_to_each(*iterables: Iterable[_T]) -> List[List[_T]]: ... +@overload +def windowed( + seq: Iterable[_T], n: int, *, step: int = ... +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def windowed( + seq: Iterable[_T], n: int, fillvalue: _U, step: int = ... +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +def substrings(iterable: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ... +def substrings_indexes( + seq: Sequence[_T], reverse: bool = ... +) -> Iterator[Tuple[Sequence[_T], int, int]]: ... + +class bucket(Generic[_T, _U], Container[_U]): + def __init__( + self, + iterable: Iterable[_T], + key: Callable[[_T], _U], + validator: Optional[Callable[[object], object]] = ..., + ) -> None: ... + def __contains__(self, value: object) -> bool: ... + def __iter__(self) -> Iterator[_U]: ... + def __getitem__(self, value: object) -> Iterator[_T]: ... + +def spy( + iterable: Iterable[_T], n: int = ... +) -> Tuple[List[_T], Iterator[_T]]: ... +def interleave(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def interleave_longest(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def collapse( + iterable: Iterable[Any], + base_type: Optional[type] = ..., + levels: Optional[int] = ..., +) -> Iterator[Any]: ... +@overload +def side_effect( + func: Callable[[_T], object], + iterable: Iterable[_T], + chunk_size: None = ..., + before: Optional[Callable[[], object]] = ..., + after: Optional[Callable[[], object]] = ..., +) -> Iterator[_T]: ... +@overload +def side_effect( + func: Callable[[List[_T]], object], + iterable: Iterable[_T], + chunk_size: int, + before: Optional[Callable[[], object]] = ..., + after: Optional[Callable[[], object]] = ..., +) -> Iterator[_T]: ... +def sliced( + seq: Sequence[_T], n: int, strict: bool = ... +) -> Iterator[Sequence[_T]]: ... +def split_at( + iterable: Iterable[_T], + pred: Callable[[_T], object], + maxsplit: int = ..., + keep_separator: bool = ..., +) -> Iterator[List[_T]]: ... +def split_before( + iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ... +) -> Iterator[List[_T]]: ... +def split_after( + iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ... +) -> Iterator[List[_T]]: ... +def split_when( + iterable: Iterable[_T], + pred: Callable[[_T, _T], object], + maxsplit: int = ..., +) -> Iterator[List[_T]]: ... +def split_into( + iterable: Iterable[_T], sizes: Iterable[Optional[int]] +) -> Iterator[List[_T]]: ... +@overload +def padded( + iterable: Iterable[_T], + *, + n: Optional[int] = ..., + next_multiple: bool = ... +) -> Iterator[Optional[_T]]: ... +@overload +def padded( + iterable: Iterable[_T], + fillvalue: _U, + n: Optional[int] = ..., + next_multiple: bool = ..., +) -> Iterator[Union[_T, _U]]: ... +@overload +def repeat_last(iterable: Iterable[_T]) -> Iterator[_T]: ... +@overload +def repeat_last( + iterable: Iterable[_T], default: _U +) -> Iterator[Union[_T, _U]]: ... +def distribute(n: int, iterable: Iterable[_T]) -> List[Iterator[_T]]: ... +@overload +def stagger( + iterable: Iterable[_T], + offsets: _SizedIterable[int] = ..., + longest: bool = ..., +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def stagger( + iterable: Iterable[_T], + offsets: _SizedIterable[int] = ..., + longest: bool = ..., + fillvalue: _U = ..., +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... + +class UnequalIterablesError(ValueError): + def __init__( + self, details: Optional[Tuple[int, int, int]] = ... + ) -> None: ... + +def zip_equal(*iterables: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ... +@overload +def zip_offset( + *iterables: Iterable[_T], offsets: _SizedIterable[int], longest: bool = ... +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def zip_offset( + *iterables: Iterable[_T], + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +def sort_together( + iterables: Iterable[Iterable[_T]], + key_list: Iterable[int] = ..., + key: Optional[Callable[..., Any]] = ..., + reverse: bool = ..., +) -> List[Tuple[_T, ...]]: ... +def unzip(iterable: Iterable[Sequence[_T]]) -> Tuple[Iterator[_T], ...]: ... +def divide(n: int, iterable: Iterable[_T]) -> List[Iterator[_T]]: ... +def always_iterable( + obj: object, + base_type: Union[ + type, Tuple[Union[type, Tuple[Any, ...]], ...], None + ] = ..., +) -> Iterator[Any]: ... +def adjacent( + predicate: Callable[[_T], bool], + iterable: Iterable[_T], + distance: int = ..., +) -> Iterator[Tuple[bool, _T]]: ... +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Optional[Callable[[_T], _U]] = ..., + valuefunc: Optional[Callable[[_T], _V]] = ..., + reducefunc: Optional[Callable[..., _W]] = ..., +) -> Iterator[Tuple[_T, _W]]: ... + +class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]): + @overload + def __init__(self, __stop: _T) -> None: ... + @overload + def __init__(self, __start: _T, __stop: _T) -> None: ... + @overload + def __init__(self, __start: _T, __stop: _T, __step: _U) -> None: ... + def __bool__(self) -> bool: ... + def __contains__(self, elem: object) -> bool: ... + def __eq__(self, other: object) -> bool: ... + @overload + def __getitem__(self, key: int) -> _T: ... + @overload + def __getitem__(self, key: slice) -> numeric_range[_T, _U]: ... + def __hash__(self) -> int: ... + def __iter__(self) -> Iterator[_T]: ... + def __len__(self) -> int: ... + def __reduce__( + self, + ) -> Tuple[Type[numeric_range[_T, _U]], Tuple[_T, _T, _U]]: ... + def __repr__(self) -> str: ... + def __reversed__(self) -> Iterator[_T]: ... + def count(self, value: _T) -> int: ... + def index(self, value: _T) -> int: ... # type: ignore + +def count_cycle( + iterable: Iterable[_T], n: Optional[int] = ... +) -> Iterable[Tuple[int, _T]]: ... +def mark_ends( + iterable: Iterable[_T], +) -> Iterable[Tuple[bool, bool, _T]]: ... +def locate( + iterable: Iterable[object], + pred: Callable[..., Any] = ..., + window_size: Optional[int] = ..., +) -> Iterator[int]: ... +def lstrip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... +def rstrip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... +def strip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... + +class islice_extended(Generic[_T], Iterator[_T]): + def __init__( + self, iterable: Iterable[_T], *args: Optional[int] + ) -> None: ... + def __iter__(self) -> islice_extended[_T]: ... + def __next__(self) -> _T: ... + def __getitem__(self, index: slice) -> islice_extended[_T]: ... + +def always_reversible(iterable: Iterable[_T]) -> Iterator[_T]: ... +def consecutive_groups( + iterable: Iterable[_T], ordering: Callable[[_T], int] = ... +) -> Iterator[Iterator[_T]]: ... +@overload +def difference( + iterable: Iterable[_T], + func: Callable[[_T, _T], _U] = ..., + *, + initial: None = ... +) -> Iterator[Union[_T, _U]]: ... +@overload +def difference( + iterable: Iterable[_T], func: Callable[[_T, _T], _U] = ..., *, initial: _U +) -> Iterator[_U]: ... + +class SequenceView(Generic[_T], Sequence[_T]): + def __init__(self, target: Sequence[_T]) -> None: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> Sequence[_T]: ... + def __len__(self) -> int: ... + +class seekable(Generic[_T], Iterator[_T]): + def __init__( + self, iterable: Iterable[_T], maxlen: Optional[int] = ... + ) -> None: ... + def __iter__(self) -> seekable[_T]: ... + def __next__(self) -> _T: ... + def __bool__(self) -> bool: ... + @overload + def peek(self) -> _T: ... + @overload + def peek(self, default: _U) -> Union[_T, _U]: ... + def elements(self) -> SequenceView[_T]: ... + def seek(self, index: int) -> None: ... + +class run_length: + @staticmethod + def encode(iterable: Iterable[_T]) -> Iterator[Tuple[_T, int]]: ... + @staticmethod + def decode(iterable: Iterable[Tuple[_T, int]]) -> Iterator[_T]: ... + +def exactly_n( + iterable: Iterable[_T], n: int, predicate: Callable[[_T], object] = ... +) -> bool: ... +def circular_shifts(iterable: Iterable[_T]) -> List[Tuple[_T, ...]]: ... +def make_decorator( + wrapping_func: Callable[..., _U], result_index: int = ... +) -> Callable[..., Callable[[Callable[..., Any]], Callable[..., _U]]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None = ..., + reducefunc: None = ..., +) -> Dict[_U, List[_T]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: None = ..., +) -> Dict[_U, List[_V]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None = ..., + reducefunc: Callable[[List[_T]], _W] = ..., +) -> Dict[_U, _W]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[List[_V]], _W], +) -> Dict[_U, _W]: ... +def rlocate( + iterable: Iterable[_T], + pred: Callable[..., object] = ..., + window_size: Optional[int] = ..., +) -> Iterator[int]: ... +def replace( + iterable: Iterable[_T], + pred: Callable[..., object], + substitutes: Iterable[_U], + count: Optional[int] = ..., + window_size: int = ..., +) -> Iterator[Union[_T, _U]]: ... +def partitions(iterable: Iterable[_T]) -> Iterator[List[List[_T]]]: ... +def set_partitions( + iterable: Iterable[_T], k: Optional[int] = ... +) -> Iterator[List[List[_T]]]: ... + +class time_limited(Generic[_T], Iterator[_T]): + def __init__( + self, limit_seconds: float, iterable: Iterable[_T] + ) -> None: ... + def __iter__(self) -> islice_extended[_T]: ... + def __next__(self) -> _T: ... + +@overload +def only( + iterable: Iterable[_T], *, too_long: Optional[_Raisable] = ... +) -> Optional[_T]: ... +@overload +def only( + iterable: Iterable[_T], default: _U, too_long: Optional[_Raisable] = ... +) -> Union[_T, _U]: ... +def ichunked(iterable: Iterable[_T], n: int) -> Iterator[Iterator[_T]]: ... +def distinct_combinations( + iterable: Iterable[_T], r: int +) -> Iterator[Tuple[_T, ...]]: ... +def filter_except( + validator: Callable[[Any], object], + iterable: Iterable[_T], + *exceptions: Type[BaseException] +) -> Iterator[_T]: ... +def map_except( + function: Callable[[Any], _U], + iterable: Iterable[_T], + *exceptions: Type[BaseException] +) -> Iterator[_U]: ... +def sample( + iterable: Iterable[_T], + k: int, + weights: Optional[Iterable[float]] = ..., +) -> List[_T]: ... +def is_sorted( + iterable: Iterable[_T], + key: Optional[Callable[[_T], _U]] = ..., + reverse: bool = False, +) -> bool: ... + +class AbortThread(BaseException): + pass + +class callback_iter(Generic[_T], Iterator[_T]): + def __init__( + self, + func: Callable[..., Any], + callback_kwd: str = ..., + wait_seconds: float = ..., + ) -> None: ... + def __enter__(self) -> callback_iter[_T]: ... + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> Optional[bool]: ... + def __iter__(self) -> callback_iter[_T]: ... + def __next__(self) -> _T: ... + def _reader(self) -> Iterator[_T]: ... + @property + def done(self) -> bool: ... + @property + def result(self) -> Any: ... + +def windowed_complete( + iterable: Iterable[_T], n: int +) -> Iterator[Tuple[_T, ...]]: ... +def all_unique( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ... +) -> bool: ... +def nth_product(index: int, *args: Iterable[_T]) -> Tuple[_T, ...]: ... +def nth_permutation( + iterable: Iterable[_T], r: int, index: int +) -> Tuple[_T, ...]: ... +def value_chain(*args: Union[_T, Iterable[_T]]) -> Iterable[_T]: ... +def product_index(element: Iterable[_T], *args: Iterable[_T]) -> int: ... +def combination_index( + element: Iterable[_T], iterable: Iterable[_T] +) -> int: ... +def permutation_index( + element: Iterable[_T], iterable: Iterable[_T] +) -> int: ... + +class countable(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T]) -> None: ... + def __iter__(self) -> countable[_T]: ... + def __next__(self) -> _T: ... diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/recipes.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/recipes.py new file mode 100644 index 0000000000000000000000000000000000000000..521abd7c2ca633f90a5ba13a8060c5c3d0c32205 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/recipes.py @@ -0,0 +1,620 @@ +"""Imported from the recipes section of the itertools documentation. + +All functions taken from the recipes section of the itertools library docs +[1]_. +Some backward-compatible usability improvements have been made. + +.. [1] http://docs.python.org/library/itertools.html#recipes + +""" +import warnings +from collections import deque +from itertools import ( + chain, + combinations, + count, + cycle, + groupby, + islice, + repeat, + starmap, + tee, + zip_longest, +) +import operator +from random import randrange, sample, choice + +__all__ = [ + 'all_equal', + 'consume', + 'convolve', + 'dotproduct', + 'first_true', + 'flatten', + 'grouper', + 'iter_except', + 'ncycles', + 'nth', + 'nth_combination', + 'padnone', + 'pad_none', + 'pairwise', + 'partition', + 'powerset', + 'prepend', + 'quantify', + 'random_combination_with_replacement', + 'random_combination', + 'random_permutation', + 'random_product', + 'repeatfunc', + 'roundrobin', + 'tabulate', + 'tail', + 'take', + 'unique_everseen', + 'unique_justseen', +] + + +def take(n, iterable): + """Return first *n* items of the iterable as a list. + + >>> take(3, range(10)) + [0, 1, 2] + + If there are fewer than *n* items in the iterable, all of them are + returned. + + >>> take(10, range(3)) + [0, 1, 2] + + """ + return list(islice(iterable, n)) + + +def tabulate(function, start=0): + """Return an iterator over the results of ``func(start)``, + ``func(start + 1)``, ``func(start + 2)``... + + *func* should be a function that accepts one integer argument. + + If *start* is not specified it defaults to 0. It will be incremented each + time the iterator is advanced. + + >>> square = lambda x: x ** 2 + >>> iterator = tabulate(square, -3) + >>> take(4, iterator) + [9, 4, 1, 0] + + """ + return map(function, count(start)) + + +def tail(n, iterable): + """Return an iterator over the last *n* items of *iterable*. + + >>> t = tail(3, 'ABCDEFG') + >>> list(t) + ['E', 'F', 'G'] + + """ + return iter(deque(iterable, maxlen=n)) + + +def consume(iterator, n=None): + """Advance *iterable* by *n* steps. If *n* is ``None``, consume it + entirely. + + Efficiently exhausts an iterator without returning values. Defaults to + consuming the whole iterator, but an optional second argument may be + provided to limit consumption. + + >>> i = (x for x in range(10)) + >>> next(i) + 0 + >>> consume(i, 3) + >>> next(i) + 4 + >>> consume(i) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + + If the iterator has fewer items remaining than the provided limit, the + whole iterator will be consumed. + + >>> i = (x for x in range(3)) + >>> consume(i, 5) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + + """ + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def nth(iterable, n, default=None): + """Returns the nth item or a default value. + + >>> l = range(10) + >>> nth(l, 3) + 3 + >>> nth(l, 20, "zebra") + 'zebra' + + """ + return next(islice(iterable, n, None), default) + + +def all_equal(iterable): + """ + Returns ``True`` if all the elements are equal to each other. + + >>> all_equal('aaaa') + True + >>> all_equal('aaab') + False + + """ + g = groupby(iterable) + return next(g, True) and not next(g, False) + + +def quantify(iterable, pred=bool): + """Return the how many times the predicate is true. + + >>> quantify([True, False, True]) + 2 + + """ + return sum(map(pred, iterable)) + + +def pad_none(iterable): + """Returns the sequence of elements and then returns ``None`` indefinitely. + + >>> take(5, pad_none(range(3))) + [0, 1, 2, None, None] + + Useful for emulating the behavior of the built-in :func:`map` function. + + See also :func:`padded`. + + """ + return chain(iterable, repeat(None)) + + +padnone = pad_none + + +def ncycles(iterable, n): + """Returns the sequence elements *n* times + + >>> list(ncycles(["a", "b"], 3)) + ['a', 'b', 'a', 'b', 'a', 'b'] + + """ + return chain.from_iterable(repeat(tuple(iterable), n)) + + +def dotproduct(vec1, vec2): + """Returns the dot product of the two iterables. + + >>> dotproduct([10, 10], [20, 20]) + 400 + + """ + return sum(map(operator.mul, vec1, vec2)) + + +def flatten(listOfLists): + """Return an iterator flattening one level of nesting in a list of lists. + + >>> list(flatten([[0, 1], [2, 3]])) + [0, 1, 2, 3] + + See also :func:`collapse`, which can flatten multiple levels of nesting. + + """ + return chain.from_iterable(listOfLists) + + +def repeatfunc(func, times=None, *args): + """Call *func* with *args* repeatedly, returning an iterable over the + results. + + If *times* is specified, the iterable will terminate after that many + repetitions: + + >>> from operator import add + >>> times = 4 + >>> args = 3, 5 + >>> list(repeatfunc(add, times, *args)) + [8, 8, 8, 8] + + If *times* is ``None`` the iterable will not terminate: + + >>> from random import randrange + >>> times = None + >>> args = 1, 11 + >>> take(6, repeatfunc(randrange, times, *args)) # doctest:+SKIP + [2, 4, 8, 1, 8, 4] + + """ + if times is None: + return starmap(func, repeat(args)) + return starmap(func, repeat(args, times)) + + +def _pairwise(iterable): + """Returns an iterator of paired items, overlapping, from the original + + >>> take(4, pairwise(count())) + [(0, 1), (1, 2), (2, 3), (3, 4)] + + On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`. + + """ + a, b = tee(iterable) + next(b, None) + yield from zip(a, b) + + +try: + from itertools import pairwise as itertools_pairwise +except ImportError: + pairwise = _pairwise +else: + + def pairwise(iterable): + yield from itertools_pairwise(iterable) + + pairwise.__doc__ = _pairwise.__doc__ + + +def grouper(iterable, n, fillvalue=None): + """Collect data into fixed-length chunks or blocks. + + >>> list(grouper('ABCDEFG', 3, 'x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + + """ + if isinstance(iterable, int): + warnings.warn( + "grouper expects iterable as first parameter", DeprecationWarning + ) + n, iterable = iterable, n + args = [iter(iterable)] * n + return zip_longest(fillvalue=fillvalue, *args) + + +def roundrobin(*iterables): + """Yields an item from each iterable, alternating between them. + + >>> list(roundrobin('ABC', 'D', 'EF')) + ['A', 'D', 'E', 'B', 'F', 'C'] + + This function produces the same output as :func:`interleave_longest`, but + may perform better for some inputs (in particular when the number of + iterables is small). + + """ + # Recipe credited to George Sakkis + pending = len(iterables) + nexts = cycle(iter(it).__next__ for it in iterables) + while pending: + try: + for next in nexts: + yield next() + except StopIteration: + pending -= 1 + nexts = cycle(islice(nexts, pending)) + + +def partition(pred, iterable): + """ + Returns a 2-tuple of iterables derived from the input iterable. + The first yields the items that have ``pred(item) == False``. + The second yields the items that have ``pred(item) == True``. + + >>> is_odd = lambda x: x % 2 != 0 + >>> iterable = range(10) + >>> even_items, odd_items = partition(is_odd, iterable) + >>> list(even_items), list(odd_items) + ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) + + If *pred* is None, :func:`bool` is used. + + >>> iterable = [0, 1, False, True, '', ' '] + >>> false_items, true_items = partition(None, iterable) + >>> list(false_items), list(true_items) + ([0, False, ''], [1, True, ' ']) + + """ + if pred is None: + pred = bool + + evaluations = ((pred(x), x) for x in iterable) + t1, t2 = tee(evaluations) + return ( + (x for (cond, x) in t1 if not cond), + (x for (cond, x) in t2 if cond), + ) + + +def powerset(iterable): + """Yields all possible subsets of the iterable. + + >>> list(powerset([1, 2, 3])) + [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)] + + :func:`powerset` will operate on iterables that aren't :class:`set` + instances, so repeated elements in the input will produce repeated elements + in the output. Use :func:`unique_everseen` on the input to avoid generating + duplicates: + + >>> seq = [1, 1, 0] + >>> list(powerset(seq)) + [(), (1,), (1,), (0,), (1, 1), (1, 0), (1, 0), (1, 1, 0)] + >>> from more_itertools import unique_everseen + >>> list(powerset(unique_everseen(seq))) + [(), (1,), (0,), (1, 0)] + + """ + s = list(iterable) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + + +def unique_everseen(iterable, key=None): + """ + Yield unique elements, preserving order. + + >>> list(unique_everseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D'] + >>> list(unique_everseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'D'] + + Sequences with a mix of hashable and unhashable items can be used. + The function will be slower (i.e., `O(n^2)`) for unhashable items. + + Remember that ``list`` objects are unhashable - you can use the *key* + parameter to transform the list to a tuple (which is hashable) to + avoid a slowdown. + + >>> iterable = ([1, 2], [2, 3], [1, 2]) + >>> list(unique_everseen(iterable)) # Slow + [[1, 2], [2, 3]] + >>> list(unique_everseen(iterable, key=tuple)) # Faster + [[1, 2], [2, 3]] + + Similary, you may want to convert unhashable ``set`` objects with + ``key=frozenset``. For ``dict`` objects, + ``key=lambda x: frozenset(x.items())`` can be used. + + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seenset: + seenset_add(k) + yield element + except TypeError: + if k not in seenlist: + seenlist_add(k) + yield element + + +def unique_justseen(iterable, key=None): + """Yields elements in order, ignoring serial duplicates + + >>> list(unique_justseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D', 'A', 'B'] + >>> list(unique_justseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'A', 'D'] + + """ + return map(next, map(operator.itemgetter(1), groupby(iterable, key))) + + +def iter_except(func, exception, first=None): + """Yields results from a function repeatedly until an exception is raised. + + Converts a call-until-exception interface to an iterator interface. + Like ``iter(func, sentinel)``, but uses an exception instead of a sentinel + to end the loop. + + >>> l = [0, 1, 2] + >>> list(iter_except(l.pop, IndexError)) + [2, 1, 0] + + """ + try: + if first is not None: + yield first() + while 1: + yield func() + except exception: + pass + + +def first_true(iterable, default=None, pred=None): + """ + Returns the first true value in the iterable. + + If no true value is found, returns *default* + + If *pred* is not None, returns the first item for which + ``pred(item) == True`` . + + >>> first_true(range(10)) + 1 + >>> first_true(range(10), pred=lambda x: x > 5) + 6 + >>> first_true(range(10), default='missing', pred=lambda x: x > 9) + 'missing' + + """ + return next(filter(pred, iterable), default) + + +def random_product(*args, repeat=1): + """Draw an item at random from each of the input iterables. + + >>> random_product('abc', range(4), 'XYZ') # doctest:+SKIP + ('c', 3, 'Z') + + If *repeat* is provided as a keyword argument, that many items will be + drawn from each iterable. + + >>> random_product('abcd', range(4), repeat=2) # doctest:+SKIP + ('a', 2, 'd', 3) + + This equivalent to taking a random selection from + ``itertools.product(*args, **kwarg)``. + + """ + pools = [tuple(pool) for pool in args] * repeat + return tuple(choice(pool) for pool in pools) + + +def random_permutation(iterable, r=None): + """Return a random *r* length permutation of the elements in *iterable*. + + If *r* is not specified or is ``None``, then *r* defaults to the length of + *iterable*. + + >>> random_permutation(range(5)) # doctest:+SKIP + (3, 4, 0, 1, 2) + + This equivalent to taking a random selection from + ``itertools.permutations(iterable, r)``. + + """ + pool = tuple(iterable) + r = len(pool) if r is None else r + return tuple(sample(pool, r)) + + +def random_combination(iterable, r): + """Return a random *r* length subsequence of the elements in *iterable*. + + >>> random_combination(range(5), 3) # doctest:+SKIP + (2, 3, 4) + + This equivalent to taking a random selection from + ``itertools.combinations(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(sample(range(n), r)) + return tuple(pool[i] for i in indices) + + +def random_combination_with_replacement(iterable, r): + """Return a random *r* length subsequence of elements in *iterable*, + allowing individual elements to be repeated. + + >>> random_combination_with_replacement(range(3), 5) # doctest:+SKIP + (0, 0, 1, 2, 2) + + This equivalent to taking a random selection from + ``itertools.combinations_with_replacement(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(randrange(n) for i in range(r)) + return tuple(pool[i] for i in indices) + + +def nth_combination(iterable, r, index): + """Equivalent to ``list(combinations(iterable, r))[index]``. + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`nth_combination` computes the subsequence at + sort position *index* directly, without computing the previous + subsequences. + + >>> nth_combination(range(5), 3, 5) + (0, 3, 4) + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = tuple(iterable) + n = len(pool) + if (r < 0) or (r > n): + raise ValueError + + c = 1 + k = min(r, n - r) + for i in range(1, k + 1): + c = c * (n - k + i) // i + + if index < 0: + index += c + + if (index < 0) or (index >= c): + raise IndexError + + result = [] + while r: + c, n, r = c * r // n, n - 1, r - 1 + while index >= c: + index -= c + c, n = c * (n - r) // n, n - 1 + result.append(pool[-1 - n]) + + return tuple(result) + + +def prepend(value, iterator): + """Yield *value*, followed by the elements in *iterator*. + + >>> value = '0' + >>> iterator = ['1', '2', '3'] + >>> list(prepend(value, iterator)) + ['0', '1', '2', '3'] + + To prepend multiple values, see :func:`itertools.chain` + or :func:`value_chain`. + + """ + return chain([value], iterator) + + +def convolve(signal, kernel): + """Convolve the iterable *signal* with the iterable *kernel*. + + >>> signal = (1, 2, 3, 4, 5) + >>> kernel = [3, 2, 1] + >>> list(convolve(signal, kernel)) + [3, 8, 14, 20, 26, 14, 5] + + Note: the input arguments are not interchangeable, as the *kernel* + is immediately consumed and stored. + + """ + kernel = tuple(kernel)[::-1] + n = len(kernel) + window = deque([0], maxlen=n) * n + for x in chain(signal, repeat(0, n - 1)): + window.append(x) + yield sum(map(operator.mul, kernel, window)) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/recipes.pyi b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/recipes.pyi new file mode 100644 index 0000000000000000000000000000000000000000..5e39d963907545f63363898c814e7b7169bcf30e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/more_itertools/recipes.pyi @@ -0,0 +1,103 @@ +"""Stubs for more_itertools.recipes""" +from typing import ( + Any, + Callable, + Iterable, + Iterator, + List, + Optional, + Tuple, + TypeVar, + Union, +) +from typing_extensions import overload, Type + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + +def take(n: int, iterable: Iterable[_T]) -> List[_T]: ... +def tabulate( + function: Callable[[int], _T], start: int = ... +) -> Iterator[_T]: ... +def tail(n: int, iterable: Iterable[_T]) -> Iterator[_T]: ... +def consume(iterator: Iterable[object], n: Optional[int] = ...) -> None: ... +@overload +def nth(iterable: Iterable[_T], n: int) -> Optional[_T]: ... +@overload +def nth(iterable: Iterable[_T], n: int, default: _U) -> Union[_T, _U]: ... +def all_equal(iterable: Iterable[object]) -> bool: ... +def quantify( + iterable: Iterable[_T], pred: Callable[[_T], bool] = ... +) -> int: ... +def pad_none(iterable: Iterable[_T]) -> Iterator[Optional[_T]]: ... +def padnone(iterable: Iterable[_T]) -> Iterator[Optional[_T]]: ... +def ncycles(iterable: Iterable[_T], n: int) -> Iterator[_T]: ... +def dotproduct(vec1: Iterable[object], vec2: Iterable[object]) -> object: ... +def flatten(listOfLists: Iterable[Iterable[_T]]) -> Iterator[_T]: ... +def repeatfunc( + func: Callable[..., _U], times: Optional[int] = ..., *args: Any +) -> Iterator[_U]: ... +def pairwise(iterable: Iterable[_T]) -> Iterator[Tuple[_T, _T]]: ... +@overload +def grouper( + iterable: Iterable[_T], n: int +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def grouper( + iterable: Iterable[_T], n: int, fillvalue: _U +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +@overload +def grouper( # Deprecated interface + iterable: int, n: Iterable[_T] +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def grouper( # Deprecated interface + iterable: int, n: Iterable[_T], fillvalue: _U +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +def roundrobin(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def partition( + pred: Optional[Callable[[_T], object]], iterable: Iterable[_T] +) -> Tuple[Iterator[_T], Iterator[_T]]: ... +def powerset(iterable: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ... +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ... +) -> Iterator[_T]: ... +def unique_justseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], object]] = ... +) -> Iterator[_T]: ... +@overload +def iter_except( + func: Callable[[], _T], exception: Type[BaseException], first: None = ... +) -> Iterator[_T]: ... +@overload +def iter_except( + func: Callable[[], _T], + exception: Type[BaseException], + first: Callable[[], _U], +) -> Iterator[Union[_T, _U]]: ... +@overload +def first_true( + iterable: Iterable[_T], *, pred: Optional[Callable[[_T], object]] = ... +) -> Optional[_T]: ... +@overload +def first_true( + iterable: Iterable[_T], + default: _U, + pred: Optional[Callable[[_T], object]] = ..., +) -> Union[_T, _U]: ... +def random_product( + *args: Iterable[_T], repeat: int = ... +) -> Tuple[_T, ...]: ... +def random_permutation( + iterable: Iterable[_T], r: Optional[int] = ... +) -> Tuple[_T, ...]: ... +def random_combination(iterable: Iterable[_T], r: int) -> Tuple[_T, ...]: ... +def random_combination_with_replacement( + iterable: Iterable[_T], r: int +) -> Tuple[_T, ...]: ... +def nth_combination( + iterable: Iterable[_T], r: int, index: int +) -> Tuple[_T, ...]: ... +def prepend(value: _T, iterator: Iterable[_U]) -> Iterator[Union[_T, _U]]: ... +def convolve(signal: Iterable[_T], kernel: Iterable[_T]) -> Iterator[_T]: ... diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/ordered_set.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/ordered_set.py new file mode 100644 index 0000000000000000000000000000000000000000..14876000de895a609d5b9f3de39c3c8fc44ef1fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/ordered_set.py @@ -0,0 +1,488 @@ +""" +An OrderedSet is a custom MutableSet that remembers its order, so that every +entry has an index that can be looked up. + +Based on a recipe originally posted to ActiveState Recipes by Raymond Hettiger, +and released under the MIT license. +""" +import itertools as it +from collections import deque + +try: + # Python 3 + from collections.abc import MutableSet, Sequence +except ImportError: + # Python 2.7 + from collections import MutableSet, Sequence + +SLICE_ALL = slice(None) +__version__ = "3.1" + + +def is_iterable(obj): + """ + Are we being asked to look up a list of things, instead of a single thing? + We check for the `__iter__` attribute so that this can cover types that + don't have to be known by this module, such as NumPy arrays. + + Strings, however, should be considered as atomic values to look up, not + iterables. The same goes for tuples, since they are immutable and therefore + valid entries. + + We don't need to check for the Python 2 `unicode` type, because it doesn't + have an `__iter__` attribute anyway. + """ + return ( + hasattr(obj, "__iter__") + and not isinstance(obj, str) + and not isinstance(obj, tuple) + ) + + +class OrderedSet(MutableSet, Sequence): + """ + An OrderedSet is a custom MutableSet that remembers its order, so that + every entry has an index that can be looked up. + + Example: + >>> OrderedSet([1, 1, 2, 3, 2]) + OrderedSet([1, 2, 3]) + """ + + def __init__(self, iterable=None): + self.items = [] + self.map = {} + if iterable is not None: + self |= iterable + + def __len__(self): + """ + Returns the number of unique elements in the ordered set + + Example: + >>> len(OrderedSet([])) + 0 + >>> len(OrderedSet([1, 2])) + 2 + """ + return len(self.items) + + def __getitem__(self, index): + """ + Get the item at a given index. + + If `index` is a slice, you will get back that slice of items, as a + new OrderedSet. + + If `index` is a list or a similar iterable, you'll get a list of + items corresponding to those indices. This is similar to NumPy's + "fancy indexing". The result is not an OrderedSet because you may ask + for duplicate indices, and the number of elements returned should be + the number of elements asked for. + + Example: + >>> oset = OrderedSet([1, 2, 3]) + >>> oset[1] + 2 + """ + if isinstance(index, slice) and index == SLICE_ALL: + return self.copy() + elif is_iterable(index): + return [self.items[i] for i in index] + elif hasattr(index, "__index__") or isinstance(index, slice): + result = self.items[index] + if isinstance(result, list): + return self.__class__(result) + else: + return result + else: + raise TypeError("Don't know how to index an OrderedSet by %r" % index) + + def copy(self): + """ + Return a shallow copy of this object. + + Example: + >>> this = OrderedSet([1, 2, 3]) + >>> other = this.copy() + >>> this == other + True + >>> this is other + False + """ + return self.__class__(self) + + def __getstate__(self): + if len(self) == 0: + # The state can't be an empty list. + # We need to return a truthy value, or else __setstate__ won't be run. + # + # This could have been done more gracefully by always putting the state + # in a tuple, but this way is backwards- and forwards- compatible with + # previous versions of OrderedSet. + return (None,) + else: + return list(self) + + def __setstate__(self, state): + if state == (None,): + self.__init__([]) + else: + self.__init__(state) + + def __contains__(self, key): + """ + Test if the item is in this ordered set + + Example: + >>> 1 in OrderedSet([1, 3, 2]) + True + >>> 5 in OrderedSet([1, 3, 2]) + False + """ + return key in self.map + + def add(self, key): + """ + Add `key` as an item to this OrderedSet, then return its index. + + If `key` is already in the OrderedSet, return the index it already + had. + + Example: + >>> oset = OrderedSet() + >>> oset.append(3) + 0 + >>> print(oset) + OrderedSet([3]) + """ + if key not in self.map: + self.map[key] = len(self.items) + self.items.append(key) + return self.map[key] + + append = add + + def update(self, sequence): + """ + Update the set with the given iterable sequence, then return the index + of the last element inserted. + + Example: + >>> oset = OrderedSet([1, 2, 3]) + >>> oset.update([3, 1, 5, 1, 4]) + 4 + >>> print(oset) + OrderedSet([1, 2, 3, 5, 4]) + """ + item_index = None + try: + for item in sequence: + item_index = self.add(item) + except TypeError: + raise ValueError( + "Argument needs to be an iterable, got %s" % type(sequence) + ) + return item_index + + def index(self, key): + """ + Get the index of a given entry, raising an IndexError if it's not + present. + + `key` can be an iterable of entries that is not a string, in which case + this returns a list of indices. + + Example: + >>> oset = OrderedSet([1, 2, 3]) + >>> oset.index(2) + 1 + """ + if is_iterable(key): + return [self.index(subkey) for subkey in key] + return self.map[key] + + # Provide some compatibility with pd.Index + get_loc = index + get_indexer = index + + def pop(self): + """ + Remove and return the last element from the set. + + Raises KeyError if the set is empty. + + Example: + >>> oset = OrderedSet([1, 2, 3]) + >>> oset.pop() + 3 + """ + if not self.items: + raise KeyError("Set is empty") + + elem = self.items[-1] + del self.items[-1] + del self.map[elem] + return elem + + def discard(self, key): + """ + Remove an element. Do not raise an exception if absent. + + The MutableSet mixin uses this to implement the .remove() method, which + *does* raise an error when asked to remove a non-existent item. + + Example: + >>> oset = OrderedSet([1, 2, 3]) + >>> oset.discard(2) + >>> print(oset) + OrderedSet([1, 3]) + >>> oset.discard(2) + >>> print(oset) + OrderedSet([1, 3]) + """ + if key in self: + i = self.map[key] + del self.items[i] + del self.map[key] + for k, v in self.map.items(): + if v >= i: + self.map[k] = v - 1 + + def clear(self): + """ + Remove all items from this OrderedSet. + """ + del self.items[:] + self.map.clear() + + def __iter__(self): + """ + Example: + >>> list(iter(OrderedSet([1, 2, 3]))) + [1, 2, 3] + """ + return iter(self.items) + + def __reversed__(self): + """ + Example: + >>> list(reversed(OrderedSet([1, 2, 3]))) + [3, 2, 1] + """ + return reversed(self.items) + + def __repr__(self): + if not self: + return "%s()" % (self.__class__.__name__,) + return "%s(%r)" % (self.__class__.__name__, list(self)) + + def __eq__(self, other): + """ + Returns true if the containers have the same items. If `other` is a + Sequence, then order is checked, otherwise it is ignored. + + Example: + >>> oset = OrderedSet([1, 3, 2]) + >>> oset == [1, 3, 2] + True + >>> oset == [1, 2, 3] + False + >>> oset == [2, 3] + False + >>> oset == OrderedSet([3, 2, 1]) + False + """ + # In Python 2 deque is not a Sequence, so treat it as one for + # consistent behavior with Python 3. + if isinstance(other, (Sequence, deque)): + # Check that this OrderedSet contains the same elements, in the + # same order, as the other object. + return list(self) == list(other) + try: + other_as_set = set(other) + except TypeError: + # If `other` can't be converted into a set, it's not equal. + return False + else: + return set(self) == other_as_set + + def union(self, *sets): + """ + Combines all unique items. + Each items order is defined by its first appearance. + + Example: + >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0]) + >>> print(oset) + OrderedSet([3, 1, 4, 5, 2, 0]) + >>> oset.union([8, 9]) + OrderedSet([3, 1, 4, 5, 2, 0, 8, 9]) + >>> oset | {10} + OrderedSet([3, 1, 4, 5, 2, 0, 10]) + """ + cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet + containers = map(list, it.chain([self], sets)) + items = it.chain.from_iterable(containers) + return cls(items) + + def __and__(self, other): + # the parent implementation of this is backwards + return self.intersection(other) + + def intersection(self, *sets): + """ + Returns elements in common between all sets. Order is defined only + by the first set. + + Example: + >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3]) + >>> print(oset) + OrderedSet([1, 2, 3]) + >>> oset.intersection([2, 4, 5], [1, 2, 3, 4]) + OrderedSet([2]) + >>> oset.intersection() + OrderedSet([1, 2, 3]) + """ + cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet + if sets: + common = set.intersection(*map(set, sets)) + items = (item for item in self if item in common) + else: + items = self + return cls(items) + + def difference(self, *sets): + """ + Returns all elements that are in this set but not the others. + + Example: + >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2])) + OrderedSet([1, 3]) + >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3])) + OrderedSet([1]) + >>> OrderedSet([1, 2, 3]) - OrderedSet([2]) + OrderedSet([1, 3]) + >>> OrderedSet([1, 2, 3]).difference() + OrderedSet([1, 2, 3]) + """ + cls = self.__class__ + if sets: + other = set.union(*map(set, sets)) + items = (item for item in self if item not in other) + else: + items = self + return cls(items) + + def issubset(self, other): + """ + Report whether another set contains this set. + + Example: + >>> OrderedSet([1, 2, 3]).issubset({1, 2}) + False + >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4}) + True + >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5}) + False + """ + if len(self) > len(other): # Fast check for obvious cases + return False + return all(item in other for item in self) + + def issuperset(self, other): + """ + Report whether this set contains another set. + + Example: + >>> OrderedSet([1, 2]).issuperset([1, 2, 3]) + False + >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3}) + True + >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3}) + False + """ + if len(self) < len(other): # Fast check for obvious cases + return False + return all(item in self for item in other) + + def symmetric_difference(self, other): + """ + Return the symmetric difference of two OrderedSets as a new set. + That is, the new set will contain all elements that are in exactly + one of the sets. + + Their order will be preserved, with elements from `self` preceding + elements from `other`. + + Example: + >>> this = OrderedSet([1, 4, 3, 5, 7]) + >>> other = OrderedSet([9, 7, 1, 3, 2]) + >>> this.symmetric_difference(other) + OrderedSet([4, 5, 9, 2]) + """ + cls = self.__class__ if isinstance(self, OrderedSet) else OrderedSet + diff1 = cls(self).difference(other) + diff2 = cls(other).difference(self) + return diff1.union(diff2) + + def _update_items(self, items): + """ + Replace the 'items' list of this OrderedSet with a new one, updating + self.map accordingly. + """ + self.items = items + self.map = {item: idx for (idx, item) in enumerate(items)} + + def difference_update(self, *sets): + """ + Update this OrderedSet to remove items from one or more other sets. + + Example: + >>> this = OrderedSet([1, 2, 3]) + >>> this.difference_update(OrderedSet([2, 4])) + >>> print(this) + OrderedSet([1, 3]) + + >>> this = OrderedSet([1, 2, 3, 4, 5]) + >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6])) + >>> print(this) + OrderedSet([3, 5]) + """ + items_to_remove = set() + for other in sets: + items_to_remove |= set(other) + self._update_items([item for item in self.items if item not in items_to_remove]) + + def intersection_update(self, other): + """ + Update this OrderedSet to keep only items in another set, preserving + their order in this set. + + Example: + >>> this = OrderedSet([1, 4, 3, 5, 7]) + >>> other = OrderedSet([9, 7, 1, 3, 2]) + >>> this.intersection_update(other) + >>> print(this) + OrderedSet([1, 3, 7]) + """ + other = set(other) + self._update_items([item for item in self.items if item in other]) + + def symmetric_difference_update(self, other): + """ + Update this OrderedSet to remove items from another set, then + add items from the other set that were not present in this set. + + Example: + >>> this = OrderedSet([1, 4, 3, 5, 7]) + >>> other = OrderedSet([9, 7, 1, 3, 2]) + >>> this.symmetric_difference_update(other) + >>> print(this) + OrderedSet([4, 5, 9, 2]) + """ + items_to_add = [item for item in other if item not in self] + items_to_remove = set(other) + self._update_items( + [item for item in self.items if item not in items_to_remove] + items_to_add + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7c0aa12ca950f230c8092436a985b2305702642 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__init__.py @@ -0,0 +1,15 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "24.0" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49c2038abdd3fce5cb67284ad4040c2b023dc2d2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_elffile.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_elffile.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..daaf3b2332d2229802a8c15069378e581c83a244 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_elffile.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_manylinux.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_manylinux.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff60439d26638de598d6d5893ea1b6262e8ecaef Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_manylinux.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_musllinux.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_musllinux.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..832eed77540f0ea56fc9ea2d1082322444345ae1 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_musllinux.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_parser.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_parser.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4cb2bb5a22adb0e85d5d56d1228bc176592c79ea Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_parser.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_structures.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_structures.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6261082d9c30a94fca49024c07ef34a7a5e92767 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_structures.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_tokenizer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_tokenizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0c8c65d121bd358b17df870899cbc95fbc087dc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/_tokenizer.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/markers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/markers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..474221cc73b34cb33715b314225645ab84de2c4e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/markers.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/metadata.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/metadata.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74d44ef15caafa304a769a0af05e6e3fa9bd8aae Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/metadata.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/requirements.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/requirements.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf55140a2067ef249fd711fa82a9a666186e6677 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/requirements.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a3e279db83a0fda439b4e81d2d15f6748f93ec7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/specifiers.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/tags.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/tags.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cdde0b8afd63787326bf7347952965c96820020 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/tags.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..544c2861cb9549bd8c9979eb559d3bcc3ebec78f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/utils.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/version.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/version.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ada61b4733181e1cf333dd849525fdaf773e270 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/__pycache__/version.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_elffile.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_elffile.py new file mode 100644 index 0000000000000000000000000000000000000000..6fb19b30bb53c18f38a9ef02dd7c4478670fb962 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_manylinux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_manylinux.py new file mode 100644 index 0000000000000000000000000000000000000000..ad62505f3ff66c3d4da07ce1f2a50d9f10bc1bdd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_manylinux.py @@ -0,0 +1,260 @@ +import collections +import contextlib +import functools +import os +import re +import sys +import warnings +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: + try: + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None + + +def _is_linux_armhf(executable: str) -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) + + +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = { + "x86_64", + "aarch64", + "ppc64", + "ppc64le", + "s390x", + "loongarch64", + "riscv64", + } + return any(arch in allowed_archs for arch in archs) + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # Should be a string like "glibc 2.17". + version_string: Optional[str] = os.confstr("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.rsplit() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate manylinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be manylinux-compatible. + + :returns: An iterator of compatible manylinux tags. + """ + if not _have_compatible_abi(sys.executable, archs): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if set(archs) & {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for arch in archs: + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(arch, glibc_version): + yield f"{tag}_{arch}" + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_musllinux.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_musllinux.py new file mode 100644 index 0000000000000000000000000000000000000000..86419df9d7087f3f8b6d0096f32a52c24b05e7c1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_musllinux.py @@ -0,0 +1,83 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +import functools +import re +import subprocess +import sys +from typing import Iterator, NamedTuple, Optional, Sequence + +from ._elffile import ELFFile + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_parser.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..684df75457cb82d3683dc99ff52c5bf911f3341b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_parser.py @@ -0,0 +1,356 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if env_var in ("platform_python_implementation", "python_implementation"): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_structures.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_structures.py new file mode 100644 index 0000000000000000000000000000000000000000..90a6465f9682c886363eea5327dac64bf623a6ff --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_structures.py @@ -0,0 +1,61 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + + +class InfinityType: + def __repr__(self) -> str: + return "Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return False + + def __le__(self, other: object) -> bool: + return False + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return True + + def __ge__(self, other: object) -> bool: + return True + + def __neg__(self: object) -> "NegativeInfinityType": + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType: + def __repr__(self) -> str: + return "-Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return True + + def __le__(self, other: object) -> bool: + return True + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return False + + def __ge__(self, other: object) -> bool: + return False + + def __neg__(self: object) -> InfinityType: + return Infinity + + +NegativeInfinity = NegativeInfinityType() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_tokenizer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..dd0d648d49a7c1a62d25ce5c9107aa448a8a22d1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/markers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/markers.py new file mode 100644 index 0000000000000000000000000000000000000000..8b98fca7233be6dd9324cd2b6d71b6a8ac91a6cb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/markers.py @@ -0,0 +1,252 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import operator +import os +import platform +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, +) +from ._tokenizer import ParserSyntaxError +from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name + +__all__ = [ + "InvalidMarker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "Marker", + "default_environment", +] + +Operator = Callable[[str, str], bool] + + +class InvalidMarker(ValueError): + """ + An invalid marker was found, users should refer to PEP 508. + """ + + +class UndefinedComparison(ValueError): + """ + An invalid operation was attempted on a value that doesn't support it. + """ + + +class UndefinedEnvironmentName(ValueError): + """ + A name was attempted to be used that does not exist inside of the + environment. + """ + + +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results + + +def _format_marker( + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True +) -> str: + + assert isinstance(marker, (list, tuple, str)) + + # Sometimes we have a structure like [[...]] which is a single item list + # where the single item is itself it's own list. In that case we want skip + # the rest of this function so that we don't get extraneous () on the + # outside. + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): + return _format_marker(marker[0]) + + if isinstance(marker, list): + inner = (_format_marker(m, first=False) for m in marker) + if first: + return " ".join(inner) + else: + return "(" + " ".join(inner) + ")" + elif isinstance(marker, tuple): + return " ".join([m.serialize() for m in marker]) + else: + return marker + + +_operators: Dict[str, Operator] = { + "in": lambda lhs, rhs: lhs in rhs, + "not in": lambda lhs, rhs: lhs not in rhs, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} + + +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: + try: + spec = Specifier("".join([op.serialize(), rhs])) + except InvalidSpecifier: + pass + else: + return spec.contains(lhs, prereleases=True) + + oper: Optional[Operator] = _operators.get(op.serialize()) + if oper is None: + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") + + return oper(lhs, rhs) + + +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) + + # other environment markers don't have such standards + return values + + +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] + + for marker in markers: + assert isinstance(marker, (list, tuple, str)) + + if isinstance(marker, list): + groups[-1].append(_evaluate_markers(marker, environment)) + elif isinstance(marker, tuple): + lhs, op, rhs = marker + + if isinstance(lhs, Variable): + environment_key = lhs.value + lhs_value = environment[environment_key] + rhs_value = rhs.value + else: + lhs_value = lhs.value + environment_key = rhs.value + rhs_value = environment[environment_key] + + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) + groups[-1].append(_eval_op(lhs_value, op, rhs_value)) + else: + assert marker in ["and", "or"] + if marker == "or": + groups.append([]) + + return any(all(item) for item in groups) + + +def format_full_version(info: "sys._version_info") -> str: + version = "{0.major}.{0.minor}.{0.micro}".format(info) + kind = info.releaselevel + if kind != "final": + version += kind[0] + str(info.serial) + return version + + +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name + return { + "implementation_name": implementation_name, + "implementation_version": iver, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "python_full_version": platform.python_version(), + "platform_python_implementation": platform.python_implementation(), + "python_version": ".".join(platform.python_version_tuple()[:2]), + "sys_platform": sys.platform, + } + + +class Marker: + def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. + try: + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e + + def __str__(self) -> str: + return _format_marker(self._markers) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: + """Evaluate a marker. + + Return the boolean from evaluating the given marker against the + environment. environment is an optional argument to override all or + part of the determined environment. + + The environment is determined from the current Python process. + """ + current_environment = default_environment() + current_environment["extra"] = "" + if environment is not None: + current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" + + return _evaluate_markers(self._markers, current_environment) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/metadata.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..fb274930799da0f8ee17566b5b587b4047282c7b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/metadata.py @@ -0,0 +1,825 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from typing_extensions import Literal, TypedDict + else: + try: + from typing_extensions import Literal, TypedDict + except ImportError: + + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +try: + ExceptionGroup +except NameError: # pragma: no cover + + class ExceptionGroup(Exception): # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + +else: # pragma: no cover + ExceptionGroup = ExceptionGroup + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separate keyboards into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + value = instance._raw.get(self.name) + + # To make the _process_* methods easier, we'll check if the value is None + # and if this field is NOT a required attribute, and if both of those + # things are true, we'll skip the the converter. This will mean that the + # converters never have to deal with the None union. + if self.name in _REQUIRED_ATTRS or value is not None: + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. + if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[Exception] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + raw, unparsed = parse_email(data) + + if validate: + exceptions: list[Exception] = [] + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + raise ExceptionGroup( + "invalid or unparsed metadata", exc_group.exceptions + ) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[Optional[List[str]]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[Optional[str]] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[Optional[str]] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[Optional[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/requirements.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/requirements.py new file mode 100644 index 0000000000000000000000000000000000000000..bdc43a7e98d87dba0c2069bfb4554f71d228cad4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/requirements.py @@ -0,0 +1,90 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from typing import Any, Iterator, Optional, Set + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name + + +class InvalidRequirement(ValueError): + """ + An invalid requirement was found, users should refer to PEP 508. + """ + + +class Requirement: + """Parse a requirement. + + Parse a given requirement string into its parts, such as name, specifier, + URL, and extras. Raises InvalidRequirement on a badly-formed requirement + string. + """ + + # TODO: Can we test whether something is contained within a requirement? + # If so how do we do that? Do we need to test against the _name_ of + # the thing as well as the version? What about the markers? + # TODO: Can we normalize the name and extra name? + + def __init__(self, requirement_string: str) -> None: + try: + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: Optional[str] = parsed.url or None + self.extras: Set[str] = set(parsed.extras or []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name + + if self.extras: + formatted_extras = ",".join(sorted(self.extras)) + yield f"[{formatted_extras}]" + + if self.specifier: + yield str(self.specifier) + + if self.url: + yield f"@ {self.url}" + if self.marker: + yield " " + + if self.marker: + yield f"; {self.marker}" + + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/specifiers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/specifiers.py new file mode 100644 index 0000000000000000000000000000000000000000..2d015bab5958fd9767cf5c9e449f2fa33292c962 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/specifiers.py @@ -0,0 +1,1017 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" + +import abc +import itertools +import re +from typing import Callable, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union + +from .utils import canonicalize_version +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] + + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version + + +class InvalidSpecifier(ValueError): + """ + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' + """ + + +class BaseSpecifier(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __str__(self) -> str: + """ + Returns the str representation of this Specifier-like object. This + should be representative of the Specifier itself. + """ + + @abc.abstractmethod + def __hash__(self) -> int: + """ + Returns a hash value for this Specifier-like object. + """ + + @abc.abstractmethod + def __eq__(self, other: object) -> bool: + """ + Returns a boolean representing whether or not the two Specifier-like + objects are equal. + + :param other: The other object to check against. + """ + + @property + @abc.abstractmethod + def prereleases(self) -> Optional[bool]: + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. + """ + + @prereleases.setter + def prereleases(self, value: bool) -> None: + """Setter for :attr:`prereleases`. + + :param value: The value to set. + """ + + @abc.abstractmethod + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: + """ + Determines if the given item is contained within this specifier. + """ + + @abc.abstractmethod + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """ + Takes an iterable of items and filters them so that only items which + are contained within this specifier are allowed in it. + """ + + +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. + + .. tip:: + + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ + + _operator_regex_str = r""" + (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" + (?P + (?: + # The identity operators allow for an escape hatch that will + # do an exact string match of the version you wish to install. + # This will not be parsed by PEP 440 and we cannot determine + # any semantic meaning from it. This operator is discouraged + # but included entirely as an escape hatch. + (?<====) # Only match for the identity operator + \s* + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. + ) + | + (?: + # The (non)equality operators allow for wild card and local + # versions to be specified so we have to define these two + # operators separately to enable that. + (?<===|!=) # Only match for equals and not equals + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)* # release + + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. + (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local + )? + ) + | + (?: + # The compatible operator requires at least two digits in the + # release segment. + (?<=~=) # Only match for the compatible operator + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + ) + | + (?: + # All other operators only allow a sub set of what the + # (non)equality operators do. Specifically they do not allow + # local versions to be specified nor do they allow the prefix + # matching wild cards. + (?=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + "===": "arbitrary", + } + + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: + + # Compatible releases have an equivalent combination of >= and ==. That + # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to + # implement this in terms of the other specifiers instead of + # implementing it ourselves. The only thing we need to do is construct + # the other specifiers. + + # We want everything but the last item in the version, but we want to + # ignore suffix segments. + prefix = _version_join( + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] + ) + + # Add the prefix notation to the end of our string + prefix += ".*" + + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) + + def _compare_equal(self, prospective: Version, spec: str) -> bool: + + # We need special logic to handle prefix matching + if spec.endswith(".*"): + # In the case of prefix matching we want to ignore local segment. + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) + # Split the spec out by bangs and dots, and pretend that there is + # an implicit dot in between a release segment and a pre-release segment. + split_spec = _version_split(normalized_spec) + + # Split the prospective version out by bangs and dots, and pretend + # that there is an implicit dot in between a release segment and + # a pre-release segment. + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) + + # Shorten the prospective version to be the same length as the spec + # so that we can determine if the specifier is a prefix of the + # prospective version or not. + shortened_prospective = padded_prospective[: len(split_spec)] + + return shortened_prospective == split_spec + else: + # Convert our spec string into a Version + spec_version = Version(spec) + + # If the specifier does not have a local segment, then we want to + # act as if the prospective version also does not have a local + # segment. + if not spec_version.local: + prospective = Version(prospective.public) + + return prospective == spec_version + + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: + return not self._compare_equal(prospective, spec) + + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) + + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) + + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is less than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective < spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a pre-release version, that we do not accept pre-release + # versions for the version mentioned in the specifier (e.g. <3.1 should + # not match 3.1.dev0, but should match 3.0.dev0). + if not spec.is_prerelease and prospective.is_prerelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # less than the spec version *and* it's not a pre-release of the same + # version in the spec. + return True + + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is greater than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective > spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a post-release version, that we do not accept + # post-release versions for the version mentioned in the specifier + # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0). + if not spec.is_postrelease and prospective.is_postrelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # Ensure that we do not allow a local version of the version mentioned + # in the specifier, which is technically greater than, to match. + if prospective.local is not None: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # greater than the spec version *and* it's not a pre-release of the + # same version in the spec. + return True + + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: + return str(prospective).lower() == str(spec).lower() + + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version + + +_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") + + +def _version_split(version: str) -> List[str]: + """Split version into components. + + The split components are intended for version comparison. The logic does + not attempt to retain the original version string, so joining the + components back with :func:`_version_join` may not produce the original + version string. + """ + result: List[str] = [] + + epoch, _, rest = version.rpartition("!") + result.append(epoch or "0") + + for item in rest.split("."): + match = _prefix_regex.search(item) + if match: + result.extend(match.groups()) + else: + result.append(item) + return result + + +def _version_join(components: List[str]) -> str: + """Join split version components into a version string. + + This function assumes the input came from :func:`_version_split`, where the + first component must be the epoch (either empty or numeric), and all other + components numeric. + """ + epoch, *rest = components + return f"{epoch}!{'.'.join(rest)}" + + +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: + left_split, right_split = [], [] + + # Get the release segment of our versions + left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left))) + right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) + + # Get the rest of our versions + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) + + # Insert our padding + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) + + return ( + list(itertools.chain.from_iterable(left_split)), + list(itertools.chain.from_iterable(right_split)), + ) + + +class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ + + # Split on `,` to break each individual specifier into it's own item, and + # strip each item to remove leading/trailing whitespace. + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + + # Make each individual specifier a Specifier and save in a frozen set for later. + self._specs = frozenset(map(Specifier, split_specifiers)) + + # Store our prereleases value so we can use it later to determine if + # we accept prereleases or not. + self._prereleases = prereleases + + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"" + + def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ + return ",".join(sorted(str(s) for s in self._specs)) + + def __hash__(self) -> int: + return hash(self._specs) + + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ + if isinstance(other, str): + other = SpecifierSet(other) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + specifier = SpecifierSet() + specifier._specs = frozenset(self._specs | other._specs) + + if self._prereleases is None and other._prereleases is not None: + specifier._prereleases = other._prereleases + elif self._prereleases is not None and other._prereleases is None: + specifier._prereleases = self._prereleases + elif self._prereleases == other._prereleases: + specifier._prereleases = self._prereleases + else: + raise ValueError( + "Cannot combine SpecifierSets with True and False prerelease " + "overrides." + ) + + return specifier + + def __eq__(self, other: object) -> bool: + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs == other._specs + + def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" + return len(self._specs) + + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. + + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) + + def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, + ) -> bool: + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # We can determine if we're going to allow pre-releases by looking to + # see if any of the underlying items supports them. If none of them do + # and this item is a pre-release then we do not allow it and we can + # short circuit that here. + # Note: This means that 1.0.dev1 would not be contained in something + # like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0 + if not prereleases and item.is_prerelease: + return False + + if installed and item.is_prerelease: + item = Version(item.base_version) + + # We simply dispatch to the underlying specs here to make sure that the + # given version is contained within all of them. + # Note: This use of all() here means that an empty set of specifiers + # will always return True, this is an explicit design decision. + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # If we have any specifiers, then we want to wrap our iterable in the + # filter method for each one, this will act as a logical AND amongst + # each specifier. + if self._specs: + for spec in self._specs: + iterable = spec.filter(iterable, prereleases=bool(prereleases)) + return iter(iterable) + # If we do not have any specifiers, then we need to have a rough filter + # which will filter out any pre-releases, unless there are no final + # releases. + else: + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] + + for item in iterable: + parsed_version = _coerce_version(item) + + # Store any item which is a pre-release for later unless we've + # already found a final version or we are accepting prereleases + if parsed_version.is_prerelease and not prereleases: + if not filtered: + found_prereleases.append(item) + else: + filtered.append(item) + + # If we've found no items except for pre-releases, then we'll go + # ahead and use the pre-releases + if not filtered and found_prereleases and prereleases is None: + return iter(found_prereleases) + + return iter(filtered) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/tags.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/tags.py new file mode 100644 index 0000000000000000000000000000000000000000..89f1926137dd2d2a6bd63616bf5b9f722fc8d584 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/tags.py @@ -0,0 +1,571 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import logging +import platform +import re +import struct +import subprocess +import sys +import sysconfig +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from . import _manylinux, _musllinux + +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} + + +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 + + +class Tag: + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self) -> str: + return self._interpreter + + @property + def abi(self) -> str: + return self._abi + + @property + def platform(self) -> str: + return self._platform + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) + ) + + def __hash__(self) -> int: + return self._hash + + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" + + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" + + +def parse_tag(tag: str) -> FrozenSet[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value: Union[int, str, None] = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_").replace(" ", "_") + + +def _is_threaded_cpython(abis: List[str]) -> bool: + """ + Determine if the ABI corresponds to a threaded (`--disable-gil`) build. + + The threaded builds are indicated by a "t" in the abiflags. + """ + if len(abis) == 0: + return False + # expect e.g., cp313 + m = re.match(r"cp\d+(.*)", abis[0]) + if not m: + return False + abiflags = m.group(1) + return "t" in abiflags + + +def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`) + builds do not support abi3. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading + + +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + threading = debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn): + threading = "t" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append(f"cp{version}{threading}") + abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}") + return abis + + +def cpython_tags( + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = f"cp{_version_nodot(python_version[:2])}" + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + threading = _is_threaded_cpython(abis) + use_abi3 = _abi3_applies(python_version, threading) + if use_abi3: + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if use_abi3: + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] + + +def generic_tags( + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + else: + abis = list(abis) + platforms = list(platforms or platform_tags()) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield f"py{_version_nodot((py_version[0], minor))}" + + +def compatible_tags( + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() + if version is None: + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + + +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv8l" + _, arch = linux.split("_", 1) + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" + + +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) + + +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. + """ + name = sys.implementation.name + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) + else: + yield from generic_tags() + + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c2c2f75aa806282d322c76c2117c0f0fdfb09d25 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/utils.py @@ -0,0 +1,172 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import re +from typing import FrozenSet, NewType, Tuple, Union, cast + +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version + +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + + +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) +_canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") + + +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") + # This is taken from PEP 503. + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: + """ + This is very similar to Version.__str__, but has one subtle difference + with the way it handles the release segment. + """ + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version + + parts = [] + + # Epoch + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") + + # Release segment + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) + + # Pre-release + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) + + # Post-release + if parsed.post is not None: + parts.append(f".post{parsed.post}") + + # Development release + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") + + # Local version segment + if parsed.local is not None: + parts.append(f"+{parsed.local}") + + return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name. + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename}" + ) from e + + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename}" + ) from e + + return (name, version) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/version.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/version.py new file mode 100644 index 0000000000000000000000000000000000000000..5faab9bd0dcf28847960162b2b4f13a8a556ef20 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/packaging/version.py @@ -0,0 +1,563 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.version import parse, Version +""" + +import itertools +import re +from typing import Any, Callable, NamedTuple, Optional, SupportsInt, Tuple, Union + +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType + +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] + +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ + NegativeInfinityType, + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], +] +CmpKey = Tuple[ + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, +] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] + + +class _Version(NamedTuple): + epoch: int + release: Tuple[int, ...] + dev: Optional[Tuple[str, int]] + pre: Optional[Tuple[str, int]] + post: Optional[Tuple[str, int]] + local: Optional[LocalType] + + +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. + """ + return Version(version) + + +class InvalidVersion(ValueError): + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' + """ + + +class _BaseVersion: + _key: Tuple[Any, ...] + + def __hash__(self) -> int: + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +_VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?Palpha|a|beta|b|preview|pre|c|rc)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
+
+class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    
+    >>> v2
+    
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
+
+    def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: '{version}'")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        
+        """
+        return f""
+
+    def __str__(self) -> str:
+        """A string representation of the version that can be rounded-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self) -> int:
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
+
+    @property
+    def release(self) -> Tuple[int, ...]:
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
+
+    @property
+    def pre(self) -> Optional[Tuple[str, int]]:
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
+
+    @property
+    def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self) -> Optional[str]:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
+        return self.post is not None
+
+    @property
+    def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
+        return self.dev is not None
+
+    @property
+    def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+def _parse_letter_version(
+    letter: Optional[str], number: Union[str, bytes, SupportsInt, None]
+) -> Optional[Tuple[str, int]]:
+
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local: Optional[str]) -> Optional[LocalType]:
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(
+    epoch: int,
+    release: Tuple[int, ...],
+    pre: Optional[Tuple[str, int]],
+    post: Optional[Tuple[str, int]],
+    dev: Optional[Tuple[str, int]],
+    local: Optional[LocalType],
+) -> CmpKey:
+
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll use a reverse the list, drop all the now
+    # leading zeros until we come to something non zero, then take the rest
+    # re-reverse it back into the correct order and make it a tuple and use
+    # that for our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre: CmpPrePostDevType = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post: CmpPrePostDevType = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev: CmpPrePostDevType = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local: CmpLocalType = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c6ec97ec6961bcf184b6e0b2437b9924db0b9de
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+__all__ = ("loads", "load", "TOMLDecodeError")
+__version__ = "2.0.1"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
+
+from ._parser import TOMLDecodeError, load, loads
+
+# Pretend this exception was created here.
+TOMLDecodeError.__module__ = __name__
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..482c6e96ec3623590b7a0dfafa2045bc212b6d9e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_parser.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_parser.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c04ea912e6730469968eccb99925787add019cfa
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_parser.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_re.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_re.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a50ae21937ff564ecfe39f7787ba8cce43cda713
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_re.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_types.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_types.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2c50ab1a466ada7ea90b932e51ae6849f834c4fd
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/__pycache__/_types.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_parser.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1bb0aa19a556725aa2ae2b8cea95489c99a9078
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_parser.py
@@ -0,0 +1,691 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+import string
+from types import MappingProxyType
+from typing import Any, BinaryIO, NamedTuple
+
+from ._re import (
+    RE_DATETIME,
+    RE_LOCALTIME,
+    RE_NUMBER,
+    match_to_datetime,
+    match_to_localtime,
+    match_to_number,
+)
+from ._types import Key, ParseFloat, Pos
+
+ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
+
+# Neither of these sets include quotation mark or backslash. They are
+# currently handled as separate cases in the parser functions.
+ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
+ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n")
+
+ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
+ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
+
+ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
+
+TOML_WS = frozenset(" \t")
+TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
+BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
+KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
+HEXDIGIT_CHARS = frozenset(string.hexdigits)
+
+BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
+    {
+        "\\b": "\u0008",  # backspace
+        "\\t": "\u0009",  # tab
+        "\\n": "\u000A",  # linefeed
+        "\\f": "\u000C",  # form feed
+        "\\r": "\u000D",  # carriage return
+        '\\"': "\u0022",  # quote
+        "\\\\": "\u005C",  # backslash
+    }
+)
+
+
+class TOMLDecodeError(ValueError):
+    """An error raised if a document is not valid TOML."""
+
+
+def load(__fp: BinaryIO, *, parse_float: ParseFloat = float) -> dict[str, Any]:
+    """Parse TOML from a binary file object."""
+    b = __fp.read()
+    try:
+        s = b.decode()
+    except AttributeError:
+        raise TypeError(
+            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
+        ) from None
+    return loads(s, parse_float=parse_float)
+
+
+def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:  # noqa: C901
+    """Parse TOML from a string."""
+
+    # The spec allows converting "\r\n" to "\n", even in string
+    # literals. Let's do so to simplify parsing.
+    src = __s.replace("\r\n", "\n")
+    pos = 0
+    out = Output(NestedDict(), Flags())
+    header: Key = ()
+    parse_float = make_safe_parse_float(parse_float)
+
+    # Parse one statement at a time
+    # (typically means one line in TOML source)
+    while True:
+        # 1. Skip line leading whitespace
+        pos = skip_chars(src, pos, TOML_WS)
+
+        # 2. Parse rules. Expect one of the following:
+        #    - end of file
+        #    - end of line
+        #    - comment
+        #    - key/value pair
+        #    - append dict to list (and move to its namespace)
+        #    - create dict (and move to its namespace)
+        # Skip trailing whitespace when applicable.
+        try:
+            char = src[pos]
+        except IndexError:
+            break
+        if char == "\n":
+            pos += 1
+            continue
+        if char in KEY_INITIAL_CHARS:
+            pos = key_value_rule(src, pos, out, header, parse_float)
+            pos = skip_chars(src, pos, TOML_WS)
+        elif char == "[":
+            try:
+                second_char: str | None = src[pos + 1]
+            except IndexError:
+                second_char = None
+            out.flags.finalize_pending()
+            if second_char == "[":
+                pos, header = create_list_rule(src, pos, out)
+            else:
+                pos, header = create_dict_rule(src, pos, out)
+            pos = skip_chars(src, pos, TOML_WS)
+        elif char != "#":
+            raise suffixed_err(src, pos, "Invalid statement")
+
+        # 3. Skip comment
+        pos = skip_comment(src, pos)
+
+        # 4. Expect end of line or end of file
+        try:
+            char = src[pos]
+        except IndexError:
+            break
+        if char != "\n":
+            raise suffixed_err(
+                src, pos, "Expected newline or end of document after a statement"
+            )
+        pos += 1
+
+    return out.data.dict
+
+
+class Flags:
+    """Flags that map to parsed keys/namespaces."""
+
+    # Marks an immutable namespace (inline array or inline table).
+    FROZEN = 0
+    # Marks a nest that has been explicitly created and can no longer
+    # be opened using the "[table]" syntax.
+    EXPLICIT_NEST = 1
+
+    def __init__(self) -> None:
+        self._flags: dict[str, dict] = {}
+        self._pending_flags: set[tuple[Key, int]] = set()
+
+    def add_pending(self, key: Key, flag: int) -> None:
+        self._pending_flags.add((key, flag))
+
+    def finalize_pending(self) -> None:
+        for key, flag in self._pending_flags:
+            self.set(key, flag, recursive=False)
+        self._pending_flags.clear()
+
+    def unset_all(self, key: Key) -> None:
+        cont = self._flags
+        for k in key[:-1]:
+            if k not in cont:
+                return
+            cont = cont[k]["nested"]
+        cont.pop(key[-1], None)
+
+    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
+        cont = self._flags
+        key_parent, key_stem = key[:-1], key[-1]
+        for k in key_parent:
+            if k not in cont:
+                cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
+            cont = cont[k]["nested"]
+        if key_stem not in cont:
+            cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
+        cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)
+
+    def is_(self, key: Key, flag: int) -> bool:
+        if not key:
+            return False  # document root has no flags
+        cont = self._flags
+        for k in key[:-1]:
+            if k not in cont:
+                return False
+            inner_cont = cont[k]
+            if flag in inner_cont["recursive_flags"]:
+                return True
+            cont = inner_cont["nested"]
+        key_stem = key[-1]
+        if key_stem in cont:
+            cont = cont[key_stem]
+            return flag in cont["flags"] or flag in cont["recursive_flags"]
+        return False
+
+
+class NestedDict:
+    def __init__(self) -> None:
+        # The parsed content of the TOML document
+        self.dict: dict[str, Any] = {}
+
+    def get_or_create_nest(
+        self,
+        key: Key,
+        *,
+        access_lists: bool = True,
+    ) -> dict:
+        cont: Any = self.dict
+        for k in key:
+            if k not in cont:
+                cont[k] = {}
+            cont = cont[k]
+            if access_lists and isinstance(cont, list):
+                cont = cont[-1]
+            if not isinstance(cont, dict):
+                raise KeyError("There is no nest behind this key")
+        return cont
+
+    def append_nest_to_list(self, key: Key) -> None:
+        cont = self.get_or_create_nest(key[:-1])
+        last_key = key[-1]
+        if last_key in cont:
+            list_ = cont[last_key]
+            if not isinstance(list_, list):
+                raise KeyError("An object other than list found behind this key")
+            list_.append({})
+        else:
+            cont[last_key] = [{}]
+
+
+class Output(NamedTuple):
+    data: NestedDict
+    flags: Flags
+
+
+def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
+    try:
+        while src[pos] in chars:
+            pos += 1
+    except IndexError:
+        pass
+    return pos
+
+
+def skip_until(
+    src: str,
+    pos: Pos,
+    expect: str,
+    *,
+    error_on: frozenset[str],
+    error_on_eof: bool,
+) -> Pos:
+    try:
+        new_pos = src.index(expect, pos)
+    except ValueError:
+        new_pos = len(src)
+        if error_on_eof:
+            raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None
+
+    if not error_on.isdisjoint(src[pos:new_pos]):
+        while src[pos] not in error_on:
+            pos += 1
+        raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}")
+    return new_pos
+
+
+def skip_comment(src: str, pos: Pos) -> Pos:
+    try:
+        char: str | None = src[pos]
+    except IndexError:
+        char = None
+    if char == "#":
+        return skip_until(
+            src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
+        )
+    return pos
+
+
+def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
+    while True:
+        pos_before_skip = pos
+        pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
+        pos = skip_comment(src, pos)
+        if pos == pos_before_skip:
+            return pos
+
+
+def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
+    pos += 1  # Skip "["
+    pos = skip_chars(src, pos, TOML_WS)
+    pos, key = parse_key(src, pos)
+
+    if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN):
+        raise suffixed_err(src, pos, f"Cannot declare {key} twice")
+    out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
+    try:
+        out.data.get_or_create_nest(key)
+    except KeyError:
+        raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+
+    if not src.startswith("]", pos):
+        raise suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
+    return pos + 1, key
+
+
+def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
+    pos += 2  # Skip "[["
+    pos = skip_chars(src, pos, TOML_WS)
+    pos, key = parse_key(src, pos)
+
+    if out.flags.is_(key, Flags.FROZEN):
+        raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}")
+    # Free the namespace now that it points to another empty list item...
+    out.flags.unset_all(key)
+    # ...but this key precisely is still prohibited from table declaration
+    out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
+    try:
+        out.data.append_nest_to_list(key)
+    except KeyError:
+        raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+
+    if not src.startswith("]]", pos):
+        raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
+    return pos + 2, key
+
+
+def key_value_rule(
+    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
+) -> Pos:
+    pos, key, value = parse_key_value_pair(src, pos, parse_float)
+    key_parent, key_stem = key[:-1], key[-1]
+    abs_key_parent = header + key_parent
+
+    relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
+    for cont_key in relative_path_cont_keys:
+        # Check that dotted key syntax does not redefine an existing table
+        if out.flags.is_(cont_key, Flags.EXPLICIT_NEST):
+            raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}")
+        # Containers in the relative path can't be opened with the table syntax or
+        # dotted key/value syntax in following table sections.
+        out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST)
+
+    if out.flags.is_(abs_key_parent, Flags.FROZEN):
+        raise suffixed_err(
+            src, pos, f"Cannot mutate immutable namespace {abs_key_parent}"
+        )
+
+    try:
+        nest = out.data.get_or_create_nest(abs_key_parent)
+    except KeyError:
+        raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+    if key_stem in nest:
+        raise suffixed_err(src, pos, "Cannot overwrite a value")
+    # Mark inline table and array namespaces recursively immutable
+    if isinstance(value, (dict, list)):
+        out.flags.set(header + key, Flags.FROZEN, recursive=True)
+    nest[key_stem] = value
+    return pos
+
+
+def parse_key_value_pair(
+    src: str, pos: Pos, parse_float: ParseFloat
+) -> tuple[Pos, Key, Any]:
+    pos, key = parse_key(src, pos)
+    try:
+        char: str | None = src[pos]
+    except IndexError:
+        char = None
+    if char != "=":
+        raise suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
+    pos += 1
+    pos = skip_chars(src, pos, TOML_WS)
+    pos, value = parse_value(src, pos, parse_float)
+    return pos, key, value
+
+
+def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
+    pos, key_part = parse_key_part(src, pos)
+    key: Key = (key_part,)
+    pos = skip_chars(src, pos, TOML_WS)
+    while True:
+        try:
+            char: str | None = src[pos]
+        except IndexError:
+            char = None
+        if char != ".":
+            return pos, key
+        pos += 1
+        pos = skip_chars(src, pos, TOML_WS)
+        pos, key_part = parse_key_part(src, pos)
+        key += (key_part,)
+        pos = skip_chars(src, pos, TOML_WS)
+
+
+def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
+    try:
+        char: str | None = src[pos]
+    except IndexError:
+        char = None
+    if char in BARE_KEY_CHARS:
+        start_pos = pos
+        pos = skip_chars(src, pos, BARE_KEY_CHARS)
+        return pos, src[start_pos:pos]
+    if char == "'":
+        return parse_literal_str(src, pos)
+    if char == '"':
+        return parse_one_line_basic_str(src, pos)
+    raise suffixed_err(src, pos, "Invalid initial character for a key part")
+
+
+def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
+    pos += 1
+    return parse_basic_str(src, pos, multiline=False)
+
+
+def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]:
+    pos += 1
+    array: list = []
+
+    pos = skip_comments_and_array_ws(src, pos)
+    if src.startswith("]", pos):
+        return pos + 1, array
+    while True:
+        pos, val = parse_value(src, pos, parse_float)
+        array.append(val)
+        pos = skip_comments_and_array_ws(src, pos)
+
+        c = src[pos : pos + 1]
+        if c == "]":
+            return pos + 1, array
+        if c != ",":
+            raise suffixed_err(src, pos, "Unclosed array")
+        pos += 1
+
+        pos = skip_comments_and_array_ws(src, pos)
+        if src.startswith("]", pos):
+            return pos + 1, array
+
+
+def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]:
+    pos += 1
+    nested_dict = NestedDict()
+    flags = Flags()
+
+    pos = skip_chars(src, pos, TOML_WS)
+    if src.startswith("}", pos):
+        return pos + 1, nested_dict.dict
+    while True:
+        pos, key, value = parse_key_value_pair(src, pos, parse_float)
+        key_parent, key_stem = key[:-1], key[-1]
+        if flags.is_(key, Flags.FROZEN):
+            raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}")
+        try:
+            nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
+        except KeyError:
+            raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+        if key_stem in nest:
+            raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}")
+        nest[key_stem] = value
+        pos = skip_chars(src, pos, TOML_WS)
+        c = src[pos : pos + 1]
+        if c == "}":
+            return pos + 1, nested_dict.dict
+        if c != ",":
+            raise suffixed_err(src, pos, "Unclosed inline table")
+        if isinstance(value, (dict, list)):
+            flags.set(key, Flags.FROZEN, recursive=True)
+        pos += 1
+        pos = skip_chars(src, pos, TOML_WS)
+
+
+def parse_basic_str_escape(
+    src: str, pos: Pos, *, multiline: bool = False
+) -> tuple[Pos, str]:
+    escape_id = src[pos : pos + 2]
+    pos += 2
+    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
+        # Skip whitespace until next non-whitespace character or end of
+        # the doc. Error if non-whitespace is found before newline.
+        if escape_id != "\\\n":
+            pos = skip_chars(src, pos, TOML_WS)
+            try:
+                char = src[pos]
+            except IndexError:
+                return pos, ""
+            if char != "\n":
+                raise suffixed_err(src, pos, "Unescaped '\\' in a string")
+            pos += 1
+        pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
+        return pos, ""
+    if escape_id == "\\u":
+        return parse_hex_char(src, pos, 4)
+    if escape_id == "\\U":
+        return parse_hex_char(src, pos, 8)
+    try:
+        return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
+    except KeyError:
+        raise suffixed_err(src, pos, "Unescaped '\\' in a string") from None
+
+
+def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
+    return parse_basic_str_escape(src, pos, multiline=True)
+
+
+def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
+    hex_str = src[pos : pos + hex_len]
+    if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str):
+        raise suffixed_err(src, pos, "Invalid hex value")
+    pos += hex_len
+    hex_int = int(hex_str, 16)
+    if not is_unicode_scalar_value(hex_int):
+        raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value")
+    return pos, chr(hex_int)
+
+
+def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
+    pos += 1  # Skip starting apostrophe
+    start_pos = pos
+    pos = skip_until(
+        src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
+    )
+    return pos + 1, src[start_pos:pos]  # Skip ending apostrophe
+
+
+def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
+    pos += 3
+    if src.startswith("\n", pos):
+        pos += 1
+
+    if literal:
+        delim = "'"
+        end_pos = skip_until(
+            src,
+            pos,
+            "'''",
+            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
+            error_on_eof=True,
+        )
+        result = src[pos:end_pos]
+        pos = end_pos + 3
+    else:
+        delim = '"'
+        pos, result = parse_basic_str(src, pos, multiline=True)
+
+    # Add at maximum two extra apostrophes/quotes if the end sequence
+    # is 4 or 5 chars long instead of just 3.
+    if not src.startswith(delim, pos):
+        return pos, result
+    pos += 1
+    if not src.startswith(delim, pos):
+        return pos, result + delim
+    pos += 1
+    return pos, result + (delim * 2)
+
+
+def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
+    if multiline:
+        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
+        parse_escapes = parse_basic_str_escape_multiline
+    else:
+        error_on = ILLEGAL_BASIC_STR_CHARS
+        parse_escapes = parse_basic_str_escape
+    result = ""
+    start_pos = pos
+    while True:
+        try:
+            char = src[pos]
+        except IndexError:
+            raise suffixed_err(src, pos, "Unterminated string") from None
+        if char == '"':
+            if not multiline:
+                return pos + 1, result + src[start_pos:pos]
+            if src.startswith('"""', pos):
+                return pos + 3, result + src[start_pos:pos]
+            pos += 1
+            continue
+        if char == "\\":
+            result += src[start_pos:pos]
+            pos, parsed_escape = parse_escapes(src, pos)
+            result += parsed_escape
+            start_pos = pos
+            continue
+        if char in error_on:
+            raise suffixed_err(src, pos, f"Illegal character {char!r}")
+        pos += 1
+
+
+def parse_value(  # noqa: C901
+    src: str, pos: Pos, parse_float: ParseFloat
+) -> tuple[Pos, Any]:
+    try:
+        char: str | None = src[pos]
+    except IndexError:
+        char = None
+
+    # IMPORTANT: order conditions based on speed of checking and likelihood
+
+    # Basic strings
+    if char == '"':
+        if src.startswith('"""', pos):
+            return parse_multiline_str(src, pos, literal=False)
+        return parse_one_line_basic_str(src, pos)
+
+    # Literal strings
+    if char == "'":
+        if src.startswith("'''", pos):
+            return parse_multiline_str(src, pos, literal=True)
+        return parse_literal_str(src, pos)
+
+    # Booleans
+    if char == "t":
+        if src.startswith("true", pos):
+            return pos + 4, True
+    if char == "f":
+        if src.startswith("false", pos):
+            return pos + 5, False
+
+    # Arrays
+    if char == "[":
+        return parse_array(src, pos, parse_float)
+
+    # Inline tables
+    if char == "{":
+        return parse_inline_table(src, pos, parse_float)
+
+    # Dates and times
+    datetime_match = RE_DATETIME.match(src, pos)
+    if datetime_match:
+        try:
+            datetime_obj = match_to_datetime(datetime_match)
+        except ValueError as e:
+            raise suffixed_err(src, pos, "Invalid date or datetime") from e
+        return datetime_match.end(), datetime_obj
+    localtime_match = RE_LOCALTIME.match(src, pos)
+    if localtime_match:
+        return localtime_match.end(), match_to_localtime(localtime_match)
+
+    # Integers and "normal" floats.
+    # The regex will greedily match any type starting with a decimal
+    # char, so needs to be located after handling of dates and times.
+    number_match = RE_NUMBER.match(src, pos)
+    if number_match:
+        return number_match.end(), match_to_number(number_match, parse_float)
+
+    # Special floats
+    first_three = src[pos : pos + 3]
+    if first_three in {"inf", "nan"}:
+        return pos + 3, parse_float(first_three)
+    first_four = src[pos : pos + 4]
+    if first_four in {"-inf", "+inf", "-nan", "+nan"}:
+        return pos + 4, parse_float(first_four)
+
+    raise suffixed_err(src, pos, "Invalid value")
+
+
+def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
+    """Return a `TOMLDecodeError` where error message is suffixed with
+    coordinates in source."""
+
+    def coord_repr(src: str, pos: Pos) -> str:
+        if pos >= len(src):
+            return "end of document"
+        line = src.count("\n", 0, pos) + 1
+        if line == 1:
+            column = pos + 1
+        else:
+            column = pos - src.rindex("\n", 0, pos)
+        return f"line {line}, column {column}"
+
+    return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})")
+
+
+def is_unicode_scalar_value(codepoint: int) -> bool:
+    return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
+
+
+def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
+    """A decorator to make `parse_float` safe.
+
+    `parse_float` must not return dicts or lists, because these types
+    would be mixed with parsed TOML tables and arrays, thus confusing
+    the parser. The returned decorated callable raises `ValueError`
+    instead of returning illegal types.
+    """
+    # The default `float` callable never returns illegal types. Optimize it.
+    if parse_float is float:  # type: ignore[comparison-overlap]
+        return float
+
+    def safe_parse_float(float_str: str) -> Any:
+        float_value = parse_float(float_str)
+        if isinstance(float_value, (dict, list)):
+            raise ValueError("parse_float must not return dicts or lists")
+        return float_value
+
+    return safe_parse_float
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_re.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_re.py
new file mode 100644
index 0000000000000000000000000000000000000000..994bb7493fd92865e6ab87c277ba5741b44c31a9
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_re.py
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+from __future__ import annotations
+
+from datetime import date, datetime, time, timedelta, timezone, tzinfo
+from functools import lru_cache
+import re
+from typing import Any
+
+from ._types import ParseFloat
+
+# E.g.
+# - 00:32:00.999999
+# - 00:32:00
+_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"
+
+RE_NUMBER = re.compile(
+    r"""
+0
+(?:
+    x[0-9A-Fa-f](?:_?[0-9A-Fa-f])*   # hex
+    |
+    b[01](?:_?[01])*                 # bin
+    |
+    o[0-7](?:_?[0-7])*               # oct
+)
+|
+[+-]?(?:0|[1-9](?:_?[0-9])*)         # dec, integer part
+(?P
+    (?:\.[0-9](?:_?[0-9])*)?         # optional fractional part
+    (?:[eE][+-]?[0-9](?:_?[0-9])*)?  # optional exponent part
+)
+""",
+    flags=re.VERBOSE,
+)
+RE_LOCALTIME = re.compile(_TIME_RE_STR)
+RE_DATETIME = re.compile(
+    rf"""
+([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])  # date, e.g. 1988-10-27
+(?:
+    [Tt ]
+    {_TIME_RE_STR}
+    (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))?  # optional time offset
+)?
+""",
+    flags=re.VERBOSE,
+)
+
+
+def match_to_datetime(match: re.Match) -> datetime | date:
+    """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
+
+    Raises ValueError if the match does not correspond to a valid date
+    or datetime.
+    """
+    (
+        year_str,
+        month_str,
+        day_str,
+        hour_str,
+        minute_str,
+        sec_str,
+        micros_str,
+        zulu_time,
+        offset_sign_str,
+        offset_hour_str,
+        offset_minute_str,
+    ) = match.groups()
+    year, month, day = int(year_str), int(month_str), int(day_str)
+    if hour_str is None:
+        return date(year, month, day)
+    hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
+    micros = int(micros_str.ljust(6, "0")) if micros_str else 0
+    if offset_sign_str:
+        tz: tzinfo | None = cached_tz(
+            offset_hour_str, offset_minute_str, offset_sign_str
+        )
+    elif zulu_time:
+        tz = timezone.utc
+    else:  # local date-time
+        tz = None
+    return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
+
+
+@lru_cache(maxsize=None)
+def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
+    sign = 1 if sign_str == "+" else -1
+    return timezone(
+        timedelta(
+            hours=sign * int(hour_str),
+            minutes=sign * int(minute_str),
+        )
+    )
+
+
+def match_to_localtime(match: re.Match) -> time:
+    hour_str, minute_str, sec_str, micros_str = match.groups()
+    micros = int(micros_str.ljust(6, "0")) if micros_str else 0
+    return time(int(hour_str), int(minute_str), int(sec_str), micros)
+
+
+def match_to_number(match: re.Match, parse_float: ParseFloat) -> Any:
+    if match.group("floatpart"):
+        return parse_float(match.group())
+    return int(match.group(), 0)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_types.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..d949412e03b29d70592c7721fe747e5085c2e280
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/_types.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+from typing import Any, Callable, Tuple
+
+# Type annotations
+ParseFloat = Callable[[str], Any]
+Key = Tuple[str, ...]
+Pos = int
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/py.typed
new file mode 100644
index 0000000000000000000000000000000000000000..7632ecf77545c5e5501cb3fc5719df0761104ca2
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/tomli/py.typed
@@ -0,0 +1 @@
+# Marker file for PEP 561
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a773bbbcd7dc57395e09571b14084272c1c7bd71
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__init__.py
@@ -0,0 +1,3 @@
+from __future__ import annotations
+
+__version__ = "0.43.0"
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..39064fbb36d4c63bd41c84e83cd7b16f289e833a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/macosx_libfile.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/macosx_libfile.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7467240fb964a3fe489ed4e0fd425d6856a5801a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/macosx_libfile.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/metadata.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/metadata.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9becb61eededc6cc113cbf7b88a2b025e055826c
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/metadata.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/util.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..39f0f4b9424af43ec361bbec0000e1cb3a25e3ce
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/util.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/wheelfile.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/wheelfile.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f327e3a2286bd6619567245fb544a0bae15254f1
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/__pycache__/wheelfile.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/macosx_libfile.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/macosx_libfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..8953c3f8051909dca648b9afef210f5763aa55f9
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/macosx_libfile.py
@@ -0,0 +1,469 @@
+"""
+This module contains function to analyse dynamic library
+headers to extract system information
+
+Currently only for MacOSX
+
+Library file on macosx system starts with Mach-O or Fat field.
+This can be distinguish by first 32 bites and it is called magic number.
+Proper value of magic number is with suffix _MAGIC. Suffix _CIGAM means
+reversed bytes order.
+Both fields can occur in two types: 32 and 64 bytes.
+
+FAT field inform that this library contains few version of library
+(typically for different types version). It contains
+information where Mach-O headers starts.
+
+Each section started with Mach-O header contains one library
+(So if file starts with this field it contains only one version).
+
+After filed Mach-O there are section fields.
+Each of them starts with two fields:
+cmd - magic number for this command
+cmdsize - total size occupied by this section information.
+
+In this case only sections LC_VERSION_MIN_MACOSX (for macosx 10.13 and earlier)
+and LC_BUILD_VERSION (for macosx 10.14 and newer) are interesting,
+because them contains information about minimal system version.
+
+Important remarks:
+- For fat files this implementation looks for maximum number version.
+  It not check if it is 32 or 64 and do not compare it with currently built package.
+  So it is possible to false report higher version that needed.
+- All structures signatures are taken form macosx header files.
+- I think that binary format will be more stable than `otool` output.
+  and if apple introduce some changes both implementation will need to be updated.
+- The system compile will set the deployment target no lower than
+  11.0 for arm64 builds. For "Universal 2" builds use the x86_64 deployment
+  target when the arm64 target is 11.0.
+"""
+
+from __future__ import annotations
+
+import ctypes
+import os
+import sys
+
+"""here the needed const and struct from mach-o header files"""
+
+FAT_MAGIC = 0xCAFEBABE
+FAT_CIGAM = 0xBEBAFECA
+FAT_MAGIC_64 = 0xCAFEBABF
+FAT_CIGAM_64 = 0xBFBAFECA
+MH_MAGIC = 0xFEEDFACE
+MH_CIGAM = 0xCEFAEDFE
+MH_MAGIC_64 = 0xFEEDFACF
+MH_CIGAM_64 = 0xCFFAEDFE
+
+LC_VERSION_MIN_MACOSX = 0x24
+LC_BUILD_VERSION = 0x32
+
+CPU_TYPE_ARM64 = 0x0100000C
+
+mach_header_fields = [
+    ("magic", ctypes.c_uint32),
+    ("cputype", ctypes.c_int),
+    ("cpusubtype", ctypes.c_int),
+    ("filetype", ctypes.c_uint32),
+    ("ncmds", ctypes.c_uint32),
+    ("sizeofcmds", ctypes.c_uint32),
+    ("flags", ctypes.c_uint32),
+]
+"""
+struct mach_header {
+    uint32_t	magic;		/* mach magic number identifier */
+    cpu_type_t	cputype;	/* cpu specifier */
+    cpu_subtype_t	cpusubtype;	/* machine specifier */
+    uint32_t	filetype;	/* type of file */
+    uint32_t	ncmds;		/* number of load commands */
+    uint32_t	sizeofcmds;	/* the size of all the load commands */
+    uint32_t	flags;		/* flags */
+};
+typedef integer_t cpu_type_t;
+typedef integer_t cpu_subtype_t;
+"""
+
+mach_header_fields_64 = mach_header_fields + [("reserved", ctypes.c_uint32)]
+"""
+struct mach_header_64 {
+    uint32_t	magic;		/* mach magic number identifier */
+    cpu_type_t	cputype;	/* cpu specifier */
+    cpu_subtype_t	cpusubtype;	/* machine specifier */
+    uint32_t	filetype;	/* type of file */
+    uint32_t	ncmds;		/* number of load commands */
+    uint32_t	sizeofcmds;	/* the size of all the load commands */
+    uint32_t	flags;		/* flags */
+    uint32_t	reserved;	/* reserved */
+};
+"""
+
+fat_header_fields = [("magic", ctypes.c_uint32), ("nfat_arch", ctypes.c_uint32)]
+"""
+struct fat_header {
+    uint32_t	magic;		/* FAT_MAGIC or FAT_MAGIC_64 */
+    uint32_t	nfat_arch;	/* number of structs that follow */
+};
+"""
+
+fat_arch_fields = [
+    ("cputype", ctypes.c_int),
+    ("cpusubtype", ctypes.c_int),
+    ("offset", ctypes.c_uint32),
+    ("size", ctypes.c_uint32),
+    ("align", ctypes.c_uint32),
+]
+"""
+struct fat_arch {
+    cpu_type_t	cputype;	/* cpu specifier (int) */
+    cpu_subtype_t	cpusubtype;	/* machine specifier (int) */
+    uint32_t	offset;		/* file offset to this object file */
+    uint32_t	size;		/* size of this object file */
+    uint32_t	align;		/* alignment as a power of 2 */
+};
+"""
+
+fat_arch_64_fields = [
+    ("cputype", ctypes.c_int),
+    ("cpusubtype", ctypes.c_int),
+    ("offset", ctypes.c_uint64),
+    ("size", ctypes.c_uint64),
+    ("align", ctypes.c_uint32),
+    ("reserved", ctypes.c_uint32),
+]
+"""
+struct fat_arch_64 {
+    cpu_type_t	cputype;	/* cpu specifier (int) */
+    cpu_subtype_t	cpusubtype;	/* machine specifier (int) */
+    uint64_t	offset;		/* file offset to this object file */
+    uint64_t	size;		/* size of this object file */
+    uint32_t	align;		/* alignment as a power of 2 */
+    uint32_t	reserved;	/* reserved */
+};
+"""
+
+segment_base_fields = [("cmd", ctypes.c_uint32), ("cmdsize", ctypes.c_uint32)]
+"""base for reading segment info"""
+
+segment_command_fields = [
+    ("cmd", ctypes.c_uint32),
+    ("cmdsize", ctypes.c_uint32),
+    ("segname", ctypes.c_char * 16),
+    ("vmaddr", ctypes.c_uint32),
+    ("vmsize", ctypes.c_uint32),
+    ("fileoff", ctypes.c_uint32),
+    ("filesize", ctypes.c_uint32),
+    ("maxprot", ctypes.c_int),
+    ("initprot", ctypes.c_int),
+    ("nsects", ctypes.c_uint32),
+    ("flags", ctypes.c_uint32),
+]
+"""
+struct segment_command { /* for 32-bit architectures */
+    uint32_t	cmd;		/* LC_SEGMENT */
+    uint32_t	cmdsize;	/* includes sizeof section structs */
+    char		segname[16];	/* segment name */
+    uint32_t	vmaddr;		/* memory address of this segment */
+    uint32_t	vmsize;		/* memory size of this segment */
+    uint32_t	fileoff;	/* file offset of this segment */
+    uint32_t	filesize;	/* amount to map from the file */
+    vm_prot_t	maxprot;	/* maximum VM protection */
+    vm_prot_t	initprot;	/* initial VM protection */
+    uint32_t	nsects;		/* number of sections in segment */
+    uint32_t	flags;		/* flags */
+};
+typedef int vm_prot_t;
+"""
+
+segment_command_fields_64 = [
+    ("cmd", ctypes.c_uint32),
+    ("cmdsize", ctypes.c_uint32),
+    ("segname", ctypes.c_char * 16),
+    ("vmaddr", ctypes.c_uint64),
+    ("vmsize", ctypes.c_uint64),
+    ("fileoff", ctypes.c_uint64),
+    ("filesize", ctypes.c_uint64),
+    ("maxprot", ctypes.c_int),
+    ("initprot", ctypes.c_int),
+    ("nsects", ctypes.c_uint32),
+    ("flags", ctypes.c_uint32),
+]
+"""
+struct segment_command_64 { /* for 64-bit architectures */
+    uint32_t	cmd;		/* LC_SEGMENT_64 */
+    uint32_t	cmdsize;	/* includes sizeof section_64 structs */
+    char		segname[16];	/* segment name */
+    uint64_t	vmaddr;		/* memory address of this segment */
+    uint64_t	vmsize;		/* memory size of this segment */
+    uint64_t	fileoff;	/* file offset of this segment */
+    uint64_t	filesize;	/* amount to map from the file */
+    vm_prot_t	maxprot;	/* maximum VM protection */
+    vm_prot_t	initprot;	/* initial VM protection */
+    uint32_t	nsects;		/* number of sections in segment */
+    uint32_t	flags;		/* flags */
+};
+"""
+
+version_min_command_fields = segment_base_fields + [
+    ("version", ctypes.c_uint32),
+    ("sdk", ctypes.c_uint32),
+]
+"""
+struct version_min_command {
+    uint32_t	cmd;		/* LC_VERSION_MIN_MACOSX or
+                               LC_VERSION_MIN_IPHONEOS or
+                               LC_VERSION_MIN_WATCHOS or
+                               LC_VERSION_MIN_TVOS */
+    uint32_t	cmdsize;	/* sizeof(struct min_version_command) */
+    uint32_t	version;	/* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+    uint32_t	sdk;		/* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+};
+"""
+
+build_version_command_fields = segment_base_fields + [
+    ("platform", ctypes.c_uint32),
+    ("minos", ctypes.c_uint32),
+    ("sdk", ctypes.c_uint32),
+    ("ntools", ctypes.c_uint32),
+]
+"""
+struct build_version_command {
+    uint32_t	cmd;		/* LC_BUILD_VERSION */
+    uint32_t	cmdsize;	/* sizeof(struct build_version_command) plus */
+                                /* ntools * sizeof(struct build_tool_version) */
+    uint32_t	platform;	/* platform */
+    uint32_t	minos;		/* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+    uint32_t	sdk;		/* X.Y.Z is encoded in nibbles xxxx.yy.zz */
+    uint32_t	ntools;		/* number of tool entries following this */
+};
+"""
+
+
+def swap32(x):
+    return (
+        ((x << 24) & 0xFF000000)
+        | ((x << 8) & 0x00FF0000)
+        | ((x >> 8) & 0x0000FF00)
+        | ((x >> 24) & 0x000000FF)
+    )
+
+
+def get_base_class_and_magic_number(lib_file, seek=None):
+    if seek is None:
+        seek = lib_file.tell()
+    else:
+        lib_file.seek(seek)
+    magic_number = ctypes.c_uint32.from_buffer_copy(
+        lib_file.read(ctypes.sizeof(ctypes.c_uint32))
+    ).value
+
+    # Handle wrong byte order
+    if magic_number in [FAT_CIGAM, FAT_CIGAM_64, MH_CIGAM, MH_CIGAM_64]:
+        if sys.byteorder == "little":
+            BaseClass = ctypes.BigEndianStructure
+        else:
+            BaseClass = ctypes.LittleEndianStructure
+
+        magic_number = swap32(magic_number)
+    else:
+        BaseClass = ctypes.Structure
+
+    lib_file.seek(seek)
+    return BaseClass, magic_number
+
+
+def read_data(struct_class, lib_file):
+    return struct_class.from_buffer_copy(lib_file.read(ctypes.sizeof(struct_class)))
+
+
+def extract_macosx_min_system_version(path_to_lib):
+    with open(path_to_lib, "rb") as lib_file:
+        BaseClass, magic_number = get_base_class_and_magic_number(lib_file, 0)
+        if magic_number not in [FAT_MAGIC, FAT_MAGIC_64, MH_MAGIC, MH_MAGIC_64]:
+            return
+
+        if magic_number in [FAT_MAGIC, FAT_CIGAM_64]:
+
+            class FatHeader(BaseClass):
+                _fields_ = fat_header_fields
+
+            fat_header = read_data(FatHeader, lib_file)
+            if magic_number == FAT_MAGIC:
+
+                class FatArch(BaseClass):
+                    _fields_ = fat_arch_fields
+
+            else:
+
+                class FatArch(BaseClass):
+                    _fields_ = fat_arch_64_fields
+
+            fat_arch_list = [
+                read_data(FatArch, lib_file) for _ in range(fat_header.nfat_arch)
+            ]
+
+            versions_list = []
+            for el in fat_arch_list:
+                try:
+                    version = read_mach_header(lib_file, el.offset)
+                    if version is not None:
+                        if el.cputype == CPU_TYPE_ARM64 and len(fat_arch_list) != 1:
+                            # Xcode will not set the deployment target below 11.0.0
+                            # for the arm64 architecture. Ignore the arm64 deployment
+                            # in fat binaries when the target is 11.0.0, that way
+                            # the other architectures can select a lower deployment
+                            # target.
+                            # This is safe because there is no arm64 variant for
+                            # macOS 10.15 or earlier.
+                            if version == (11, 0, 0):
+                                continue
+                        versions_list.append(version)
+                except ValueError:
+                    pass
+
+            if len(versions_list) > 0:
+                return max(versions_list)
+            else:
+                return None
+
+        else:
+            try:
+                return read_mach_header(lib_file, 0)
+            except ValueError:
+                """when some error during read library files"""
+                return None
+
+
+def read_mach_header(lib_file, seek=None):
+    """
+    This function parses a Mach-O header and extracts
+    information about the minimal macOS version.
+
+    :param lib_file: reference to opened library file with pointer
+    """
+    base_class, magic_number = get_base_class_and_magic_number(lib_file, seek)
+    arch = "32" if magic_number == MH_MAGIC else "64"
+
+    class SegmentBase(base_class):
+        _fields_ = segment_base_fields
+
+    if arch == "32":
+
+        class MachHeader(base_class):
+            _fields_ = mach_header_fields
+
+    else:
+
+        class MachHeader(base_class):
+            _fields_ = mach_header_fields_64
+
+    mach_header = read_data(MachHeader, lib_file)
+    for _i in range(mach_header.ncmds):
+        pos = lib_file.tell()
+        segment_base = read_data(SegmentBase, lib_file)
+        lib_file.seek(pos)
+        if segment_base.cmd == LC_VERSION_MIN_MACOSX:
+
+            class VersionMinCommand(base_class):
+                _fields_ = version_min_command_fields
+
+            version_info = read_data(VersionMinCommand, lib_file)
+            return parse_version(version_info.version)
+        elif segment_base.cmd == LC_BUILD_VERSION:
+
+            class VersionBuild(base_class):
+                _fields_ = build_version_command_fields
+
+            version_info = read_data(VersionBuild, lib_file)
+            return parse_version(version_info.minos)
+        else:
+            lib_file.seek(pos + segment_base.cmdsize)
+            continue
+
+
+def parse_version(version):
+    x = (version & 0xFFFF0000) >> 16
+    y = (version & 0x0000FF00) >> 8
+    z = version & 0x000000FF
+    return x, y, z
+
+
+def calculate_macosx_platform_tag(archive_root, platform_tag):
+    """
+    Calculate proper macosx platform tag basing on files which are included to wheel
+
+    Example platform tag `macosx-10.14-x86_64`
+    """
+    prefix, base_version, suffix = platform_tag.split("-")
+    base_version = tuple(int(x) for x in base_version.split("."))
+    base_version = base_version[:2]
+    if base_version[0] > 10:
+        base_version = (base_version[0], 0)
+    assert len(base_version) == 2
+    if "MACOSX_DEPLOYMENT_TARGET" in os.environ:
+        deploy_target = tuple(
+            int(x) for x in os.environ["MACOSX_DEPLOYMENT_TARGET"].split(".")
+        )
+        deploy_target = deploy_target[:2]
+        if deploy_target[0] > 10:
+            deploy_target = (deploy_target[0], 0)
+        if deploy_target < base_version:
+            sys.stderr.write(
+                "[WARNING] MACOSX_DEPLOYMENT_TARGET is set to a lower value ({}) than "
+                "the version on which the Python interpreter was compiled ({}), and "
+                "will be ignored.\n".format(
+                    ".".join(str(x) for x in deploy_target),
+                    ".".join(str(x) for x in base_version),
+                )
+            )
+        else:
+            base_version = deploy_target
+
+    assert len(base_version) == 2
+    start_version = base_version
+    versions_dict = {}
+    for dirpath, _dirnames, filenames in os.walk(archive_root):
+        for filename in filenames:
+            if filename.endswith(".dylib") or filename.endswith(".so"):
+                lib_path = os.path.join(dirpath, filename)
+                min_ver = extract_macosx_min_system_version(lib_path)
+                if min_ver is not None:
+                    min_ver = min_ver[0:2]
+                    if min_ver[0] > 10:
+                        min_ver = (min_ver[0], 0)
+                    versions_dict[lib_path] = min_ver
+
+    if len(versions_dict) > 0:
+        base_version = max(base_version, max(versions_dict.values()))
+
+    # macosx platform tag do not support minor bugfix release
+    fin_base_version = "_".join([str(x) for x in base_version])
+    if start_version < base_version:
+        problematic_files = [k for k, v in versions_dict.items() if v > start_version]
+        problematic_files = "\n".join(problematic_files)
+        if len(problematic_files) == 1:
+            files_form = "this file"
+        else:
+            files_form = "these files"
+        error_message = (
+            "[WARNING] This wheel needs a higher macOS version than {}  "
+            "To silence this warning, set MACOSX_DEPLOYMENT_TARGET to at least "
+            + fin_base_version
+            + " or recreate "
+            + files_form
+            + " with lower "
+            "MACOSX_DEPLOYMENT_TARGET:  \n" + problematic_files
+        )
+
+        if "MACOSX_DEPLOYMENT_TARGET" in os.environ:
+            error_message = error_message.format(
+                "is set in MACOSX_DEPLOYMENT_TARGET variable."
+            )
+        else:
+            error_message = error_message.format(
+                "the version your Python interpreter is compiled against."
+            )
+
+        sys.stderr.write(error_message)
+
+    platform_tag = prefix + "_" + fin_base_version + "_" + suffix
+    return platform_tag
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/metadata.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/metadata.py
new file mode 100644
index 0000000000000000000000000000000000000000..341f614ceb398baf18d0573e6e65cf47cefc6642
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/metadata.py
@@ -0,0 +1,180 @@
+"""
+Tools for converting old- to new-style metadata.
+"""
+
+from __future__ import annotations
+
+import functools
+import itertools
+import os.path
+import re
+import textwrap
+from email.message import Message
+from email.parser import Parser
+from typing import Iterator
+
+from ..packaging.requirements import Requirement
+
+
+def _nonblank(str):
+    return str and not str.startswith("#")
+
+
+@functools.singledispatch
+def yield_lines(iterable):
+    r"""
+    Yield valid lines of a string or iterable.
+    >>> list(yield_lines(''))
+    []
+    >>> list(yield_lines(['foo', 'bar']))
+    ['foo', 'bar']
+    >>> list(yield_lines('foo\nbar'))
+    ['foo', 'bar']
+    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
+    ['foo', 'baz #comment']
+    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
+    ['foo', 'bar', 'baz', 'bing']
+    """
+    return itertools.chain.from_iterable(map(yield_lines, iterable))
+
+
+@yield_lines.register(str)
+def _(text):
+    return filter(_nonblank, map(str.strip, text.splitlines()))
+
+
+def split_sections(s):
+    """Split a string or iterable thereof into (section, content) pairs
+    Each ``section`` is a stripped version of the section header ("[section]")
+    and each ``content`` is a list of stripped lines excluding blank lines and
+    comment-only lines.  If there are any such lines before the first section
+    header, they're returned in a first ``section`` of ``None``.
+    """
+    section = None
+    content = []
+    for line in yield_lines(s):
+        if line.startswith("["):
+            if line.endswith("]"):
+                if section or content:
+                    yield section, content
+                section = line[1:-1].strip()
+                content = []
+            else:
+                raise ValueError("Invalid section heading", line)
+        else:
+            content.append(line)
+
+    # wrap up last segment
+    yield section, content
+
+
+def safe_extra(extra):
+    """Convert an arbitrary string to a standard 'extra' name
+    Any runs of non-alphanumeric characters are replaced with a single '_',
+    and the result is always lowercased.
+    """
+    return re.sub("[^A-Za-z0-9.-]+", "_", extra).lower()
+
+
+def safe_name(name):
+    """Convert an arbitrary string to a standard distribution name
+    Any runs of non-alphanumeric/. characters are replaced with a single '-'.
+    """
+    return re.sub("[^A-Za-z0-9.]+", "-", name)
+
+
+def requires_to_requires_dist(requirement: Requirement) -> str:
+    """Return the version specifier for a requirement in PEP 345/566 fashion."""
+    if getattr(requirement, "url", None):
+        return " @ " + requirement.url
+
+    requires_dist = []
+    for spec in requirement.specifier:
+        requires_dist.append(spec.operator + spec.version)
+
+    if requires_dist:
+        return " " + ",".join(sorted(requires_dist))
+    else:
+        return ""
+
+
+def convert_requirements(requirements: list[str]) -> Iterator[str]:
+    """Yield Requires-Dist: strings for parsed requirements strings."""
+    for req in requirements:
+        parsed_requirement = Requirement(req)
+        spec = requires_to_requires_dist(parsed_requirement)
+        extras = ",".join(sorted(safe_extra(e) for e in parsed_requirement.extras))
+        if extras:
+            extras = f"[{extras}]"
+
+        yield safe_name(parsed_requirement.name) + extras + spec
+
+
+def generate_requirements(
+    extras_require: dict[str, list[str]],
+) -> Iterator[tuple[str, str]]:
+    """
+    Convert requirements from a setup()-style dictionary to
+    ('Requires-Dist', 'requirement') and ('Provides-Extra', 'extra') tuples.
+
+    extras_require is a dictionary of {extra: [requirements]} as passed to setup(),
+    using the empty extra {'': [requirements]} to hold install_requires.
+    """
+    for extra, depends in extras_require.items():
+        condition = ""
+        extra = extra or ""
+        if ":" in extra:  # setuptools extra:condition syntax
+            extra, condition = extra.split(":", 1)
+
+        extra = safe_extra(extra)
+        if extra:
+            yield "Provides-Extra", extra
+            if condition:
+                condition = "(" + condition + ") and "
+            condition += "extra == '%s'" % extra
+
+        if condition:
+            condition = " ; " + condition
+
+        for new_req in convert_requirements(depends):
+            yield "Requires-Dist", new_req + condition
+
+
+def pkginfo_to_metadata(egg_info_path: str, pkginfo_path: str) -> Message:
+    """
+    Convert .egg-info directory with PKG-INFO to the Metadata 2.1 format
+    """
+    with open(pkginfo_path, encoding="utf-8") as headers:
+        pkg_info = Parser().parse(headers)
+
+    pkg_info.replace_header("Metadata-Version", "2.1")
+    # Those will be regenerated from `requires.txt`.
+    del pkg_info["Provides-Extra"]
+    del pkg_info["Requires-Dist"]
+    requires_path = os.path.join(egg_info_path, "requires.txt")
+    if os.path.exists(requires_path):
+        with open(requires_path, encoding="utf-8") as requires_file:
+            requires = requires_file.read()
+
+        parsed_requirements = sorted(split_sections(requires), key=lambda x: x[0] or "")
+        for extra, reqs in parsed_requirements:
+            for key, value in generate_requirements({extra: reqs}):
+                if (key, value) not in pkg_info.items():
+                    pkg_info[key] = value
+
+    description = pkg_info["Description"]
+    if description:
+        description_lines = pkg_info["Description"].splitlines()
+        dedented_description = "\n".join(
+            # if the first line of long_description is blank,
+            # the first line here will be indented.
+            (
+                description_lines[0].lstrip(),
+                textwrap.dedent("\n".join(description_lines[1:])),
+                "\n",
+            )
+        )
+        pkg_info.set_payload(dedented_description)
+        del pkg_info["Description"]
+
+    return pkg_info
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..d98d98cb52bef152098bd4277dabcd99b0f528bc
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/util.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+import base64
+import logging
+
+log = logging.getLogger("wheel")
+
+# ensure Python logging is configured
+try:
+    __import__("setuptools.logging")
+except ImportError:
+    # setuptools < ??
+    from . import _setuptools_logging
+
+    _setuptools_logging.configure()
+
+
+def urlsafe_b64encode(data: bytes) -> bytes:
+    """urlsafe_b64encode without padding"""
+    return base64.urlsafe_b64encode(data).rstrip(b"=")
+
+
+def urlsafe_b64decode(data: bytes) -> bytes:
+    """urlsafe_b64decode without padding"""
+    pad = b"=" * (4 - (len(data) & 3))
+    return base64.urlsafe_b64decode(data + pad)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/wheelfile.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/wheelfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..83a31772bda8ad1fe5d872ead8966b0f955d9fea
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/wheel/wheelfile.py
@@ -0,0 +1,199 @@
+from __future__ import annotations
+
+import csv
+import hashlib
+import os.path
+import re
+import stat
+import time
+from io import StringIO, TextIOWrapper
+from zipfile import ZIP_DEFLATED, ZipFile, ZipInfo
+
+from .util import log, urlsafe_b64decode, urlsafe_b64encode
+
+# Non-greedy matching of an optional build number may be too clever (more
+# invalid wheel filenames will match). Separate regex for .dist-info?
+WHEEL_INFO_RE = re.compile(
+    r"""^(?P(?P[^\s-]+?)-(?P[^\s-]+?))(-(?P\d[^\s-]*))?
+     -(?P[^\s-]+?)-(?P[^\s-]+?)-(?P\S+)\.whl$""",
+    re.VERBOSE,
+)
+MINIMUM_TIMESTAMP = 315532800  # 1980-01-01 00:00:00 UTC
+
+
+def get_zipinfo_datetime(timestamp=None):
+    # Some applications need reproducible .whl files, but they can't do this without
+    # forcing the timestamp of the individual ZipInfo objects. See issue #143.
+    timestamp = int(os.environ.get("SOURCE_DATE_EPOCH", timestamp or time.time()))
+    timestamp = max(timestamp, MINIMUM_TIMESTAMP)
+    return time.gmtime(timestamp)[0:6]
+
+
+class WheelFile(ZipFile):
+    """A ZipFile derivative class that also reads SHA-256 hashes from
+    .dist-info/RECORD and checks any read files against those.
+    """
+
+    _default_algorithm = hashlib.sha256
+
+    def __init__(self, file, mode="r", compression=ZIP_DEFLATED):
+        basename = os.path.basename(file)
+        self.parsed_filename = WHEEL_INFO_RE.match(basename)
+        if not basename.endswith(".whl") or self.parsed_filename is None:
+            raise WheelError(f"Bad wheel filename {basename!r}")
+
+        ZipFile.__init__(self, file, mode, compression=compression, allowZip64=True)
+
+        self.dist_info_path = "{}.dist-info".format(
+            self.parsed_filename.group("namever")
+        )
+        self.record_path = self.dist_info_path + "/RECORD"
+        self._file_hashes = {}
+        self._file_sizes = {}
+        if mode == "r":
+            # Ignore RECORD and any embedded wheel signatures
+            self._file_hashes[self.record_path] = None, None
+            self._file_hashes[self.record_path + ".jws"] = None, None
+            self._file_hashes[self.record_path + ".p7s"] = None, None
+
+            # Fill in the expected hashes by reading them from RECORD
+            try:
+                record = self.open(self.record_path)
+            except KeyError:
+                raise WheelError(f"Missing {self.record_path} file") from None
+
+            with record:
+                for line in csv.reader(
+                    TextIOWrapper(record, newline="", encoding="utf-8")
+                ):
+                    path, hash_sum, size = line
+                    if not hash_sum:
+                        continue
+
+                    algorithm, hash_sum = hash_sum.split("=")
+                    try:
+                        hashlib.new(algorithm)
+                    except ValueError:
+                        raise WheelError(
+                            f"Unsupported hash algorithm: {algorithm}"
+                        ) from None
+
+                    if algorithm.lower() in {"md5", "sha1"}:
+                        raise WheelError(
+                            f"Weak hash algorithm ({algorithm}) is not permitted by "
+                            f"PEP 427"
+                        )
+
+                    self._file_hashes[path] = (
+                        algorithm,
+                        urlsafe_b64decode(hash_sum.encode("ascii")),
+                    )
+
+    def open(self, name_or_info, mode="r", pwd=None):
+        def _update_crc(newdata):
+            eof = ef._eof
+            update_crc_orig(newdata)
+            running_hash.update(newdata)
+            if eof and running_hash.digest() != expected_hash:
+                raise WheelError(f"Hash mismatch for file '{ef_name}'")
+
+        ef_name = (
+            name_or_info.filename if isinstance(name_or_info, ZipInfo) else name_or_info
+        )
+        if (
+            mode == "r"
+            and not ef_name.endswith("/")
+            and ef_name not in self._file_hashes
+        ):
+            raise WheelError(f"No hash found for file '{ef_name}'")
+
+        ef = ZipFile.open(self, name_or_info, mode, pwd)
+        if mode == "r" and not ef_name.endswith("/"):
+            algorithm, expected_hash = self._file_hashes[ef_name]
+            if expected_hash is not None:
+                # Monkey patch the _update_crc method to also check for the hash from
+                # RECORD
+                running_hash = hashlib.new(algorithm)
+                update_crc_orig, ef._update_crc = ef._update_crc, _update_crc
+
+        return ef
+
+    def write_files(self, base_dir):
+        log.info(f"creating '{self.filename}' and adding '{base_dir}' to it")
+        deferred = []
+        for root, dirnames, filenames in os.walk(base_dir):
+            # Sort the directory names so that `os.walk` will walk them in a
+            # defined order on the next iteration.
+            dirnames.sort()
+            for name in sorted(filenames):
+                path = os.path.normpath(os.path.join(root, name))
+                if os.path.isfile(path):
+                    arcname = os.path.relpath(path, base_dir).replace(os.path.sep, "/")
+                    if arcname == self.record_path:
+                        pass
+                    elif root.endswith(".dist-info"):
+                        deferred.append((path, arcname))
+                    else:
+                        self.write(path, arcname)
+
+        deferred.sort()
+        for path, arcname in deferred:
+            self.write(path, arcname)
+
+    def write(self, filename, arcname=None, compress_type=None):
+        with open(filename, "rb") as f:
+            st = os.fstat(f.fileno())
+            data = f.read()
+
+        zinfo = ZipInfo(
+            arcname or filename, date_time=get_zipinfo_datetime(st.st_mtime)
+        )
+        zinfo.external_attr = (stat.S_IMODE(st.st_mode) | stat.S_IFMT(st.st_mode)) << 16
+        zinfo.compress_type = compress_type or self.compression
+        self.writestr(zinfo, data, compress_type)
+
+    def writestr(self, zinfo_or_arcname, data, compress_type=None):
+        if isinstance(zinfo_or_arcname, str):
+            zinfo_or_arcname = ZipInfo(
+                zinfo_or_arcname, date_time=get_zipinfo_datetime()
+            )
+            zinfo_or_arcname.compress_type = self.compression
+            zinfo_or_arcname.external_attr = (0o664 | stat.S_IFREG) << 16
+
+        if isinstance(data, str):
+            data = data.encode("utf-8")
+
+        ZipFile.writestr(self, zinfo_or_arcname, data, compress_type)
+        fname = (
+            zinfo_or_arcname.filename
+            if isinstance(zinfo_or_arcname, ZipInfo)
+            else zinfo_or_arcname
+        )
+        log.info(f"adding '{fname}'")
+        if fname != self.record_path:
+            hash_ = self._default_algorithm(data)
+            self._file_hashes[fname] = (
+                hash_.name,
+                urlsafe_b64encode(hash_.digest()).decode("ascii"),
+            )
+            self._file_sizes[fname] = len(data)
+
+    def close(self):
+        # Write RECORD
+        if self.fp is not None and self.mode == "w" and self._file_hashes:
+            data = StringIO()
+            writer = csv.writer(data, delimiter=",", quotechar='"', lineterminator="\n")
+            writer.writerows(
+                (
+                    (fname, algorithm + "=" + hash_, self._file_sizes[fname])
+                    for fname, (algorithm, hash_) in self._file_hashes.items()
+                )
+            )
+            writer.writerow((format(self.record_path), "", ""))
+            self.writestr(self.record_path, data.getvalue())
+
+        ZipFile.close(self)
+
+
+class WheelError(Exception):
+    pass
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/zipp.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/zipp.py
new file mode 100644
index 0000000000000000000000000000000000000000..26b723c1fd3e25740e0268b8c9b50905c58c3d4a
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/_vendor/zipp.py
@@ -0,0 +1,329 @@
+import io
+import posixpath
+import zipfile
+import itertools
+import contextlib
+import sys
+import pathlib
+
+if sys.version_info < (3, 7):
+    from collections import OrderedDict
+else:
+    OrderedDict = dict
+
+
+__all__ = ['Path']
+
+
+def _parents(path):
+    """
+    Given a path with elements separated by
+    posixpath.sep, generate all parents of that path.
+
+    >>> list(_parents('b/d'))
+    ['b']
+    >>> list(_parents('/b/d/'))
+    ['/b']
+    >>> list(_parents('b/d/f/'))
+    ['b/d', 'b']
+    >>> list(_parents('b'))
+    []
+    >>> list(_parents(''))
+    []
+    """
+    return itertools.islice(_ancestry(path), 1, None)
+
+
+def _ancestry(path):
+    """
+    Given a path with elements separated by
+    posixpath.sep, generate all elements of that path
+
+    >>> list(_ancestry('b/d'))
+    ['b/d', 'b']
+    >>> list(_ancestry('/b/d/'))
+    ['/b/d', '/b']
+    >>> list(_ancestry('b/d/f/'))
+    ['b/d/f', 'b/d', 'b']
+    >>> list(_ancestry('b'))
+    ['b']
+    >>> list(_ancestry(''))
+    []
+    """
+    path = path.rstrip(posixpath.sep)
+    while path and path != posixpath.sep:
+        yield path
+        path, tail = posixpath.split(path)
+
+
+_dedupe = OrderedDict.fromkeys
+"""Deduplicate an iterable in original order"""
+
+
+def _difference(minuend, subtrahend):
+    """
+    Return items in minuend not in subtrahend, retaining order
+    with O(1) lookup.
+    """
+    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
+
+
+class CompleteDirs(zipfile.ZipFile):
+    """
+    A ZipFile subclass that ensures that implied directories
+    are always included in the namelist.
+    """
+
+    @staticmethod
+    def _implied_dirs(names):
+        parents = itertools.chain.from_iterable(map(_parents, names))
+        as_dirs = (p + posixpath.sep for p in parents)
+        return _dedupe(_difference(as_dirs, names))
+
+    def namelist(self):
+        names = super(CompleteDirs, self).namelist()
+        return names + list(self._implied_dirs(names))
+
+    def _name_set(self):
+        return set(self.namelist())
+
+    def resolve_dir(self, name):
+        """
+        If the name represents a directory, return that name
+        as a directory (with the trailing slash).
+        """
+        names = self._name_set()
+        dirname = name + '/'
+        dir_match = name not in names and dirname in names
+        return dirname if dir_match else name
+
+    @classmethod
+    def make(cls, source):
+        """
+        Given a source (filename or zipfile), return an
+        appropriate CompleteDirs subclass.
+        """
+        if isinstance(source, CompleteDirs):
+            return source
+
+        if not isinstance(source, zipfile.ZipFile):
+            return cls(_pathlib_compat(source))
+
+        # Only allow for FastLookup when supplied zipfile is read-only
+        if 'r' not in source.mode:
+            cls = CompleteDirs
+
+        source.__class__ = cls
+        return source
+
+
+class FastLookup(CompleteDirs):
+    """
+    ZipFile subclass to ensure implicit
+    dirs exist and are resolved rapidly.
+    """
+
+    def namelist(self):
+        with contextlib.suppress(AttributeError):
+            return self.__names
+        self.__names = super(FastLookup, self).namelist()
+        return self.__names
+
+    def _name_set(self):
+        with contextlib.suppress(AttributeError):
+            return self.__lookup
+        self.__lookup = super(FastLookup, self)._name_set()
+        return self.__lookup
+
+
+def _pathlib_compat(path):
+    """
+    For path-like objects, convert to a filename for compatibility
+    on Python 3.6.1 and earlier.
+    """
+    try:
+        return path.__fspath__()
+    except AttributeError:
+        return str(path)
+
+
+class Path:
+    """
+    A pathlib-compatible interface for zip files.
+
+    Consider a zip file with this structure::
+
+        .
+        ├── a.txt
+        └── b
+            ├── c.txt
+            └── d
+                └── e.txt
+
+    >>> data = io.BytesIO()
+    >>> zf = zipfile.ZipFile(data, 'w')
+    >>> zf.writestr('a.txt', 'content of a')
+    >>> zf.writestr('b/c.txt', 'content of c')
+    >>> zf.writestr('b/d/e.txt', 'content of e')
+    >>> zf.filename = 'mem/abcde.zip'
+
+    Path accepts the zipfile object itself or a filename
+
+    >>> root = Path(zf)
+
+    From there, several path operations are available.
+
+    Directory iteration (including the zip file itself):
+
+    >>> a, b = root.iterdir()
+    >>> a
+    Path('mem/abcde.zip', 'a.txt')
+    >>> b
+    Path('mem/abcde.zip', 'b/')
+
+    name property:
+
+    >>> b.name
+    'b'
+
+    join with divide operator:
+
+    >>> c = b / 'c.txt'
+    >>> c
+    Path('mem/abcde.zip', 'b/c.txt')
+    >>> c.name
+    'c.txt'
+
+    Read text:
+
+    >>> c.read_text()
+    'content of c'
+
+    existence:
+
+    >>> c.exists()
+    True
+    >>> (b / 'missing.txt').exists()
+    False
+
+    Coercion to string:
+
+    >>> import os
+    >>> str(c).replace(os.sep, posixpath.sep)
+    'mem/abcde.zip/b/c.txt'
+
+    At the root, ``name``, ``filename``, and ``parent``
+    resolve to the zipfile. Note these attributes are not
+    valid and will raise a ``ValueError`` if the zipfile
+    has no filename.
+
+    >>> root.name
+    'abcde.zip'
+    >>> str(root.filename).replace(os.sep, posixpath.sep)
+    'mem/abcde.zip'
+    >>> str(root.parent)
+    'mem'
+    """
+
+    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
+
+    def __init__(self, root, at=""):
+        """
+        Construct a Path from a ZipFile or filename.
+
+        Note: When the source is an existing ZipFile object,
+        its type (__class__) will be mutated to a
+        specialized type. If the caller wishes to retain the
+        original type, the caller should either create a
+        separate ZipFile object or pass a filename.
+        """
+        self.root = FastLookup.make(root)
+        self.at = at
+
+    def open(self, mode='r', *args, pwd=None, **kwargs):
+        """
+        Open this entry as text or binary following the semantics
+        of ``pathlib.Path.open()`` by passing arguments through
+        to io.TextIOWrapper().
+        """
+        if self.is_dir():
+            raise IsADirectoryError(self)
+        zip_mode = mode[0]
+        if not self.exists() and zip_mode == 'r':
+            raise FileNotFoundError(self)
+        stream = self.root.open(self.at, zip_mode, pwd=pwd)
+        if 'b' in mode:
+            if args or kwargs:
+                raise ValueError("encoding args invalid for binary operation")
+            return stream
+        return io.TextIOWrapper(stream, *args, **kwargs)
+
+    @property
+    def name(self):
+        return pathlib.Path(self.at).name or self.filename.name
+
+    @property
+    def suffix(self):
+        return pathlib.Path(self.at).suffix or self.filename.suffix
+
+    @property
+    def suffixes(self):
+        return pathlib.Path(self.at).suffixes or self.filename.suffixes
+
+    @property
+    def stem(self):
+        return pathlib.Path(self.at).stem or self.filename.stem
+
+    @property
+    def filename(self):
+        return pathlib.Path(self.root.filename).joinpath(self.at)
+
+    def read_text(self, *args, **kwargs):
+        with self.open('r', *args, **kwargs) as strm:
+            return strm.read()
+
+    def read_bytes(self):
+        with self.open('rb') as strm:
+            return strm.read()
+
+    def _is_child(self, path):
+        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
+
+    def _next(self, at):
+        return self.__class__(self.root, at)
+
+    def is_dir(self):
+        return not self.at or self.at.endswith("/")
+
+    def is_file(self):
+        return self.exists() and not self.is_dir()
+
+    def exists(self):
+        return self.at in self.root._name_set()
+
+    def iterdir(self):
+        if not self.is_dir():
+            raise ValueError("Can't listdir a file")
+        subs = map(self._next, self.root.namelist())
+        return filter(self._is_child, subs)
+
+    def __str__(self):
+        return posixpath.join(self.root.filename, self.at)
+
+    def __repr__(self):
+        return self.__repr.format(self=self)
+
+    def joinpath(self, *other):
+        next = posixpath.join(self.at, *map(_pathlib_compat, other))
+        return self._next(self.root.resolve_dir(next))
+
+    __truediv__ = joinpath
+
+    @property
+    def parent(self):
+        if not self.at:
+            return self.filename.parent
+        parent_at = posixpath.dirname(self.at.rstrip('/'))
+        if parent_at:
+            parent_at += '/'
+        return self._next(parent_at)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5acd7687d642f06de84b38f5842c41ae14d5f24a
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__init__.py
@@ -0,0 +1,12 @@
+from distutils.command.bdist import bdist
+import sys
+
+if 'egg' not in bdist.format_commands:
+    try:
+        bdist.format_commands['egg'] = ('bdist_egg', "Python .egg file")
+    except TypeError:
+        # For backward compatibility with older distutils (stdlib)
+        bdist.format_command['egg'] = ('bdist_egg', "Python .egg file")
+        bdist.format_commands.append('egg')
+
+del bdist, sys
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9dd9824d9aa3bf77aed6090769631471688aa3c4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/_requirestxt.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/_requirestxt.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d944b4f1d9259dbb8b23c11068d0f8329bccb007
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/_requirestxt.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/alias.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/alias.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db9b693b6d8b61fbd824861a8cd4de369fb28a1d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/alias.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_egg.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_egg.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ced756481d5f5764ccbd1f29a64b4cbf48ec3055
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_egg.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_rpm.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_rpm.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c82b84ef4b31c87d534c066ff281da3518bddb5e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_rpm.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_wheel.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_wheel.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..76841505b220184638e4767991fb5bda19ea3cbb
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/bdist_wheel.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0acc4ba82f7e9686399a4c4ed5356cb7ddd7febc
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_clib.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_clib.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4f1902c359ac294fb2cd549dbc4ab62a13bd7c77
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_clib.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_ext.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_ext.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d0f176a35fd4b978ef15ad136724cde15ad83ef4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_ext.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_py.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_py.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d41613d5f6bd09e02fc32afd88b8ac47b7c6d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/build_py.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/develop.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/develop.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dc3204305022f6f32a729a155204d3ed6fa813fb
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/develop.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/dist_info.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/dist_info.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..998b7a8697583ec3864ff737ab8866f34166da7a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/dist_info.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/editable_wheel.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/editable_wheel.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4123293943adf273ca2b8303d4e33f935d706c8f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/editable_wheel.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/egg_info.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/egg_info.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..052bde4b861f8ea7f3433fc1ad784ea9bddbc225
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/egg_info.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..76d7cea558d85f61c64fa083e0397ce0b6be4a56
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_egg_info.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_egg_info.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..13f7665d0c7a506813f74fc44b28113a76771a17
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_egg_info.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_lib.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_lib.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..406064e6e75e834fc4a482d5949d1a4951b2fa63
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_lib.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_scripts.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_scripts.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..93510e46d1c5e5eb0fed5d4e935e13f014a6bd29
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/install_scripts.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/register.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/register.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eccb0f377e493ad4013f06045453973b138788a8
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/register.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/rotate.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/rotate.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79dc3c4db89cbff276717969d83279ea623498b4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/rotate.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/saveopts.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/saveopts.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2b9260a97338aa4987ee0e9ebbfa0c233461d080
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/saveopts.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/sdist.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/sdist.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1e7aa3d4b8ddc27c5bf35375ac6258985421662c
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/sdist.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/setopt.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/setopt.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8b48e8c2695dd797c6741a60ab1a5ac76bbd602
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/setopt.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/test.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/test.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..51b3e73f7c9130a53561dd9af59e987568569e90
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/test.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/upload.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/upload.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bc6794ef611cc94d5c2a13dfca8130c86b16ce9a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/upload.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/upload_docs.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/upload_docs.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce4d3207a5e1d5b47a3f062077c8cb0cd16bd905
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/__pycache__/upload_docs.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/alias.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/alias.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7b4d5456a7fa18d31d8ab4e9ef120b42e61ef13
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/alias.py
@@ -0,0 +1,78 @@
+from distutils.errors import DistutilsOptionError
+
+from setuptools.command.setopt import edit_config, option_base, config_file
+
+
+def shquote(arg):
+    """Quote an argument for later parsing by shlex.split()"""
+    for c in '"', "'", "\\", "#":
+        if c in arg:
+            return repr(arg)
+    if arg.split() != [arg]:
+        return repr(arg)
+    return arg
+
+
+class alias(option_base):
+    """Define a shortcut that invokes one or more commands"""
+
+    description = "define a shortcut to invoke one or more commands"
+    command_consumes_arguments = True
+
+    user_options = [
+        ('remove', 'r', 'remove (unset) the alias'),
+    ] + option_base.user_options
+
+    boolean_options = option_base.boolean_options + ['remove']
+
+    def initialize_options(self):
+        option_base.initialize_options(self)
+        self.args = None
+        self.remove = None
+
+    def finalize_options(self):
+        option_base.finalize_options(self)
+        if self.remove and len(self.args) != 1:
+            raise DistutilsOptionError(
+                "Must specify exactly one argument (the alias name) when "
+                "using --remove"
+            )
+
+    def run(self):
+        aliases = self.distribution.get_option_dict('aliases')
+
+        if not self.args:
+            print("Command Aliases")
+            print("---------------")
+            for alias in aliases:
+                print("setup.py alias", format_alias(alias, aliases))
+            return
+
+        elif len(self.args) == 1:
+            (alias,) = self.args
+            if self.remove:
+                command = None
+            elif alias in aliases:
+                print("setup.py alias", format_alias(alias, aliases))
+                return
+            else:
+                print("No alias definition found for %r" % alias)
+                return
+        else:
+            alias = self.args[0]
+            command = ' '.join(map(shquote, self.args[1:]))
+
+        edit_config(self.filename, {'aliases': {alias: command}}, self.dry_run)
+
+
+def format_alias(name, aliases):
+    source, command = aliases[name]
+    if source == config_file('global'):
+        source = '--global-config '
+    elif source == config_file('user'):
+        source = '--user-config '
+    elif source == config_file('local'):
+        source = ''
+    else:
+        source = '--filename=%r' % source
+    return source + name + ' ' + command
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc765a17aecdc1cf5fc3bba18341c46463e1edc0
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+from typing import Protocol
+from distutils.command.build import build as _build
+
+_ORIGINAL_SUBCOMMANDS = {"build_py", "build_clib", "build_ext", "build_scripts"}
+
+
+class build(_build):
+    # copy to avoid sharing the object with parent class
+    sub_commands = _build.sub_commands[:]
+
+
+class SubCommand(Protocol):
+    """In order to support editable installations (see :pep:`660`) all
+    build subcommands **SHOULD** implement this protocol. They also **MUST** inherit
+    from ``setuptools.Command``.
+
+    When creating an :pep:`editable wheel <660>`, ``setuptools`` will try to evaluate
+    custom ``build`` subcommands using the following procedure:
+
+    1. ``setuptools`` will set the ``editable_mode`` attribute to ``True``
+    2. ``setuptools`` will execute the ``run()`` command.
+
+       .. important::
+          Subcommands **SHOULD** take advantage of ``editable_mode=True`` to adequate
+          its behaviour or perform optimisations.
+
+          For example, if a subcommand doesn't need to generate an extra file and
+          all it does is to copy a source file into the build directory,
+          ``run()`` **SHOULD** simply "early return".
+
+          Similarly, if the subcommand creates files that would be placed alongside
+          Python files in the final distribution, during an editable install
+          the command **SHOULD** generate these files "in place" (i.e. write them to
+          the original source directory, instead of using the build directory).
+          Note that ``get_output_mapping()`` should reflect that and include mappings
+          for "in place" builds accordingly.
+
+    3. ``setuptools`` use any knowledge it can derive from the return values of
+       ``get_outputs()`` and ``get_output_mapping()`` to create an editable wheel.
+       When relevant ``setuptools`` **MAY** attempt to use file links based on the value
+       of ``get_output_mapping()``. Alternatively, ``setuptools`` **MAY** attempt to use
+       :doc:`import hooks ` to redirect any attempt to import
+       to the directory with the original source code and other files built in place.
+
+    Please note that custom sub-commands **SHOULD NOT** rely on ``run()`` being
+    executed (or not) to provide correct return values for ``get_outputs()``,
+    ``get_output_mapping()`` or ``get_source_files()``. The ``get_*`` methods should
+    work independently of ``run()``.
+    """
+
+    editable_mode: bool = False
+    """Boolean flag that will be set to ``True`` when setuptools is used for an
+    editable installation (see :pep:`660`).
+    Implementations **SHOULD** explicitly set the default value of this attribute to
+    ``False``.
+    When subcommands run, they can use this flag to perform optimizations or change
+    their behaviour accordingly.
+    """
+
+    build_lib: str
+    """String representing the directory where the build artifacts should be stored,
+    e.g. ``build/lib``.
+    For example, if a distribution wants to provide a Python module named ``pkg.mod``,
+    then a corresponding file should be written to ``{build_lib}/package/module.py``.
+    A way of thinking about this is that the files saved under ``build_lib``
+    would be eventually copied to one of the directories in :obj:`site.PREFIXES`
+    upon installation.
+
+    A command that produces platform-independent files (e.g. compiling text templates
+    into Python functions), **CAN** initialize ``build_lib`` by copying its value from
+    the ``build_py`` command. On the other hand, a command that produces
+    platform-specific files **CAN** initialize ``build_lib`` by copying its value from
+    the ``build_ext`` command. In general this is done inside the ``finalize_options``
+    method with the help of the ``set_undefined_options`` command::
+
+        def finalize_options(self):
+            self.set_undefined_options("build_py", ("build_lib", "build_lib"))
+            ...
+    """
+
+    def initialize_options(self):
+        """(Required by the original :class:`setuptools.Command` interface)"""
+
+    def finalize_options(self):
+        """(Required by the original :class:`setuptools.Command` interface)"""
+
+    def run(self):
+        """(Required by the original :class:`setuptools.Command` interface)"""
+
+    def get_source_files(self) -> list[str]:
+        """
+        Return a list of all files that are used by the command to create the expected
+        outputs.
+        For example, if your build command transpiles Java files into Python, you should
+        list here all the Java files.
+        The primary purpose of this function is to help populating the ``sdist``
+        with all the files necessary to build the distribution.
+        All files should be strings relative to the project root directory.
+        """
+
+    def get_outputs(self) -> list[str]:
+        """
+        Return a list of files intended for distribution as they would have been
+        produced by the build.
+        These files should be strings in the form of
+        ``"{build_lib}/destination/file/path"``.
+
+        .. note::
+           The return value of ``get_output()`` should include all files used as keys
+           in ``get_output_mapping()`` plus files that are generated during the build
+           and don't correspond to any source file already present in the project.
+        """
+
+    def get_output_mapping(self) -> dict[str, str]:
+        """
+        Return a mapping between destination files as they would be produced by the
+        build (dict keys) into the respective existing (source) files (dict values).
+        Existing (source) files should be represented as strings relative to the project
+        root directory.
+        Destination files should be strings in the form of
+        ``"{build_lib}/destination/file/path"``.
+        """
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build_clib.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build_clib.py
new file mode 100644
index 0000000000000000000000000000000000000000..acd4d1d3bae0973b845c988f1387aff5d3b5057d
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build_clib.py
@@ -0,0 +1,104 @@
+import distutils.command.build_clib as orig
+from distutils.errors import DistutilsSetupError
+from distutils import log
+
+try:
+    from distutils._modified import newer_pairwise_group
+except ImportError:
+    # fallback for SETUPTOOLS_USE_DISTUTILS=stdlib
+    from .._distutils._modified import newer_pairwise_group
+
+
+class build_clib(orig.build_clib):
+    """
+    Override the default build_clib behaviour to do the following:
+
+    1. Implement a rudimentary timestamp-based dependency system
+       so 'compile()' doesn't run every time.
+    2. Add more keys to the 'build_info' dictionary:
+        * obj_deps - specify dependencies for each object compiled.
+                     this should be a dictionary mapping a key
+                     with the source filename to a list of
+                     dependencies. Use an empty string for global
+                     dependencies.
+        * cflags   - specify a list of additional flags to pass to
+                     the compiler.
+    """
+
+    def build_libraries(self, libraries):
+        for lib_name, build_info in libraries:
+            sources = build_info.get('sources')
+            if sources is None or not isinstance(sources, (list, tuple)):
+                raise DistutilsSetupError(
+                    "in 'libraries' option (library '%s'), "
+                    "'sources' must be present and must be "
+                    "a list of source filenames" % lib_name
+                )
+            sources = sorted(list(sources))
+
+            log.info("building '%s' library", lib_name)
+
+            # Make sure everything is the correct type.
+            # obj_deps should be a dictionary of keys as sources
+            # and a list/tuple of files that are its dependencies.
+            obj_deps = build_info.get('obj_deps', dict())
+            if not isinstance(obj_deps, dict):
+                raise DistutilsSetupError(
+                    "in 'libraries' option (library '%s'), "
+                    "'obj_deps' must be a dictionary of "
+                    "type 'source: list'" % lib_name
+                )
+            dependencies = []
+
+            # Get the global dependencies that are specified by the '' key.
+            # These will go into every source's dependency list.
+            global_deps = obj_deps.get('', list())
+            if not isinstance(global_deps, (list, tuple)):
+                raise DistutilsSetupError(
+                    "in 'libraries' option (library '%s'), "
+                    "'obj_deps' must be a dictionary of "
+                    "type 'source: list'" % lib_name
+                )
+
+            # Build the list to be used by newer_pairwise_group
+            # each source will be auto-added to its dependencies.
+            for source in sources:
+                src_deps = [source]
+                src_deps.extend(global_deps)
+                extra_deps = obj_deps.get(source, list())
+                if not isinstance(extra_deps, (list, tuple)):
+                    raise DistutilsSetupError(
+                        "in 'libraries' option (library '%s'), "
+                        "'obj_deps' must be a dictionary of "
+                        "type 'source: list'" % lib_name
+                    )
+                src_deps.extend(extra_deps)
+                dependencies.append(src_deps)
+
+            expected_objects = self.compiler.object_filenames(
+                sources,
+                output_dir=self.build_temp,
+            )
+
+            if newer_pairwise_group(dependencies, expected_objects) != ([], []):
+                # First, compile the source code to object files in the library
+                # directory.  (This should probably change to putting object
+                # files in a temporary build directory.)
+                macros = build_info.get('macros')
+                include_dirs = build_info.get('include_dirs')
+                cflags = build_info.get('cflags')
+                self.compiler.compile(
+                    sources,
+                    output_dir=self.build_temp,
+                    macros=macros,
+                    include_dirs=include_dirs,
+                    extra_postargs=cflags,
+                    debug=self.debug,
+                )
+
+            # Now "link" the object files together into a static library.
+            # (On Unix at least, this isn't really linking -- it just
+            # builds an archive.  Whatever.)
+            self.compiler.create_static_lib(
+                expected_objects, lib_name, output_dir=self.build_clib, debug=self.debug
+            )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build_ext.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build_ext.py
new file mode 100644
index 0000000000000000000000000000000000000000..508704f3c040b2602ab0ca4ca8cb4c7a893a9014
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/build_ext.py
@@ -0,0 +1,459 @@
+from __future__ import annotations
+
+import os
+import sys
+import itertools
+from importlib.machinery import EXTENSION_SUFFIXES
+from importlib.util import cache_from_source as _compiled_file_name
+from typing import Iterator
+from pathlib import Path
+
+from distutils.command.build_ext import build_ext as _du_build_ext
+from distutils.ccompiler import new_compiler
+from distutils.sysconfig import customize_compiler, get_config_var
+from distutils import log
+
+from setuptools.errors import BaseError
+from setuptools.extension import Extension, Library
+
+try:
+    # Attempt to use Cython for building extensions, if available
+    from Cython.Distutils.build_ext import build_ext as _build_ext  # type: ignore[import-not-found] # Cython not installed on CI tests
+
+    # Additionally, assert that the compiler module will load
+    # also. Ref #1229.
+    __import__('Cython.Compiler.Main')
+except ImportError:
+    _build_ext = _du_build_ext
+
+# make sure _config_vars is initialized
+get_config_var("LDSHARED")
+# Not publicly exposed in typeshed distutils stubs, but this is done on purpose
+# See https://github.com/pypa/setuptools/pull/4228#issuecomment-1959856400
+from distutils.sysconfig import _config_vars as _CONFIG_VARS  # type: ignore # noqa
+
+
+def _customize_compiler_for_shlib(compiler):
+    if sys.platform == "darwin":
+        # building .dylib requires additional compiler flags on OSX; here we
+        # temporarily substitute the pyconfig.h variables so that distutils'
+        # 'customize_compiler' uses them before we build the shared libraries.
+        tmp = _CONFIG_VARS.copy()
+        try:
+            # XXX Help!  I don't have any idea whether these are right...
+            _CONFIG_VARS['LDSHARED'] = (
+                "gcc -Wl,-x -dynamiclib -undefined dynamic_lookup"
+            )
+            _CONFIG_VARS['CCSHARED'] = " -dynamiclib"
+            _CONFIG_VARS['SO'] = ".dylib"
+            customize_compiler(compiler)
+        finally:
+            _CONFIG_VARS.clear()
+            _CONFIG_VARS.update(tmp)
+    else:
+        customize_compiler(compiler)
+
+
+have_rtld = False
+use_stubs = False
+libtype = 'shared'
+
+if sys.platform == "darwin":
+    use_stubs = True
+elif os.name != 'nt':
+    try:
+        import dl  # type: ignore[import-not-found] # https://github.com/python/mypy/issues/13002
+
+        use_stubs = have_rtld = hasattr(dl, 'RTLD_NOW')
+    except ImportError:
+        pass
+
+
+def if_dl(s):
+    return s if have_rtld else ''
+
+
+def get_abi3_suffix():
+    """Return the file extension for an abi3-compliant Extension()"""
+    for suffix in EXTENSION_SUFFIXES:
+        if '.abi3' in suffix:  # Unix
+            return suffix
+        elif suffix == '.pyd':  # Windows
+            return suffix
+    return None
+
+
+class build_ext(_build_ext):
+    editable_mode: bool = False
+    inplace: bool = False
+
+    def run(self):
+        """Build extensions in build directory, then copy if --inplace"""
+        old_inplace, self.inplace = self.inplace, 0
+        _build_ext.run(self)
+        self.inplace = old_inplace
+        if old_inplace:
+            self.copy_extensions_to_source()
+
+    def _get_inplace_equivalent(self, build_py, ext: Extension) -> tuple[str, str]:
+        fullname = self.get_ext_fullname(ext.name)
+        filename = self.get_ext_filename(fullname)
+        modpath = fullname.split('.')
+        package = '.'.join(modpath[:-1])
+        package_dir = build_py.get_package_dir(package)
+        inplace_file = os.path.join(package_dir, os.path.basename(filename))
+        regular_file = os.path.join(self.build_lib, filename)
+        return (inplace_file, regular_file)
+
+    def copy_extensions_to_source(self):
+        build_py = self.get_finalized_command('build_py')
+        for ext in self.extensions:
+            inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext)
+
+            # Always copy, even if source is older than destination, to ensure
+            # that the right extensions for the current Python/platform are
+            # used.
+            if os.path.exists(regular_file) or not ext.optional:
+                self.copy_file(regular_file, inplace_file, level=self.verbose)
+
+            if ext._needs_stub:
+                inplace_stub = self._get_equivalent_stub(ext, inplace_file)
+                self._write_stub_file(inplace_stub, ext, compile=True)
+                # Always compile stub and remove the original (leave the cache behind)
+                # (this behaviour was observed in previous iterations of the code)
+
+    def _get_equivalent_stub(self, ext: Extension, output_file: str) -> str:
+        dir_ = os.path.dirname(output_file)
+        _, _, name = ext.name.rpartition(".")
+        return f"{os.path.join(dir_, name)}.py"
+
+    def _get_output_mapping(self) -> Iterator[tuple[str, str]]:
+        if not self.inplace:
+            return
+
+        build_py = self.get_finalized_command('build_py')
+        opt = self.get_finalized_command('install_lib').optimize or ""
+
+        for ext in self.extensions:
+            inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext)
+            yield (regular_file, inplace_file)
+
+            if ext._needs_stub:
+                # This version of `build_ext` always builds artifacts in another dir,
+                # when "inplace=True" is given it just copies them back.
+                # This is done in the `copy_extensions_to_source` function, which
+                # always compile stub files via `_compile_and_remove_stub`.
+                # At the end of the process, a `.pyc` stub file is created without the
+                # corresponding `.py`.
+
+                inplace_stub = self._get_equivalent_stub(ext, inplace_file)
+                regular_stub = self._get_equivalent_stub(ext, regular_file)
+                inplace_cache = _compiled_file_name(inplace_stub, optimization=opt)
+                output_cache = _compiled_file_name(regular_stub, optimization=opt)
+                yield (output_cache, inplace_cache)
+
+    def get_ext_filename(self, fullname):
+        so_ext = os.getenv('SETUPTOOLS_EXT_SUFFIX')
+        if so_ext:
+            filename = os.path.join(*fullname.split('.')) + so_ext
+        else:
+            filename = _build_ext.get_ext_filename(self, fullname)
+            so_ext = get_config_var('EXT_SUFFIX')
+
+        if fullname in self.ext_map:
+            ext = self.ext_map[fullname]
+            use_abi3 = ext.py_limited_api and get_abi3_suffix()
+            if use_abi3:
+                filename = filename[: -len(so_ext)]
+                so_ext = get_abi3_suffix()
+                filename = filename + so_ext
+            if isinstance(ext, Library):
+                fn, ext = os.path.splitext(filename)
+                return self.shlib_compiler.library_filename(fn, libtype)
+            elif use_stubs and ext._links_to_dynamic:
+                d, fn = os.path.split(filename)
+                return os.path.join(d, 'dl-' + fn)
+        return filename
+
+    def initialize_options(self):
+        _build_ext.initialize_options(self)
+        self.shlib_compiler = None
+        self.shlibs = []
+        self.ext_map = {}
+        self.editable_mode = False
+
+    def finalize_options(self):
+        _build_ext.finalize_options(self)
+        self.extensions = self.extensions or []
+        self.check_extensions_list(self.extensions)
+        self.shlibs = [ext for ext in self.extensions if isinstance(ext, Library)]
+        if self.shlibs:
+            self.setup_shlib_compiler()
+        for ext in self.extensions:
+            ext._full_name = self.get_ext_fullname(ext.name)
+        for ext in self.extensions:
+            fullname = ext._full_name
+            self.ext_map[fullname] = ext
+
+            # distutils 3.1 will also ask for module names
+            # XXX what to do with conflicts?
+            self.ext_map[fullname.split('.')[-1]] = ext
+
+            ltd = self.shlibs and self.links_to_dynamic(ext) or False
+            ns = ltd and use_stubs and not isinstance(ext, Library)
+            ext._links_to_dynamic = ltd
+            ext._needs_stub = ns
+            filename = ext._file_name = self.get_ext_filename(fullname)
+            libdir = os.path.dirname(os.path.join(self.build_lib, filename))
+            if ltd and libdir not in ext.library_dirs:
+                ext.library_dirs.append(libdir)
+            if ltd and use_stubs and os.curdir not in ext.runtime_library_dirs:
+                ext.runtime_library_dirs.append(os.curdir)
+
+        if self.editable_mode:
+            self.inplace = True
+
+    def setup_shlib_compiler(self):
+        compiler = self.shlib_compiler = new_compiler(
+            compiler=self.compiler, dry_run=self.dry_run, force=self.force
+        )
+        _customize_compiler_for_shlib(compiler)
+
+        if self.include_dirs is not None:
+            compiler.set_include_dirs(self.include_dirs)
+        if self.define is not None:
+            # 'define' option is a list of (name,value) tuples
+            for name, value in self.define:
+                compiler.define_macro(name, value)
+        if self.undef is not None:
+            for macro in self.undef:
+                compiler.undefine_macro(macro)
+        if self.libraries is not None:
+            compiler.set_libraries(self.libraries)
+        if self.library_dirs is not None:
+            compiler.set_library_dirs(self.library_dirs)
+        if self.rpath is not None:
+            compiler.set_runtime_library_dirs(self.rpath)
+        if self.link_objects is not None:
+            compiler.set_link_objects(self.link_objects)
+
+        # hack so distutils' build_extension() builds a library instead
+        compiler.link_shared_object = link_shared_object.__get__(compiler)
+
+    def get_export_symbols(self, ext):
+        if isinstance(ext, Library):
+            return ext.export_symbols
+        return _build_ext.get_export_symbols(self, ext)
+
+    def build_extension(self, ext):
+        ext._convert_pyx_sources_to_lang()
+        _compiler = self.compiler
+        try:
+            if isinstance(ext, Library):
+                self.compiler = self.shlib_compiler
+            _build_ext.build_extension(self, ext)
+            if ext._needs_stub:
+                build_lib = self.get_finalized_command('build_py').build_lib
+                self.write_stub(build_lib, ext)
+        finally:
+            self.compiler = _compiler
+
+    def links_to_dynamic(self, ext):
+        """Return true if 'ext' links to a dynamic lib in the same package"""
+        # XXX this should check to ensure the lib is actually being built
+        # XXX as dynamic, and not just using a locally-found version or a
+        # XXX static-compiled version
+        libnames = dict.fromkeys([lib._full_name for lib in self.shlibs])
+        pkg = '.'.join(ext._full_name.split('.')[:-1] + [''])
+        return any(pkg + libname in libnames for libname in ext.libraries)
+
+    def get_source_files(self) -> list[str]:
+        return [*_build_ext.get_source_files(self), *self._get_internal_depends()]
+
+    def _get_internal_depends(self) -> Iterator[str]:
+        """Yield ``ext.depends`` that are contained by the project directory"""
+        project_root = Path(self.distribution.src_root or os.curdir).resolve()
+        depends = (dep for ext in self.extensions for dep in ext.depends)
+
+        def skip(orig_path: str, reason: str) -> None:
+            log.info(
+                "dependency %s won't be automatically "
+                "included in the manifest: the path %s",
+                orig_path,
+                reason,
+            )
+
+        for dep in depends:
+            path = Path(dep)
+
+            if path.is_absolute():
+                skip(dep, "must be relative")
+                continue
+
+            if ".." in path.parts:
+                skip(dep, "can't have `..` segments")
+                continue
+
+            try:
+                resolved = (project_root / path).resolve(strict=True)
+            except OSError:
+                skip(dep, "doesn't exist")
+                continue
+
+            try:
+                resolved.relative_to(project_root)
+            except ValueError:
+                skip(dep, "must be inside the project root")
+                continue
+
+            yield path.as_posix()
+
+    def get_outputs(self) -> list[str]:
+        if self.inplace:
+            return list(self.get_output_mapping().keys())
+        return sorted(_build_ext.get_outputs(self) + self.__get_stubs_outputs())
+
+    def get_output_mapping(self) -> dict[str, str]:
+        """See :class:`setuptools.commands.build.SubCommand`"""
+        mapping = self._get_output_mapping()
+        return dict(sorted(mapping, key=lambda x: x[0]))
+
+    def __get_stubs_outputs(self):
+        # assemble the base name for each extension that needs a stub
+        ns_ext_bases = (
+            os.path.join(self.build_lib, *ext._full_name.split('.'))
+            for ext in self.extensions
+            if ext._needs_stub
+        )
+        # pair each base with the extension
+        pairs = itertools.product(ns_ext_bases, self.__get_output_extensions())
+        return list(base + fnext for base, fnext in pairs)
+
+    def __get_output_extensions(self):
+        yield '.py'
+        yield '.pyc'
+        if self.get_finalized_command('build_py').optimize:
+            yield '.pyo'
+
+    def write_stub(self, output_dir, ext, compile=False):
+        stub_file = os.path.join(output_dir, *ext._full_name.split('.')) + '.py'
+        self._write_stub_file(stub_file, ext, compile)
+
+    def _write_stub_file(self, stub_file: str, ext: Extension, compile=False):
+        log.info("writing stub loader for %s to %s", ext._full_name, stub_file)
+        if compile and os.path.exists(stub_file):
+            raise BaseError(stub_file + " already exists! Please delete.")
+        if not self.dry_run:
+            with open(stub_file, 'w', encoding="utf-8") as f:
+                content = '\n'.join([
+                    "def __bootstrap__():",
+                    "   global __bootstrap__, __file__, __loader__",
+                    "   import sys, os, pkg_resources, importlib.util" + if_dl(", dl"),
+                    "   __file__ = pkg_resources.resource_filename"
+                    "(__name__,%r)" % os.path.basename(ext._file_name),
+                    "   del __bootstrap__",
+                    "   if '__loader__' in globals():",
+                    "       del __loader__",
+                    if_dl("   old_flags = sys.getdlopenflags()"),
+                    "   old_dir = os.getcwd()",
+                    "   try:",
+                    "     os.chdir(os.path.dirname(__file__))",
+                    if_dl("     sys.setdlopenflags(dl.RTLD_NOW)"),
+                    "     spec = importlib.util.spec_from_file_location(",
+                    "                __name__, __file__)",
+                    "     mod = importlib.util.module_from_spec(spec)",
+                    "     spec.loader.exec_module(mod)",
+                    "   finally:",
+                    if_dl("     sys.setdlopenflags(old_flags)"),
+                    "     os.chdir(old_dir)",
+                    "__bootstrap__()",
+                    "",  # terminal \n
+                ])
+                f.write(content)
+        if compile:
+            self._compile_and_remove_stub(stub_file)
+
+    def _compile_and_remove_stub(self, stub_file: str):
+        from distutils.util import byte_compile
+
+        byte_compile([stub_file], optimize=0, force=True, dry_run=self.dry_run)
+        optimize = self.get_finalized_command('install_lib').optimize
+        if optimize > 0:
+            byte_compile(
+                [stub_file],
+                optimize=optimize,
+                force=True,
+                dry_run=self.dry_run,
+            )
+        if os.path.exists(stub_file) and not self.dry_run:
+            os.unlink(stub_file)
+
+
+if use_stubs or os.name == 'nt':
+    # Build shared libraries
+    #
+    def link_shared_object(
+        self,
+        objects,
+        output_libname,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        self.link(
+            self.SHARED_LIBRARY,
+            objects,
+            output_libname,
+            output_dir,
+            libraries,
+            library_dirs,
+            runtime_library_dirs,
+            export_symbols,
+            debug,
+            extra_preargs,
+            extra_postargs,
+            build_temp,
+            target_lang,
+        )
+
+else:
+    # Build static libraries everywhere else
+    libtype = 'static'
+
+    def link_shared_object(
+        self,
+        objects,
+        output_libname,
+        output_dir=None,
+        libraries=None,
+        library_dirs=None,
+        runtime_library_dirs=None,
+        export_symbols=None,
+        debug=False,
+        extra_preargs=None,
+        extra_postargs=None,
+        build_temp=None,
+        target_lang=None,
+    ):
+        # XXX we need to either disallow these attrs on Library instances,
+        # or warn/abort here if set, or something...
+        # libraries=None, library_dirs=None, runtime_library_dirs=None,
+        # export_symbols=None, extra_preargs=None, extra_postargs=None,
+        # build_temp=None
+
+        assert output_dir is None  # distutils build_ext doesn't pass this
+        output_dir, filename = os.path.split(output_libname)
+        basename, ext = os.path.splitext(filename)
+        if self.library_filename("x").startswith('lib'):
+            # strip 'lib' prefix; this is kludgy if some platform uses
+            # a different prefix
+            basename = basename[3:]
+
+        self.create_static_lib(objects, basename, output_dir, debug, target_lang)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/easy_install.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/easy_install.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6ce3fcc0557286233db1d29c014a56e2ebb1ac2
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/easy_install.py
@@ -0,0 +1,2362 @@
+"""
+Easy Install
+------------
+
+A tool for doing automatic download/extract/build of distutils-based Python
+packages.  For detailed documentation, see the accompanying EasyInstall.txt
+file, or visit the `EasyInstall home page`__.
+
+__ https://setuptools.pypa.io/en/latest/deprecated/easy_install.html
+
+"""
+
+from __future__ import annotations
+
+from glob import glob
+from distutils.util import get_platform
+from distutils.util import convert_path, subst_vars
+from distutils.errors import (
+    DistutilsArgError,
+    DistutilsOptionError,
+    DistutilsError,
+    DistutilsPlatformError,
+)
+from distutils import log, dir_util
+from distutils.command.build_scripts import first_line_re
+from distutils.command import install
+import sys
+import os
+import zipimport
+import shutil
+import tempfile
+import zipfile
+import re
+import stat
+import random
+import textwrap
+import warnings
+import site
+import struct
+import contextlib
+import subprocess
+import shlex
+import io
+import configparser
+import sysconfig
+
+from sysconfig import get_path
+
+from setuptools import Command
+from setuptools.sandbox import run_setup
+from setuptools.command import setopt
+from setuptools.archive_util import unpack_archive
+from setuptools.package_index import (
+    PackageIndex,
+    parse_requirement_arg,
+    URL_SCHEME,
+)
+from setuptools.command import bdist_egg, egg_info
+from setuptools.warnings import SetuptoolsDeprecationWarning, SetuptoolsWarning
+from setuptools.wheel import Wheel
+from pkg_resources import (
+    normalize_path,
+    resource_string,
+    get_distribution,
+    find_distributions,
+    Environment,
+    Requirement,
+    Distribution,
+    PathMetadata,
+    EggMetadata,
+    WorkingSet,
+    DistributionNotFound,
+    VersionConflict,
+    DEVELOP_DIST,
+)
+import pkg_resources
+from ..compat import py39, py311
+from .._path import ensure_directory
+from ..extern.jaraco.text import yield_lines
+
+
+# Turn on PEP440Warnings
+warnings.filterwarnings("default", category=pkg_resources.PEP440Warning)
+
+__all__ = [
+    'easy_install',
+    'PthDistributions',
+    'extract_wininst_cfg',
+    'get_exe_prefixes',
+]
+
+
+def is_64bit():
+    return struct.calcsize("P") == 8
+
+
+def _to_bytes(s):
+    return s.encode('utf8')
+
+
+def isascii(s):
+    try:
+        s.encode('ascii')
+    except UnicodeError:
+        return False
+    return True
+
+
+def _one_liner(text):
+    return textwrap.dedent(text).strip().replace('\n', '; ')
+
+
+class easy_install(Command):
+    """Manage a download/build/install process"""
+
+    description = "Find/get/install Python packages"
+    command_consumes_arguments = True
+
+    user_options = [
+        ('prefix=', None, "installation prefix"),
+        ("zip-ok", "z", "install package as a zipfile"),
+        ("multi-version", "m", "make apps have to require() a version"),
+        ("upgrade", "U", "force upgrade (searches PyPI for latest versions)"),
+        ("install-dir=", "d", "install package to DIR"),
+        ("script-dir=", "s", "install scripts to DIR"),
+        ("exclude-scripts", "x", "Don't install scripts"),
+        ("always-copy", "a", "Copy all needed packages to install dir"),
+        ("index-url=", "i", "base URL of Python Package Index"),
+        ("find-links=", "f", "additional URL(s) to search for packages"),
+        ("build-directory=", "b", "download/extract/build in DIR; keep the results"),
+        (
+            'optimize=',
+            'O',
+            "also compile with optimization: -O1 for \"python -O\", "
+            "-O2 for \"python -OO\", and -O0 to disable [default: -O0]",
+        ),
+        ('record=', None, "filename in which to record list of installed files"),
+        ('always-unzip', 'Z', "don't install as a zipfile, no matter what"),
+        ('site-dirs=', 'S', "list of directories where .pth files work"),
+        ('editable', 'e', "Install specified packages in editable form"),
+        ('no-deps', 'N', "don't install dependencies"),
+        ('allow-hosts=', 'H', "pattern(s) that hostnames must match"),
+        ('local-snapshots-ok', 'l', "allow building eggs from local checkouts"),
+        ('version', None, "print version information and exit"),
+        (
+            'no-find-links',
+            None,
+            "Don't load find-links defined in packages being installed",
+        ),
+        ('user', None, "install in user site-package '%s'" % site.USER_SITE),
+    ]
+    boolean_options = [
+        'zip-ok',
+        'multi-version',
+        'exclude-scripts',
+        'upgrade',
+        'always-copy',
+        'editable',
+        'no-deps',
+        'local-snapshots-ok',
+        'version',
+        'user',
+    ]
+
+    negative_opt = {'always-unzip': 'zip-ok'}
+    create_index = PackageIndex
+
+    def initialize_options(self):
+        EasyInstallDeprecationWarning.emit()
+
+        # the --user option seems to be an opt-in one,
+        # so the default should be False.
+        self.user = False
+        self.zip_ok = self.local_snapshots_ok = None
+        self.install_dir = self.script_dir = self.exclude_scripts = None
+        self.index_url = None
+        self.find_links = None
+        self.build_directory = None
+        self.args = None
+        self.optimize = self.record = None
+        self.upgrade = self.always_copy = self.multi_version = None
+        self.editable = self.no_deps = self.allow_hosts = None
+        self.root = self.prefix = self.no_report = None
+        self.version = None
+        self.install_purelib = None  # for pure module distributions
+        self.install_platlib = None  # non-pure (dists w/ extensions)
+        self.install_headers = None  # for C/C++ headers
+        self.install_lib = None  # set to either purelib or platlib
+        self.install_scripts = None
+        self.install_data = None
+        self.install_base = None
+        self.install_platbase = None
+        self.install_userbase = site.USER_BASE
+        self.install_usersite = site.USER_SITE
+        self.no_find_links = None
+
+        # Options not specifiable via command line
+        self.package_index = None
+        self.pth_file = self.always_copy_from = None
+        self.site_dirs = None
+        self.installed_projects = {}
+        # Always read easy_install options, even if we are subclassed, or have
+        # an independent instance created.  This ensures that defaults will
+        # always come from the standard configuration file(s)' "easy_install"
+        # section, even if this is a "develop" or "install" command, or some
+        # other embedding.
+        self._dry_run = None
+        self.verbose = self.distribution.verbose
+        self.distribution._set_command_options(
+            self, self.distribution.get_option_dict('easy_install')
+        )
+
+    def delete_blockers(self, blockers):
+        extant_blockers = (
+            filename
+            for filename in blockers
+            if os.path.exists(filename) or os.path.islink(filename)
+        )
+        list(map(self._delete_path, extant_blockers))
+
+    def _delete_path(self, path):
+        log.info("Deleting %s", path)
+        if self.dry_run:
+            return
+
+        is_tree = os.path.isdir(path) and not os.path.islink(path)
+        remover = _rmtree if is_tree else os.unlink
+        remover(path)
+
+    @staticmethod
+    def _render_version():
+        """
+        Render the Setuptools version and installation details, then exit.
+        """
+        ver = '{}.{}'.format(*sys.version_info)
+        dist = get_distribution('setuptools')
+        tmpl = 'setuptools {dist.version} from {dist.location} (Python {ver})'
+        print(tmpl.format(**locals()))
+        raise SystemExit
+
+    def finalize_options(self):  # noqa: C901  # is too complex (25)  # FIXME
+        self.version and self._render_version()
+
+        py_version = sys.version.split()[0]
+
+        self.config_vars = dict(sysconfig.get_config_vars())
+
+        self.config_vars.update({
+            'dist_name': self.distribution.get_name(),
+            'dist_version': self.distribution.get_version(),
+            'dist_fullname': self.distribution.get_fullname(),
+            'py_version': py_version,
+            'py_version_short': f'{sys.version_info.major}.{sys.version_info.minor}',
+            'py_version_nodot': f'{sys.version_info.major}{sys.version_info.minor}',
+            'sys_prefix': self.config_vars['prefix'],
+            'sys_exec_prefix': self.config_vars['exec_prefix'],
+            # Only POSIX systems have abiflags
+            'abiflags': getattr(sys, 'abiflags', ''),
+            # Only python 3.9+ has platlibdir
+            'platlibdir': getattr(sys, 'platlibdir', 'lib'),
+        })
+        with contextlib.suppress(AttributeError):
+            # only for distutils outside stdlib
+            self.config_vars.update({
+                'implementation_lower': install._get_implementation().lower(),
+                'implementation': install._get_implementation(),
+            })
+
+        # pypa/distutils#113 Python 3.9 compat
+        self.config_vars.setdefault(
+            'py_version_nodot_plat',
+            getattr(sys, 'windir', '').replace('.', ''),
+        )
+
+        self.config_vars['userbase'] = self.install_userbase
+        self.config_vars['usersite'] = self.install_usersite
+        if self.user and not site.ENABLE_USER_SITE:
+            log.warn("WARNING: The user site-packages directory is disabled.")
+
+        self._fix_install_dir_for_user_site()
+
+        self.expand_basedirs()
+        self.expand_dirs()
+
+        self._expand(
+            'install_dir',
+            'script_dir',
+            'build_directory',
+            'site_dirs',
+        )
+        # If a non-default installation directory was specified, default the
+        # script directory to match it.
+        if self.script_dir is None:
+            self.script_dir = self.install_dir
+
+        if self.no_find_links is None:
+            self.no_find_links = False
+
+        # Let install_dir get set by install_lib command, which in turn
+        # gets its info from the install command, and takes into account
+        # --prefix and --home and all that other crud.
+        self.set_undefined_options('install_lib', ('install_dir', 'install_dir'))
+        # Likewise, set default script_dir from 'install_scripts.install_dir'
+        self.set_undefined_options('install_scripts', ('install_dir', 'script_dir'))
+
+        if self.user and self.install_purelib:
+            self.install_dir = self.install_purelib
+            self.script_dir = self.install_scripts
+        # default --record from the install command
+        self.set_undefined_options('install', ('record', 'record'))
+        self.all_site_dirs = get_site_dirs()
+        self.all_site_dirs.extend(self._process_site_dirs(self.site_dirs))
+
+        if not self.editable:
+            self.check_site_dir()
+        default_index = os.getenv("__EASYINSTALL_INDEX", "https://pypi.org/simple/")
+        # ^ Private API for testing purposes only
+        self.index_url = self.index_url or default_index
+        self.shadow_path = self.all_site_dirs[:]
+        for path_item in self.install_dir, normalize_path(self.script_dir):
+            if path_item not in self.shadow_path:
+                self.shadow_path.insert(0, path_item)
+
+        if self.allow_hosts is not None:
+            hosts = [s.strip() for s in self.allow_hosts.split(',')]
+        else:
+            hosts = ['*']
+        if self.package_index is None:
+            self.package_index = self.create_index(
+                self.index_url,
+                search_path=self.shadow_path,
+                hosts=hosts,
+            )
+        self.local_index = Environment(self.shadow_path + sys.path)
+
+        if self.find_links is not None:
+            if isinstance(self.find_links, str):
+                self.find_links = self.find_links.split()
+        else:
+            self.find_links = []
+        if self.local_snapshots_ok:
+            self.package_index.scan_egg_links(self.shadow_path + sys.path)
+        if not self.no_find_links:
+            self.package_index.add_find_links(self.find_links)
+        self.set_undefined_options('install_lib', ('optimize', 'optimize'))
+        self.optimize = self._validate_optimize(self.optimize)
+
+        if self.editable and not self.build_directory:
+            raise DistutilsArgError(
+                "Must specify a build directory (-b) when using --editable"
+            )
+        if not self.args:
+            raise DistutilsArgError(
+                "No urls, filenames, or requirements specified (see --help)"
+            )
+
+        self.outputs = []
+
+    @staticmethod
+    def _process_site_dirs(site_dirs):
+        if site_dirs is None:
+            return
+
+        normpath = map(normalize_path, sys.path)
+        site_dirs = [os.path.expanduser(s.strip()) for s in site_dirs.split(',')]
+        for d in site_dirs:
+            if not os.path.isdir(d):
+                log.warn("%s (in --site-dirs) does not exist", d)
+            elif normalize_path(d) not in normpath:
+                raise DistutilsOptionError(d + " (in --site-dirs) is not on sys.path")
+            else:
+                yield normalize_path(d)
+
+    @staticmethod
+    def _validate_optimize(value):
+        try:
+            value = int(value)
+            if value not in range(3):
+                raise ValueError
+        except ValueError as e:
+            raise DistutilsOptionError("--optimize must be 0, 1, or 2") from e
+
+        return value
+
+    def _fix_install_dir_for_user_site(self):
+        """
+        Fix the install_dir if "--user" was used.
+        """
+        if not self.user:
+            return
+
+        self.create_home_path()
+        if self.install_userbase is None:
+            msg = "User base directory is not specified"
+            raise DistutilsPlatformError(msg)
+        self.install_base = self.install_platbase = self.install_userbase
+        scheme_name = f'{os.name}_user'
+        self.select_scheme(scheme_name)
+
+    def _expand_attrs(self, attrs):
+        for attr in attrs:
+            val = getattr(self, attr)
+            if val is not None:
+                if os.name == 'posix' or os.name == 'nt':
+                    val = os.path.expanduser(val)
+                val = subst_vars(val, self.config_vars)
+                setattr(self, attr, val)
+
+    def expand_basedirs(self):
+        """Calls `os.path.expanduser` on install_base, install_platbase and
+        root."""
+        self._expand_attrs(['install_base', 'install_platbase', 'root'])
+
+    def expand_dirs(self):
+        """Calls `os.path.expanduser` on install dirs."""
+        dirs = [
+            'install_purelib',
+            'install_platlib',
+            'install_lib',
+            'install_headers',
+            'install_scripts',
+            'install_data',
+        ]
+        self._expand_attrs(dirs)
+
+    def run(self, show_deprecation=True):
+        if show_deprecation:
+            self.announce(
+                "WARNING: The easy_install command is deprecated "
+                "and will be removed in a future version.",
+                log.WARN,
+            )
+        if self.verbose != self.distribution.verbose:
+            log.set_verbosity(self.verbose)
+        try:
+            for spec in self.args:
+                self.easy_install(spec, not self.no_deps)
+            if self.record:
+                outputs = self.outputs
+                if self.root:  # strip any package prefix
+                    root_len = len(self.root)
+                    for counter in range(len(outputs)):
+                        outputs[counter] = outputs[counter][root_len:]
+                from distutils import file_util
+
+                self.execute(
+                    file_util.write_file,
+                    (self.record, outputs),
+                    "writing list of installed files to '%s'" % self.record,
+                )
+            self.warn_deprecated_options()
+        finally:
+            log.set_verbosity(self.distribution.verbose)
+
+    def pseudo_tempname(self):
+        """Return a pseudo-tempname base in the install directory.
+        This code is intentionally naive; if a malicious party can write to
+        the target directory you're already in deep doodoo.
+        """
+        try:
+            pid = os.getpid()
+        except Exception:
+            pid = random.randint(0, sys.maxsize)
+        return os.path.join(self.install_dir, "test-easy-install-%s" % pid)
+
+    def warn_deprecated_options(self):
+        pass
+
+    def check_site_dir(self):  # is too complex (12)  # FIXME
+        """Verify that self.install_dir is .pth-capable dir, if needed"""
+
+        instdir = normalize_path(self.install_dir)
+        pth_file = os.path.join(instdir, 'easy-install.pth')
+
+        if not os.path.exists(instdir):
+            try:
+                os.makedirs(instdir)
+            except OSError:
+                self.cant_write_to_target()
+
+        # Is it a configured, PYTHONPATH, implicit, or explicit site dir?
+        is_site_dir = instdir in self.all_site_dirs
+
+        if not is_site_dir and not self.multi_version:
+            # No?  Then directly test whether it does .pth file processing
+            is_site_dir = self.check_pth_processing()
+        else:
+            # make sure we can write to target dir
+            testfile = self.pseudo_tempname() + '.write-test'
+            test_exists = os.path.exists(testfile)
+            try:
+                if test_exists:
+                    os.unlink(testfile)
+                open(testfile, 'wb').close()
+                os.unlink(testfile)
+            except OSError:
+                self.cant_write_to_target()
+
+        if not is_site_dir and not self.multi_version:
+            # Can't install non-multi to non-site dir with easy_install
+            pythonpath = os.environ.get('PYTHONPATH', '')
+            log.warn(self.__no_default_msg, self.install_dir, pythonpath)
+
+        if is_site_dir:
+            if self.pth_file is None:
+                self.pth_file = PthDistributions(pth_file, self.all_site_dirs)
+        else:
+            self.pth_file = None
+
+        if self.multi_version and not os.path.exists(pth_file):
+            self.pth_file = None  # don't create a .pth file
+        self.install_dir = instdir
+
+    __cant_write_msg = textwrap.dedent(
+        """
+        can't create or remove files in install directory
+
+        The following error occurred while trying to add or remove files in the
+        installation directory:
+
+            %s
+
+        The installation directory you specified (via --install-dir, --prefix, or
+        the distutils default setting) was:
+
+            %s
+        """
+    ).lstrip()
+
+    __not_exists_id = textwrap.dedent(
+        """
+        This directory does not currently exist.  Please create it and try again, or
+        choose a different installation directory (using the -d or --install-dir
+        option).
+        """
+    ).lstrip()
+
+    __access_msg = textwrap.dedent(
+        """
+        Perhaps your account does not have write access to this directory?  If the
+        installation directory is a system-owned directory, you may need to sign in
+        as the administrator or "root" account.  If you do not have administrative
+        access to this machine, you may wish to choose a different installation
+        directory, preferably one that is listed in your PYTHONPATH environment
+        variable.
+
+        For information on other options, you may wish to consult the
+        documentation at:
+
+          https://setuptools.pypa.io/en/latest/deprecated/easy_install.html
+
+        Please make the appropriate changes for your system and try again.
+        """
+    ).lstrip()
+
+    def cant_write_to_target(self):
+        msg = self.__cant_write_msg % (
+            sys.exc_info()[1],
+            self.install_dir,
+        )
+
+        if not os.path.exists(self.install_dir):
+            msg += '\n' + self.__not_exists_id
+        else:
+            msg += '\n' + self.__access_msg
+        raise DistutilsError(msg)
+
+    def check_pth_processing(self):  # noqa: C901
+        """Empirically verify whether .pth files are supported in inst. dir"""
+        instdir = self.install_dir
+        log.info("Checking .pth file support in %s", instdir)
+        pth_file = self.pseudo_tempname() + ".pth"
+        ok_file = pth_file + '.ok'
+        ok_exists = os.path.exists(ok_file)
+        tmpl = (
+            _one_liner(
+                """
+            import os
+            f = open({ok_file!r}, 'w', encoding="utf-8")
+            f.write('OK')
+            f.close()
+            """
+            )
+            + '\n'
+        )
+        try:
+            if ok_exists:
+                os.unlink(ok_file)
+            dirname = os.path.dirname(ok_file)
+            os.makedirs(dirname, exist_ok=True)
+            f = open(pth_file, 'w', encoding=py39.LOCALE_ENCODING)
+            # ^-- Requires encoding="locale" instead of "utf-8" (python/cpython#77102).
+        except OSError:
+            self.cant_write_to_target()
+        else:
+            try:
+                f.write(tmpl.format(**locals()))
+                f.close()
+                f = None
+                executable = sys.executable
+                if os.name == 'nt':
+                    dirname, basename = os.path.split(executable)
+                    alt = os.path.join(dirname, 'pythonw.exe')
+                    use_alt = basename.lower() == 'python.exe' and os.path.exists(alt)
+                    if use_alt:
+                        # use pythonw.exe to avoid opening a console window
+                        executable = alt
+
+                from distutils.spawn import spawn
+
+                spawn([executable, '-E', '-c', 'pass'], 0)
+
+                if os.path.exists(ok_file):
+                    log.info("TEST PASSED: %s appears to support .pth files", instdir)
+                    return True
+            finally:
+                if f:
+                    f.close()
+                if os.path.exists(ok_file):
+                    os.unlink(ok_file)
+                if os.path.exists(pth_file):
+                    os.unlink(pth_file)
+        if not self.multi_version:
+            log.warn("TEST FAILED: %s does NOT support .pth files", instdir)
+        return False
+
+    def install_egg_scripts(self, dist):
+        """Write all the scripts for `dist`, unless scripts are excluded"""
+        if not self.exclude_scripts and dist.metadata_isdir('scripts'):
+            for script_name in dist.metadata_listdir('scripts'):
+                if dist.metadata_isdir('scripts/' + script_name):
+                    # The "script" is a directory, likely a Python 3
+                    # __pycache__ directory, so skip it.
+                    continue
+                self.install_script(
+                    dist, script_name, dist.get_metadata('scripts/' + script_name)
+                )
+        self.install_wrapper_scripts(dist)
+
+    def add_output(self, path):
+        if os.path.isdir(path):
+            for base, dirs, files in os.walk(path):
+                for filename in files:
+                    self.outputs.append(os.path.join(base, filename))
+        else:
+            self.outputs.append(path)
+
+    def not_editable(self, spec):
+        if self.editable:
+            raise DistutilsArgError(
+                "Invalid argument %r: you can't use filenames or URLs "
+                "with --editable (except via the --find-links option)." % (spec,)
+            )
+
+    def check_editable(self, spec):
+        if not self.editable:
+            return
+
+        if os.path.exists(os.path.join(self.build_directory, spec.key)):
+            raise DistutilsArgError(
+                "%r already exists in %s; can't do a checkout there"
+                % (spec.key, self.build_directory)
+            )
+
+    @contextlib.contextmanager
+    def _tmpdir(self):
+        tmpdir = tempfile.mkdtemp(prefix="easy_install-")
+        try:
+            # cast to str as workaround for #709 and #710 and #712
+            yield str(tmpdir)
+        finally:
+            os.path.exists(tmpdir) and _rmtree(tmpdir)
+
+    def easy_install(self, spec, deps=False):
+        with self._tmpdir() as tmpdir:
+            if not isinstance(spec, Requirement):
+                if URL_SCHEME(spec):
+                    # It's a url, download it to tmpdir and process
+                    self.not_editable(spec)
+                    dl = self.package_index.download(spec, tmpdir)
+                    return self.install_item(None, dl, tmpdir, deps, True)
+
+                elif os.path.exists(spec):
+                    # Existing file or directory, just process it directly
+                    self.not_editable(spec)
+                    return self.install_item(None, spec, tmpdir, deps, True)
+                else:
+                    spec = parse_requirement_arg(spec)
+
+            self.check_editable(spec)
+            dist = self.package_index.fetch_distribution(
+                spec,
+                tmpdir,
+                self.upgrade,
+                self.editable,
+                not self.always_copy,
+                self.local_index,
+            )
+            if dist is None:
+                msg = "Could not find suitable distribution for %r" % spec
+                if self.always_copy:
+                    msg += " (--always-copy skips system and development eggs)"
+                raise DistutilsError(msg)
+            elif dist.precedence == DEVELOP_DIST:
+                # .egg-info dists don't need installing, just process deps
+                self.process_distribution(spec, dist, deps, "Using")
+                return dist
+            else:
+                return self.install_item(spec, dist.location, tmpdir, deps)
+
+    def install_item(self, spec, download, tmpdir, deps, install_needed=False):
+        # Installation is also needed if file in tmpdir or is not an egg
+        install_needed = install_needed or self.always_copy
+        install_needed = install_needed or os.path.dirname(download) == tmpdir
+        install_needed = install_needed or not download.endswith('.egg')
+        install_needed = install_needed or (
+            self.always_copy_from is not None
+            and os.path.dirname(normalize_path(download))
+            == normalize_path(self.always_copy_from)
+        )
+
+        if spec and not install_needed:
+            # at this point, we know it's a local .egg, we just don't know if
+            # it's already installed.
+            for dist in self.local_index[spec.project_name]:
+                if dist.location == download:
+                    break
+            else:
+                install_needed = True  # it's not in the local index
+
+        log.info("Processing %s", os.path.basename(download))
+
+        if install_needed:
+            dists = self.install_eggs(spec, download, tmpdir)
+            for dist in dists:
+                self.process_distribution(spec, dist, deps)
+        else:
+            dists = [self.egg_distribution(download)]
+            self.process_distribution(spec, dists[0], deps, "Using")
+
+        if spec is not None:
+            for dist in dists:
+                if dist in spec:
+                    return dist
+        return None
+
+    def select_scheme(self, name):
+        try:
+            install._select_scheme(self, name)
+        except AttributeError:
+            # stdlib distutils
+            install.install.select_scheme(self, name.replace('posix', 'unix'))
+
+    # FIXME: 'easy_install.process_distribution' is too complex (12)
+    def process_distribution(  # noqa: C901
+        self,
+        requirement,
+        dist,
+        deps=True,
+        *info,
+    ):
+        self.update_pth(dist)
+        self.package_index.add(dist)
+        if dist in self.local_index[dist.key]:
+            self.local_index.remove(dist)
+        self.local_index.add(dist)
+        self.install_egg_scripts(dist)
+        self.installed_projects[dist.key] = dist
+        log.info(self.installation_report(requirement, dist, *info))
+        if dist.has_metadata('dependency_links.txt') and not self.no_find_links:
+            self.package_index.add_find_links(
+                dist.get_metadata_lines('dependency_links.txt')
+            )
+        if not deps and not self.always_copy:
+            return
+        elif requirement is not None and dist.key != requirement.key:
+            log.warn("Skipping dependencies for %s", dist)
+            return  # XXX this is not the distribution we were looking for
+        elif requirement is None or dist not in requirement:
+            # if we wound up with a different version, resolve what we've got
+            distreq = dist.as_requirement()
+            requirement = Requirement(str(distreq))
+        log.info("Processing dependencies for %s", requirement)
+        try:
+            distros = WorkingSet([]).resolve(
+                [requirement], self.local_index, self.easy_install
+            )
+        except DistributionNotFound as e:
+            raise DistutilsError(str(e)) from e
+        except VersionConflict as e:
+            raise DistutilsError(e.report()) from e
+        if self.always_copy or self.always_copy_from:
+            # Force all the relevant distros to be copied or activated
+            for dist in distros:
+                if dist.key not in self.installed_projects:
+                    self.easy_install(dist.as_requirement())
+        log.info("Finished processing dependencies for %s", requirement)
+
+    def should_unzip(self, dist):
+        if self.zip_ok is not None:
+            return not self.zip_ok
+        if dist.has_metadata('not-zip-safe'):
+            return True
+        if not dist.has_metadata('zip-safe'):
+            return True
+        return False
+
+    def maybe_move(self, spec, dist_filename, setup_base):
+        dst = os.path.join(self.build_directory, spec.key)
+        if os.path.exists(dst):
+            msg = "%r already exists in %s; build directory %s will not be kept"
+            log.warn(msg, spec.key, self.build_directory, setup_base)
+            return setup_base
+        if os.path.isdir(dist_filename):
+            setup_base = dist_filename
+        else:
+            if os.path.dirname(dist_filename) == setup_base:
+                os.unlink(dist_filename)  # get it out of the tmp dir
+            contents = os.listdir(setup_base)
+            if len(contents) == 1:
+                dist_filename = os.path.join(setup_base, contents[0])
+                if os.path.isdir(dist_filename):
+                    # if the only thing there is a directory, move it instead
+                    setup_base = dist_filename
+        ensure_directory(dst)
+        shutil.move(setup_base, dst)
+        return dst
+
+    def install_wrapper_scripts(self, dist):
+        if self.exclude_scripts:
+            return
+        for args in ScriptWriter.best().get_args(dist):
+            self.write_script(*args)
+
+    def install_script(self, dist, script_name, script_text, dev_path=None):
+        """Generate a legacy script wrapper and install it"""
+        spec = str(dist.as_requirement())
+        is_script = is_python_script(script_text, script_name)
+
+        if is_script:
+            body = self._load_template(dev_path) % locals()
+            script_text = ScriptWriter.get_header(script_text) + body
+        self.write_script(script_name, _to_bytes(script_text), 'b')
+
+    @staticmethod
+    def _load_template(dev_path):
+        """
+        There are a couple of template scripts in the package. This
+        function loads one of them and prepares it for use.
+        """
+        # See https://github.com/pypa/setuptools/issues/134 for info
+        # on script file naming and downstream issues with SVR4
+        name = 'script.tmpl'
+        if dev_path:
+            name = name.replace('.tmpl', ' (dev).tmpl')
+
+        raw_bytes = resource_string('setuptools', name)
+        return raw_bytes.decode('utf-8')
+
+    def write_script(self, script_name, contents, mode="t", blockers=()):
+        """Write an executable file to the scripts directory"""
+        self.delete_blockers(  # clean up old .py/.pyw w/o a script
+            [os.path.join(self.script_dir, x) for x in blockers]
+        )
+        log.info("Installing %s script to %s", script_name, self.script_dir)
+        target = os.path.join(self.script_dir, script_name)
+        self.add_output(target)
+
+        if self.dry_run:
+            return
+
+        mask = current_umask()
+        ensure_directory(target)
+        if os.path.exists(target):
+            os.unlink(target)
+
+        encoding = None if "b" in mode else "utf-8"
+        with open(target, "w" + mode, encoding=encoding) as f:
+            f.write(contents)
+        chmod(target, 0o777 - mask)
+
+    def install_eggs(self, spec, dist_filename, tmpdir):
+        # .egg dirs or files are already built, so just return them
+        installer_map = {
+            '.egg': self.install_egg,
+            '.exe': self.install_exe,
+            '.whl': self.install_wheel,
+        }
+        try:
+            install_dist = installer_map[dist_filename.lower()[-4:]]
+        except KeyError:
+            pass
+        else:
+            return [install_dist(dist_filename, tmpdir)]
+
+        # Anything else, try to extract and build
+        setup_base = tmpdir
+        if os.path.isfile(dist_filename) and not dist_filename.endswith('.py'):
+            unpack_archive(dist_filename, tmpdir, self.unpack_progress)
+        elif os.path.isdir(dist_filename):
+            setup_base = os.path.abspath(dist_filename)
+
+        if (
+            setup_base.startswith(tmpdir)  # something we downloaded
+            and self.build_directory
+            and spec is not None
+        ):
+            setup_base = self.maybe_move(spec, dist_filename, setup_base)
+
+        # Find the setup.py file
+        setup_script = os.path.join(setup_base, 'setup.py')
+
+        if not os.path.exists(setup_script):
+            setups = glob(os.path.join(setup_base, '*', 'setup.py'))
+            if not setups:
+                raise DistutilsError(
+                    "Couldn't find a setup script in %s"
+                    % os.path.abspath(dist_filename)
+                )
+            if len(setups) > 1:
+                raise DistutilsError(
+                    "Multiple setup scripts in %s" % os.path.abspath(dist_filename)
+                )
+            setup_script = setups[0]
+
+        # Now run it, and return the result
+        if self.editable:
+            log.info(self.report_editable(spec, setup_script))
+            return []
+        else:
+            return self.build_and_install(setup_script, setup_base)
+
+    def egg_distribution(self, egg_path):
+        if os.path.isdir(egg_path):
+            metadata = PathMetadata(egg_path, os.path.join(egg_path, 'EGG-INFO'))
+        else:
+            metadata = EggMetadata(zipimport.zipimporter(egg_path))
+        return Distribution.from_filename(egg_path, metadata=metadata)
+
+    # FIXME: 'easy_install.install_egg' is too complex (11)
+    def install_egg(self, egg_path, tmpdir):
+        destination = os.path.join(
+            self.install_dir,
+            os.path.basename(egg_path),
+        )
+        destination = os.path.abspath(destination)
+        if not self.dry_run:
+            ensure_directory(destination)
+
+        dist = self.egg_distribution(egg_path)
+        if not (
+            os.path.exists(destination) and os.path.samefile(egg_path, destination)
+        ):
+            if os.path.isdir(destination) and not os.path.islink(destination):
+                dir_util.remove_tree(destination, dry_run=self.dry_run)
+            elif os.path.exists(destination):
+                self.execute(
+                    os.unlink,
+                    (destination,),
+                    "Removing " + destination,
+                )
+            try:
+                new_dist_is_zipped = False
+                if os.path.isdir(egg_path):
+                    if egg_path.startswith(tmpdir):
+                        f, m = shutil.move, "Moving"
+                    else:
+                        f, m = shutil.copytree, "Copying"
+                elif self.should_unzip(dist):
+                    self.mkpath(destination)
+                    f, m = self.unpack_and_compile, "Extracting"
+                else:
+                    new_dist_is_zipped = True
+                    if egg_path.startswith(tmpdir):
+                        f, m = shutil.move, "Moving"
+                    else:
+                        f, m = shutil.copy2, "Copying"
+                self.execute(
+                    f,
+                    (egg_path, destination),
+                    (m + " %s to %s")
+                    % (os.path.basename(egg_path), os.path.dirname(destination)),
+                )
+                update_dist_caches(
+                    destination,
+                    fix_zipimporter_caches=new_dist_is_zipped,
+                )
+            except Exception:
+                update_dist_caches(destination, fix_zipimporter_caches=False)
+                raise
+
+        self.add_output(destination)
+        return self.egg_distribution(destination)
+
+    def install_exe(self, dist_filename, tmpdir):
+        # See if it's valid, get data
+        cfg = extract_wininst_cfg(dist_filename)
+        if cfg is None:
+            raise DistutilsError(
+                "%s is not a valid distutils Windows .exe" % dist_filename
+            )
+        # Create a dummy distribution object until we build the real distro
+        dist = Distribution(
+            None,
+            project_name=cfg.get('metadata', 'name'),
+            version=cfg.get('metadata', 'version'),
+            platform=get_platform(),
+        )
+
+        # Convert the .exe to an unpacked egg
+        egg_path = os.path.join(tmpdir, dist.egg_name() + '.egg')
+        dist.location = egg_path
+        egg_tmp = egg_path + '.tmp'
+        _egg_info = os.path.join(egg_tmp, 'EGG-INFO')
+        pkg_inf = os.path.join(_egg_info, 'PKG-INFO')
+        ensure_directory(pkg_inf)  # make sure EGG-INFO dir exists
+        dist._provider = PathMetadata(egg_tmp, _egg_info)  # XXX
+        self.exe_to_egg(dist_filename, egg_tmp)
+
+        # Write EGG-INFO/PKG-INFO
+        if not os.path.exists(pkg_inf):
+            with open(pkg_inf, 'w', encoding="utf-8") as f:
+                f.write('Metadata-Version: 1.0\n')
+                for k, v in cfg.items('metadata'):
+                    if k != 'target_version':
+                        f.write('%s: %s\n' % (k.replace('_', '-').title(), v))
+        script_dir = os.path.join(_egg_info, 'scripts')
+        # delete entry-point scripts to avoid duping
+        self.delete_blockers([
+            os.path.join(script_dir, args[0]) for args in ScriptWriter.get_args(dist)
+        ])
+        # Build .egg file from tmpdir
+        bdist_egg.make_zipfile(
+            egg_path,
+            egg_tmp,
+            verbose=self.verbose,
+            dry_run=self.dry_run,
+        )
+        # install the .egg
+        return self.install_egg(egg_path, tmpdir)
+
+    # FIXME: 'easy_install.exe_to_egg' is too complex (12)
+    def exe_to_egg(self, dist_filename, egg_tmp):  # noqa: C901
+        """Extract a bdist_wininst to the directories an egg would use"""
+        # Check for .pth file and set up prefix translations
+        prefixes = get_exe_prefixes(dist_filename)
+        to_compile = []
+        native_libs = []
+        top_level = set()
+
+        def process(src, dst):
+            s = src.lower()
+            for old, new in prefixes:
+                if s.startswith(old):
+                    src = new + src[len(old) :]
+                    parts = src.split('/')
+                    dst = os.path.join(egg_tmp, *parts)
+                    dl = dst.lower()
+                    if dl.endswith('.pyd') or dl.endswith('.dll'):
+                        parts[-1] = bdist_egg.strip_module(parts[-1])
+                        top_level.add([os.path.splitext(parts[0])[0]])
+                        native_libs.append(src)
+                    elif dl.endswith('.py') and old != 'SCRIPTS/':
+                        top_level.add([os.path.splitext(parts[0])[0]])
+                        to_compile.append(dst)
+                    return dst
+            if not src.endswith('.pth'):
+                log.warn("WARNING: can't process %s", src)
+            return None
+
+        # extract, tracking .pyd/.dll->native_libs and .py -> to_compile
+        unpack_archive(dist_filename, egg_tmp, process)
+        stubs = []
+        for res in native_libs:
+            if res.lower().endswith('.pyd'):  # create stubs for .pyd's
+                parts = res.split('/')
+                resource = parts[-1]
+                parts[-1] = bdist_egg.strip_module(parts[-1]) + '.py'
+                pyfile = os.path.join(egg_tmp, *parts)
+                to_compile.append(pyfile)
+                stubs.append(pyfile)
+                bdist_egg.write_stub(resource, pyfile)
+        self.byte_compile(to_compile)  # compile .py's
+        bdist_egg.write_safety_flag(
+            os.path.join(egg_tmp, 'EGG-INFO'), bdist_egg.analyze_egg(egg_tmp, stubs)
+        )  # write zip-safety flag
+
+        for name in 'top_level', 'native_libs':
+            if locals()[name]:
+                txt = os.path.join(egg_tmp, 'EGG-INFO', name + '.txt')
+                if not os.path.exists(txt):
+                    with open(txt, 'w', encoding="utf-8") as f:
+                        f.write('\n'.join(locals()[name]) + '\n')
+
+    def install_wheel(self, wheel_path, tmpdir):
+        wheel = Wheel(wheel_path)
+        assert wheel.is_compatible()
+        destination = os.path.join(self.install_dir, wheel.egg_name())
+        destination = os.path.abspath(destination)
+        if not self.dry_run:
+            ensure_directory(destination)
+        if os.path.isdir(destination) and not os.path.islink(destination):
+            dir_util.remove_tree(destination, dry_run=self.dry_run)
+        elif os.path.exists(destination):
+            self.execute(
+                os.unlink,
+                (destination,),
+                "Removing " + destination,
+            )
+        try:
+            self.execute(
+                wheel.install_as_egg,
+                (destination,),
+                ("Installing %s to %s")
+                % (os.path.basename(wheel_path), os.path.dirname(destination)),
+            )
+        finally:
+            update_dist_caches(destination, fix_zipimporter_caches=False)
+        self.add_output(destination)
+        return self.egg_distribution(destination)
+
+    __mv_warning = textwrap.dedent(
+        """
+        Because this distribution was installed --multi-version, before you can
+        import modules from this package in an application, you will need to
+        'import pkg_resources' and then use a 'require()' call similar to one of
+        these examples, in order to select the desired version:
+
+            pkg_resources.require("%(name)s")  # latest installed version
+            pkg_resources.require("%(name)s==%(version)s")  # this exact version
+            pkg_resources.require("%(name)s>=%(version)s")  # this version or higher
+        """
+    ).lstrip()
+
+    __id_warning = textwrap.dedent(
+        """
+        Note also that the installation directory must be on sys.path at runtime for
+        this to work.  (e.g. by being the application's script directory, by being on
+        PYTHONPATH, or by being added to sys.path by your code.)
+        """
+    )
+
+    def installation_report(self, req, dist, what="Installed"):
+        """Helpful installation message for display to package users"""
+        msg = "\n%(what)s %(eggloc)s%(extras)s"
+        if self.multi_version and not self.no_report:
+            msg += '\n' + self.__mv_warning
+            if self.install_dir not in map(normalize_path, sys.path):
+                msg += '\n' + self.__id_warning
+
+        eggloc = dist.location
+        name = dist.project_name
+        version = dist.version
+        extras = ''  # TODO: self.report_extras(req, dist)
+        return msg % locals()
+
+    __editable_msg = textwrap.dedent(
+        """
+        Extracted editable version of %(spec)s to %(dirname)s
+
+        If it uses setuptools in its setup script, you can activate it in
+        "development" mode by going to that directory and running::
+
+            %(python)s setup.py develop
+
+        See the setuptools documentation for the "develop" command for more info.
+        """
+    ).lstrip()
+
+    def report_editable(self, spec, setup_script):
+        dirname = os.path.dirname(setup_script)
+        python = sys.executable
+        return '\n' + self.__editable_msg % locals()
+
+    def run_setup(self, setup_script, setup_base, args):
+        sys.modules.setdefault('distutils.command.bdist_egg', bdist_egg)
+        sys.modules.setdefault('distutils.command.egg_info', egg_info)
+
+        args = list(args)
+        if self.verbose > 2:
+            v = 'v' * (self.verbose - 1)
+            args.insert(0, '-' + v)
+        elif self.verbose < 2:
+            args.insert(0, '-q')
+        if self.dry_run:
+            args.insert(0, '-n')
+        log.info("Running %s %s", setup_script[len(setup_base) + 1 :], ' '.join(args))
+        try:
+            run_setup(setup_script, args)
+        except SystemExit as v:
+            raise DistutilsError("Setup script exited with %s" % (v.args[0],)) from v
+
+    def build_and_install(self, setup_script, setup_base):
+        args = ['bdist_egg', '--dist-dir']
+
+        dist_dir = tempfile.mkdtemp(
+            prefix='egg-dist-tmp-', dir=os.path.dirname(setup_script)
+        )
+        try:
+            self._set_fetcher_options(os.path.dirname(setup_script))
+            args.append(dist_dir)
+
+            self.run_setup(setup_script, setup_base, args)
+            all_eggs = Environment([dist_dir])
+            eggs = [
+                self.install_egg(dist.location, setup_base)
+                for key in all_eggs
+                for dist in all_eggs[key]
+            ]
+            if not eggs and not self.dry_run:
+                log.warn("No eggs found in %s (setup script problem?)", dist_dir)
+            return eggs
+        finally:
+            _rmtree(dist_dir)
+            log.set_verbosity(self.verbose)  # restore our log verbosity
+
+    def _set_fetcher_options(self, base):
+        """
+        When easy_install is about to run bdist_egg on a source dist, that
+        source dist might have 'setup_requires' directives, requiring
+        additional fetching. Ensure the fetcher options given to easy_install
+        are available to that command as well.
+        """
+        # find the fetch options from easy_install and write them out
+        # to the setup.cfg file.
+        ei_opts = self.distribution.get_option_dict('easy_install').copy()
+        fetch_directives = (
+            'find_links',
+            'site_dirs',
+            'index_url',
+            'optimize',
+            'allow_hosts',
+        )
+        fetch_options = {}
+        for key, val in ei_opts.items():
+            if key not in fetch_directives:
+                continue
+            fetch_options[key] = val[1]
+        # create a settings dictionary suitable for `edit_config`
+        settings = dict(easy_install=fetch_options)
+        cfg_filename = os.path.join(base, 'setup.cfg')
+        setopt.edit_config(cfg_filename, settings)
+
+    def update_pth(self, dist):  # noqa: C901  # is too complex (11)  # FIXME
+        if self.pth_file is None:
+            return
+
+        for d in self.pth_file[dist.key]:  # drop old entries
+            if not self.multi_version and d.location == dist.location:
+                continue
+
+            log.info("Removing %s from easy-install.pth file", d)
+            self.pth_file.remove(d)
+            if d.location in self.shadow_path:
+                self.shadow_path.remove(d.location)
+
+        if not self.multi_version:
+            if dist.location in self.pth_file.paths:
+                log.info(
+                    "%s is already the active version in easy-install.pth",
+                    dist,
+                )
+            else:
+                log.info("Adding %s to easy-install.pth file", dist)
+                self.pth_file.add(dist)  # add new entry
+                if dist.location not in self.shadow_path:
+                    self.shadow_path.append(dist.location)
+
+        if self.dry_run:
+            return
+
+        self.pth_file.save()
+
+        if dist.key != 'setuptools':
+            return
+
+        # Ensure that setuptools itself never becomes unavailable!
+        # XXX should this check for latest version?
+        filename = os.path.join(self.install_dir, 'setuptools.pth')
+        if os.path.islink(filename):
+            os.unlink(filename)
+
+        with open(filename, 'wt', encoding=py39.LOCALE_ENCODING) as f:
+            # Requires encoding="locale" instead of "utf-8" (python/cpython#77102).
+            f.write(self.pth_file.make_relative(dist.location) + '\n')
+
+    def unpack_progress(self, src, dst):
+        # Progress filter for unpacking
+        log.debug("Unpacking %s to %s", src, dst)
+        return dst  # only unpack-and-compile skips files for dry run
+
+    def unpack_and_compile(self, egg_path, destination):
+        to_compile = []
+        to_chmod = []
+
+        def pf(src, dst):
+            if dst.endswith('.py') and not src.startswith('EGG-INFO/'):
+                to_compile.append(dst)
+            elif dst.endswith('.dll') or dst.endswith('.so'):
+                to_chmod.append(dst)
+            self.unpack_progress(src, dst)
+            return not self.dry_run and dst or None
+
+        unpack_archive(egg_path, destination, pf)
+        self.byte_compile(to_compile)
+        if not self.dry_run:
+            for f in to_chmod:
+                mode = ((os.stat(f)[stat.ST_MODE]) | 0o555) & 0o7755
+                chmod(f, mode)
+
+    def byte_compile(self, to_compile):
+        if sys.dont_write_bytecode:
+            return
+
+        from distutils.util import byte_compile
+
+        try:
+            # try to make the byte compile messages quieter
+            log.set_verbosity(self.verbose - 1)
+
+            byte_compile(to_compile, optimize=0, force=True, dry_run=self.dry_run)
+            if self.optimize:
+                byte_compile(
+                    to_compile,
+                    optimize=self.optimize,
+                    force=True,
+                    dry_run=self.dry_run,
+                )
+        finally:
+            log.set_verbosity(self.verbose)  # restore original verbosity
+
+    __no_default_msg = textwrap.dedent(
+        """
+        bad install directory or PYTHONPATH
+
+        You are attempting to install a package to a directory that is not
+        on PYTHONPATH and which Python does not read ".pth" files from.  The
+        installation directory you specified (via --install-dir, --prefix, or
+        the distutils default setting) was:
+
+            %s
+
+        and your PYTHONPATH environment variable currently contains:
+
+            %r
+
+        Here are some of your options for correcting the problem:
+
+        * You can choose a different installation directory, i.e., one that is
+          on PYTHONPATH or supports .pth files
+
+        * You can add the installation directory to the PYTHONPATH environment
+          variable.  (It must then also be on PYTHONPATH whenever you run
+          Python and want to use the package(s) you are installing.)
+
+        * You can set up the installation directory to support ".pth" files by
+          using one of the approaches described here:
+
+          https://setuptools.pypa.io/en/latest/deprecated/easy_install.html#custom-installation-locations
+
+
+        Please make the appropriate changes for your system and try again.
+        """
+    ).strip()
+
+    def create_home_path(self):
+        """Create directories under ~."""
+        if not self.user:
+            return
+        home = convert_path(os.path.expanduser("~"))
+        for path in only_strs(self.config_vars.values()):
+            if path.startswith(home) and not os.path.isdir(path):
+                self.debug_print("os.makedirs('%s', 0o700)" % path)
+                os.makedirs(path, 0o700)
+
+    INSTALL_SCHEMES = dict(
+        posix=dict(
+            install_dir='$base/lib/python$py_version_short/site-packages',
+            script_dir='$base/bin',
+        ),
+    )
+
+    DEFAULT_SCHEME = dict(
+        install_dir='$base/Lib/site-packages',
+        script_dir='$base/Scripts',
+    )
+
+    def _expand(self, *attrs):
+        config_vars = self.get_finalized_command('install').config_vars
+
+        if self.prefix:
+            # Set default install_dir/scripts from --prefix
+            config_vars = dict(config_vars)
+            config_vars['base'] = self.prefix
+            scheme = self.INSTALL_SCHEMES.get(os.name, self.DEFAULT_SCHEME)
+            for attr, val in scheme.items():
+                if getattr(self, attr, None) is None:
+                    setattr(self, attr, val)
+
+        from distutils.util import subst_vars
+
+        for attr in attrs:
+            val = getattr(self, attr)
+            if val is not None:
+                val = subst_vars(val, config_vars)
+                if os.name == 'posix':
+                    val = os.path.expanduser(val)
+                setattr(self, attr, val)
+
+
+def _pythonpath():
+    items = os.environ.get('PYTHONPATH', '').split(os.pathsep)
+    return filter(None, items)
+
+
+def get_site_dirs():
+    """
+    Return a list of 'site' dirs
+    """
+
+    sitedirs = []
+
+    # start with PYTHONPATH
+    sitedirs.extend(_pythonpath())
+
+    prefixes = [sys.prefix]
+    if sys.exec_prefix != sys.prefix:
+        prefixes.append(sys.exec_prefix)
+    for prefix in prefixes:
+        if not prefix:
+            continue
+
+        if sys.platform in ('os2emx', 'riscos'):
+            sitedirs.append(os.path.join(prefix, "Lib", "site-packages"))
+        elif os.sep == '/':
+            sitedirs.extend([
+                os.path.join(
+                    prefix,
+                    "lib",
+                    "python{}.{}".format(*sys.version_info),
+                    "site-packages",
+                ),
+                os.path.join(prefix, "lib", "site-python"),
+            ])
+        else:
+            sitedirs.extend([
+                prefix,
+                os.path.join(prefix, "lib", "site-packages"),
+            ])
+        if sys.platform != 'darwin':
+            continue
+
+        # for framework builds *only* we add the standard Apple
+        # locations. Currently only per-user, but /Library and
+        # /Network/Library could be added too
+        if 'Python.framework' not in prefix:
+            continue
+
+        home = os.environ.get('HOME')
+        if not home:
+            continue
+
+        home_sp = os.path.join(
+            home,
+            'Library',
+            'Python',
+            '{}.{}'.format(*sys.version_info),
+            'site-packages',
+        )
+        sitedirs.append(home_sp)
+    lib_paths = get_path('purelib'), get_path('platlib')
+
+    sitedirs.extend(s for s in lib_paths if s not in sitedirs)
+
+    if site.ENABLE_USER_SITE:
+        sitedirs.append(site.USER_SITE)
+
+    with contextlib.suppress(AttributeError):
+        sitedirs.extend(site.getsitepackages())
+
+    return list(map(normalize_path, sitedirs))
+
+
+def expand_paths(inputs):  # noqa: C901  # is too complex (11)  # FIXME
+    """Yield sys.path directories that might contain "old-style" packages"""
+
+    seen = set()
+
+    for dirname in inputs:
+        dirname = normalize_path(dirname)
+        if dirname in seen:
+            continue
+
+        seen.add(dirname)
+        if not os.path.isdir(dirname):
+            continue
+
+        files = os.listdir(dirname)
+        yield dirname, files
+
+        for name in files:
+            if not name.endswith('.pth'):
+                # We only care about the .pth files
+                continue
+            if name in ('easy-install.pth', 'setuptools.pth'):
+                # Ignore .pth files that we control
+                continue
+
+            # Read the .pth file
+            with open(os.path.join(dirname, name), encoding=py39.LOCALE_ENCODING) as f:
+                # Requires encoding="locale" instead of "utf-8" (python/cpython#77102).
+                lines = list(yield_lines(f))
+
+            # Yield existing non-dupe, non-import directory lines from it
+            for line in lines:
+                if line.startswith("import"):
+                    continue
+
+                line = normalize_path(line.rstrip())
+                if line in seen:
+                    continue
+
+                seen.add(line)
+                if not os.path.isdir(line):
+                    continue
+
+                yield line, os.listdir(line)
+
+
+def extract_wininst_cfg(dist_filename):
+    """Extract configuration data from a bdist_wininst .exe
+
+    Returns a configparser.RawConfigParser, or None
+    """
+    f = open(dist_filename, 'rb')
+    try:
+        endrec = zipfile._EndRecData(f)
+        if endrec is None:
+            return None
+
+        prepended = (endrec[9] - endrec[5]) - endrec[6]
+        if prepended < 12:  # no wininst data here
+            return None
+        f.seek(prepended - 12)
+
+        tag, cfglen, bmlen = struct.unpack("egg path translations for a given .exe file"""
+
+    prefixes = [
+        ('PURELIB/', ''),
+        ('PLATLIB/pywin32_system32', ''),
+        ('PLATLIB/', ''),
+        ('SCRIPTS/', 'EGG-INFO/scripts/'),
+        ('DATA/lib/site-packages', ''),
+    ]
+    z = zipfile.ZipFile(exe_filename)
+    try:
+        for info in z.infolist():
+            name = info.filename
+            parts = name.split('/')
+            if len(parts) == 3 and parts[2] == 'PKG-INFO':
+                if parts[1].endswith('.egg-info'):
+                    prefixes.insert(0, ('/'.join(parts[:2]), 'EGG-INFO/'))
+                    break
+            if len(parts) != 2 or not name.endswith('.pth'):
+                continue
+            if name.endswith('-nspkg.pth'):
+                continue
+            if parts[0].upper() in ('PURELIB', 'PLATLIB'):
+                contents = z.read(name).decode()
+                for pth in yield_lines(contents):
+                    pth = pth.strip().replace('\\', '/')
+                    if not pth.startswith('import'):
+                        prefixes.append((('%s/%s/' % (parts[0], pth)), ''))
+    finally:
+        z.close()
+    prefixes = [(x.lower(), y) for x, y in prefixes]
+    prefixes.sort()
+    prefixes.reverse()
+    return prefixes
+
+
+class PthDistributions(Environment):
+    """A .pth file with Distribution paths in it"""
+
+    def __init__(self, filename, sitedirs=()):
+        self.filename = filename
+        self.sitedirs = list(map(normalize_path, sitedirs))
+        self.basedir = normalize_path(os.path.dirname(self.filename))
+        self.paths, self.dirty = self._load()
+        # keep a copy if someone manually updates the paths attribute on the instance
+        self._init_paths = self.paths[:]
+        super().__init__([], None, None)
+        for path in yield_lines(self.paths):
+            list(map(self.add, find_distributions(path, True)))
+
+    def _load_raw(self):
+        paths = []
+        dirty = saw_import = False
+        seen = set(self.sitedirs)
+        f = open(self.filename, 'rt', encoding=py39.LOCALE_ENCODING)
+        # ^-- Requires encoding="locale" instead of "utf-8" (python/cpython#77102).
+        for line in f:
+            path = line.rstrip()
+            # still keep imports and empty/commented lines for formatting
+            paths.append(path)
+            if line.startswith(('import ', 'from ')):
+                saw_import = True
+                continue
+            stripped_path = path.strip()
+            if not stripped_path or stripped_path.startswith('#'):
+                continue
+            # skip non-existent paths, in case somebody deleted a package
+            # manually, and duplicate paths as well
+            normalized_path = normalize_path(os.path.join(self.basedir, path))
+            if normalized_path in seen or not os.path.exists(normalized_path):
+                log.debug("cleaned up dirty or duplicated %r", path)
+                dirty = True
+                paths.pop()
+                continue
+            seen.add(normalized_path)
+        f.close()
+        # remove any trailing empty/blank line
+        while paths and not paths[-1].strip():
+            paths.pop()
+            dirty = True
+        return paths, dirty or (paths and saw_import)
+
+    def _load(self):
+        if os.path.isfile(self.filename):
+            return self._load_raw()
+        return [], False
+
+    def save(self):
+        """Write changed .pth file back to disk"""
+        # first reload the file
+        last_paths, last_dirty = self._load()
+        # and check that there are no difference with what we have.
+        # there can be difference if someone else has written to the file
+        # since we first loaded it.
+        # we don't want to lose the eventual new paths added since then.
+        for path in last_paths[:]:
+            if path not in self.paths:
+                self.paths.append(path)
+                log.info("detected new path %r", path)
+                last_dirty = True
+            else:
+                last_paths.remove(path)
+        # also, re-check that all paths are still valid before saving them
+        for path in self.paths[:]:
+            if path not in last_paths and not path.startswith((
+                'import ',
+                'from ',
+                '#',
+            )):
+                absolute_path = os.path.join(self.basedir, path)
+                if not os.path.exists(absolute_path):
+                    self.paths.remove(path)
+                    log.info("removing now non-existent path %r", path)
+                    last_dirty = True
+
+        self.dirty |= last_dirty or self.paths != self._init_paths
+        if not self.dirty:
+            return
+
+        rel_paths = list(map(self.make_relative, self.paths))
+        if rel_paths:
+            log.debug("Saving %s", self.filename)
+            lines = self._wrap_lines(rel_paths)
+            data = '\n'.join(lines) + '\n'
+            if os.path.islink(self.filename):
+                os.unlink(self.filename)
+            with open(self.filename, 'wt', encoding=py39.LOCALE_ENCODING) as f:
+                # Requires encoding="locale" instead of "utf-8" (python/cpython#77102).
+                f.write(data)
+        elif os.path.exists(self.filename):
+            log.debug("Deleting empty %s", self.filename)
+            os.unlink(self.filename)
+
+        self.dirty = False
+        self._init_paths[:] = self.paths[:]
+
+    @staticmethod
+    def _wrap_lines(lines):
+        return lines
+
+    def add(self, dist):
+        """Add `dist` to the distribution map"""
+        new_path = dist.location not in self.paths and (
+            dist.location not in self.sitedirs
+            or
+            # account for '.' being in PYTHONPATH
+            dist.location == os.getcwd()
+        )
+        if new_path:
+            self.paths.append(dist.location)
+            self.dirty = True
+        super().add(dist)
+
+    def remove(self, dist):
+        """Remove `dist` from the distribution map"""
+        while dist.location in self.paths:
+            self.paths.remove(dist.location)
+            self.dirty = True
+        super().remove(dist)
+
+    def make_relative(self, path):
+        npath, last = os.path.split(normalize_path(path))
+        baselen = len(self.basedir)
+        parts = [last]
+        sep = os.altsep == '/' and '/' or os.sep
+        while len(npath) >= baselen:
+            if npath == self.basedir:
+                parts.append(os.curdir)
+                parts.reverse()
+                return sep.join(parts)
+            npath, last = os.path.split(npath)
+            parts.append(last)
+        else:
+            return path
+
+
+class RewritePthDistributions(PthDistributions):
+    @classmethod
+    def _wrap_lines(cls, lines):
+        yield cls.prelude
+        yield from lines
+        yield cls.postlude
+
+    prelude = _one_liner(
+        """
+        import sys
+        sys.__plen = len(sys.path)
+        """
+    )
+    postlude = _one_liner(
+        """
+        import sys
+        new = sys.path[sys.__plen:]
+        del sys.path[sys.__plen:]
+        p = getattr(sys, '__egginsert', 0)
+        sys.path[p:p] = new
+        sys.__egginsert = p + len(new)
+        """
+    )
+
+
+if os.environ.get('SETUPTOOLS_SYS_PATH_TECHNIQUE', 'raw') == 'rewrite':
+    PthDistributions = RewritePthDistributions  # type: ignore[misc]  # Overwriting type
+
+
+def _first_line_re():
+    """
+    Return a regular expression based on first_line_re suitable for matching
+    strings.
+    """
+    if isinstance(first_line_re.pattern, str):
+        return first_line_re
+
+    # first_line_re in Python >=3.1.4 and >=3.2.1 is a bytes pattern.
+    return re.compile(first_line_re.pattern.decode())
+
+
+def auto_chmod(func, arg, exc):
+    if func in [os.unlink, os.remove] and os.name == 'nt':
+        chmod(arg, stat.S_IWRITE)
+        return func(arg)
+    et, ev, _ = sys.exc_info()
+    # TODO: This code doesn't make sense. What is it trying to do?
+    raise (ev[0], ev[1] + (" %s %s" % (func, arg)))
+
+
+def update_dist_caches(dist_path, fix_zipimporter_caches):
+    """
+    Fix any globally cached `dist_path` related data
+
+    `dist_path` should be a path of a newly installed egg distribution (zipped
+    or unzipped).
+
+    sys.path_importer_cache contains finder objects that have been cached when
+    importing data from the original distribution. Any such finders need to be
+    cleared since the replacement distribution might be packaged differently,
+    e.g. a zipped egg distribution might get replaced with an unzipped egg
+    folder or vice versa. Having the old finders cached may then cause Python
+    to attempt loading modules from the replacement distribution using an
+    incorrect loader.
+
+    zipimport.zipimporter objects are Python loaders charged with importing
+    data packaged inside zip archives. If stale loaders referencing the
+    original distribution, are left behind, they can fail to load modules from
+    the replacement distribution. E.g. if an old zipimport.zipimporter instance
+    is used to load data from a new zipped egg archive, it may cause the
+    operation to attempt to locate the requested data in the wrong location -
+    one indicated by the original distribution's zip archive directory
+    information. Such an operation may then fail outright, e.g. report having
+    read a 'bad local file header', or even worse, it may fail silently &
+    return invalid data.
+
+    zipimport._zip_directory_cache contains cached zip archive directory
+    information for all existing zipimport.zipimporter instances and all such
+    instances connected to the same archive share the same cached directory
+    information.
+
+    If asked, and the underlying Python implementation allows it, we can fix
+    all existing zipimport.zipimporter instances instead of having to track
+    them down and remove them one by one, by updating their shared cached zip
+    archive directory information. This, of course, assumes that the
+    replacement distribution is packaged as a zipped egg.
+
+    If not asked to fix existing zipimport.zipimporter instances, we still do
+    our best to clear any remaining zipimport.zipimporter related cached data
+    that might somehow later get used when attempting to load data from the new
+    distribution and thus cause such load operations to fail. Note that when
+    tracking down such remaining stale data, we can not catch every conceivable
+    usage from here, and we clear only those that we know of and have found to
+    cause problems if left alive. Any remaining caches should be updated by
+    whomever is in charge of maintaining them, i.e. they should be ready to
+    handle us replacing their zip archives with new distributions at runtime.
+
+    """
+    # There are several other known sources of stale zipimport.zipimporter
+    # instances that we do not clear here, but might if ever given a reason to
+    # do so:
+    # * Global setuptools pkg_resources.working_set (a.k.a. 'master working
+    # set') may contain distributions which may in turn contain their
+    #   zipimport.zipimporter loaders.
+    # * Several zipimport.zipimporter loaders held by local variables further
+    #   up the function call stack when running the setuptools installation.
+    # * Already loaded modules may have their __loader__ attribute set to the
+    #   exact loader instance used when importing them. Python 3.4 docs state
+    #   that this information is intended mostly for introspection and so is
+    #   not expected to cause us problems.
+    normalized_path = normalize_path(dist_path)
+    _uncache(normalized_path, sys.path_importer_cache)
+    if fix_zipimporter_caches:
+        _replace_zip_directory_cache_data(normalized_path)
+    else:
+        # Here, even though we do not want to fix existing and now stale
+        # zipimporter cache information, we still want to remove it. Related to
+        # Python's zip archive directory information cache, we clear each of
+        # its stale entries in two phases:
+        #   1. Clear the entry so attempting to access zip archive information
+        #      via any existing stale zipimport.zipimporter instances fails.
+        #   2. Remove the entry from the cache so any newly constructed
+        #      zipimport.zipimporter instances do not end up using old stale
+        #      zip archive directory information.
+        # This whole stale data removal step does not seem strictly necessary,
+        # but has been left in because it was done before we started replacing
+        # the zip archive directory information cache content if possible, and
+        # there are no relevant unit tests that we can depend on to tell us if
+        # this is really needed.
+        _remove_and_clear_zip_directory_cache_data(normalized_path)
+
+
+def _collect_zipimporter_cache_entries(normalized_path, cache):
+    """
+    Return zipimporter cache entry keys related to a given normalized path.
+
+    Alternative path spellings (e.g. those using different character case or
+    those using alternative path separators) related to the same path are
+    included. Any sub-path entries are included as well, i.e. those
+    corresponding to zip archives embedded in other zip archives.
+
+    """
+    result = []
+    prefix_len = len(normalized_path)
+    for p in cache:
+        np = normalize_path(p)
+        if np.startswith(normalized_path) and np[prefix_len : prefix_len + 1] in (
+            os.sep,
+            '',
+        ):
+            result.append(p)
+    return result
+
+
+def _update_zipimporter_cache(normalized_path, cache, updater=None):
+    """
+    Update zipimporter cache data for a given normalized path.
+
+    Any sub-path entries are processed as well, i.e. those corresponding to zip
+    archives embedded in other zip archives.
+
+    Given updater is a callable taking a cache entry key and the original entry
+    (after already removing the entry from the cache), and expected to update
+    the entry and possibly return a new one to be inserted in its place.
+    Returning None indicates that the entry should not be replaced with a new
+    one. If no updater is given, the cache entries are simply removed without
+    any additional processing, the same as if the updater simply returned None.
+
+    """
+    for p in _collect_zipimporter_cache_entries(normalized_path, cache):
+        # N.B. pypy's custom zipimport._zip_directory_cache implementation does
+        # not support the complete dict interface:
+        # * Does not support item assignment, thus not allowing this function
+        #    to be used only for removing existing cache entries.
+        #  * Does not support the dict.pop() method, forcing us to use the
+        #    get/del patterns instead. For more detailed information see the
+        #    following links:
+        #      https://github.com/pypa/setuptools/issues/202#issuecomment-202913420
+        #      https://foss.heptapod.net/pypy/pypy/-/blob/144c4e65cb6accb8e592f3a7584ea38265d1873c/pypy/module/zipimport/interp_zipimport.py
+        old_entry = cache[p]
+        del cache[p]
+        new_entry = updater and updater(p, old_entry)
+        if new_entry is not None:
+            cache[p] = new_entry
+
+
+def _uncache(normalized_path, cache):
+    _update_zipimporter_cache(normalized_path, cache)
+
+
+def _remove_and_clear_zip_directory_cache_data(normalized_path):
+    def clear_and_remove_cached_zip_archive_directory_data(path, old_entry):
+        old_entry.clear()
+
+    _update_zipimporter_cache(
+        normalized_path,
+        zipimport._zip_directory_cache,
+        updater=clear_and_remove_cached_zip_archive_directory_data,
+    )
+
+
+# PyPy Python implementation does not allow directly writing to the
+# zipimport._zip_directory_cache and so prevents us from attempting to correct
+# its content. The best we can do there is clear the problematic cache content
+# and have PyPy repopulate it as needed. The downside is that if there are any
+# stale zipimport.zipimporter instances laying around, attempting to use them
+# will fail due to not having its zip archive directory information available
+# instead of being automatically corrected to use the new correct zip archive
+# directory information.
+if '__pypy__' in sys.builtin_module_names:
+    _replace_zip_directory_cache_data = _remove_and_clear_zip_directory_cache_data
+else:
+
+    def _replace_zip_directory_cache_data(normalized_path):
+        def replace_cached_zip_archive_directory_data(path, old_entry):
+            # N.B. In theory, we could load the zip directory information just
+            # once for all updated path spellings, and then copy it locally and
+            # update its contained path strings to contain the correct
+            # spelling, but that seems like a way too invasive move (this cache
+            # structure is not officially documented anywhere and could in
+            # theory change with new Python releases) for no significant
+            # benefit.
+            old_entry.clear()
+            zipimport.zipimporter(path)
+            old_entry.update(zipimport._zip_directory_cache[path])
+            return old_entry
+
+        _update_zipimporter_cache(
+            normalized_path,
+            zipimport._zip_directory_cache,
+            updater=replace_cached_zip_archive_directory_data,
+        )
+
+
+def is_python(text, filename=''):
+    "Is this string a valid Python script?"
+    try:
+        compile(text, filename, 'exec')
+    except (SyntaxError, TypeError):
+        return False
+    else:
+        return True
+
+
+def is_sh(executable):
+    """Determine if the specified executable is a .sh (contains a #! line)"""
+    try:
+        with open(executable, encoding='latin-1') as fp:
+            magic = fp.read(2)
+    except OSError:
+        return executable
+    return magic == '#!'
+
+
+def nt_quote_arg(arg):
+    """Quote a command line argument according to Windows parsing rules"""
+    return subprocess.list2cmdline([arg])
+
+
+def is_python_script(script_text, filename):
+    """Is this text, as a whole, a Python script? (as opposed to shell/bat/etc."""
+    if filename.endswith('.py') or filename.endswith('.pyw'):
+        return True  # extension says it's Python
+    if is_python(script_text, filename):
+        return True  # it's syntactically valid Python
+    if script_text.startswith('#!'):
+        # It begins with a '#!' line, so check if 'python' is in it somewhere
+        return 'python' in script_text.splitlines()[0].lower()
+
+    return False  # Not any Python I can recognize
+
+
+try:
+    from os import chmod as _chmod
+except ImportError:
+    # Jython compatibility
+    def _chmod(*args: object, **kwargs: object) -> None:  # type: ignore[misc] # Mypy re-uses the imported definition anyway
+        pass
+
+
+def chmod(path, mode):
+    log.debug("changing mode of %s to %o", path, mode)
+    try:
+        _chmod(path, mode)
+    except OSError as e:
+        log.debug("chmod failed: %s", e)
+
+
+class CommandSpec(list):
+    """
+    A command spec for a #! header, specified as a list of arguments akin to
+    those passed to Popen.
+    """
+
+    options: list[str] = []
+    split_args: dict[str, bool] = dict()
+
+    @classmethod
+    def best(cls):
+        """
+        Choose the best CommandSpec class based on environmental conditions.
+        """
+        return cls
+
+    @classmethod
+    def _sys_executable(cls):
+        _default = os.path.normpath(sys.executable)
+        return os.environ.get('__PYVENV_LAUNCHER__', _default)
+
+    @classmethod
+    def from_param(cls, param):
+        """
+        Construct a CommandSpec from a parameter to build_scripts, which may
+        be None.
+        """
+        if isinstance(param, cls):
+            return param
+        if isinstance(param, list):
+            return cls(param)
+        if param is None:
+            return cls.from_environment()
+        # otherwise, assume it's a string.
+        return cls.from_string(param)
+
+    @classmethod
+    def from_environment(cls):
+        return cls([cls._sys_executable()])
+
+    @classmethod
+    def from_string(cls, string):
+        """
+        Construct a command spec from a simple string representing a command
+        line parseable by shlex.split.
+        """
+        items = shlex.split(string, **cls.split_args)
+        return cls(items)
+
+    def install_options(self, script_text):
+        self.options = shlex.split(self._extract_options(script_text))
+        cmdline = subprocess.list2cmdline(self)
+        if not isascii(cmdline):
+            self.options[:0] = ['-x']
+
+    @staticmethod
+    def _extract_options(orig_script):
+        """
+        Extract any options from the first line of the script.
+        """
+        first = (orig_script + '\n').splitlines()[0]
+        match = _first_line_re().match(first)
+        options = match.group(1) or '' if match else ''
+        return options.strip()
+
+    def as_header(self):
+        return self._render(self + list(self.options))
+
+    @staticmethod
+    def _strip_quotes(item):
+        _QUOTES = '"\''
+        for q in _QUOTES:
+            if item.startswith(q) and item.endswith(q):
+                return item[1:-1]
+        return item
+
+    @staticmethod
+    def _render(items):
+        cmdline = subprocess.list2cmdline(
+            CommandSpec._strip_quotes(item.strip()) for item in items
+        )
+        return '#!' + cmdline + '\n'
+
+
+# For pbr compat; will be removed in a future version.
+sys_executable = CommandSpec._sys_executable()
+
+
+class WindowsCommandSpec(CommandSpec):
+    split_args = dict(posix=False)
+
+
+class ScriptWriter:
+    """
+    Encapsulates behavior around writing entry point scripts for console and
+    gui apps.
+    """
+
+    template = textwrap.dedent(
+        r"""
+        # EASY-INSTALL-ENTRY-SCRIPT: %(spec)r,%(group)r,%(name)r
+        import re
+        import sys
+
+        # for compatibility with easy_install; see #2198
+        __requires__ = %(spec)r
+
+        try:
+            from importlib.metadata import distribution
+        except ImportError:
+            try:
+                from importlib_metadata import distribution
+            except ImportError:
+                from pkg_resources import load_entry_point
+
+
+        def importlib_load_entry_point(spec, group, name):
+            dist_name, _, _ = spec.partition('==')
+            matches = (
+                entry_point
+                for entry_point in distribution(dist_name).entry_points
+                if entry_point.group == group and entry_point.name == name
+            )
+            return next(matches).load()
+
+
+        globals().setdefault('load_entry_point', importlib_load_entry_point)
+
+
+        if __name__ == '__main__':
+            sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+            sys.exit(load_entry_point(%(spec)r, %(group)r, %(name)r)())
+        """
+    ).lstrip()
+
+    command_spec_class = CommandSpec
+
+    @classmethod
+    def get_args(cls, dist, header=None):
+        """
+        Yield write_script() argument tuples for a distribution's
+        console_scripts and gui_scripts entry points.
+        """
+        if header is None:
+            header = cls.get_header()
+        spec = str(dist.as_requirement())
+        for type_ in 'console', 'gui':
+            group = type_ + '_scripts'
+            for name, ep in dist.get_entry_map(group).items():
+                cls._ensure_safe_name(name)
+                script_text = cls.template % locals()
+                args = cls._get_script_args(type_, name, header, script_text)
+                yield from args
+
+    @staticmethod
+    def _ensure_safe_name(name):
+        """
+        Prevent paths in *_scripts entry point names.
+        """
+        has_path_sep = re.search(r'[\\/]', name)
+        if has_path_sep:
+            raise ValueError("Path separators not allowed in script names")
+
+    @classmethod
+    def best(cls):
+        """
+        Select the best ScriptWriter for this environment.
+        """
+        if sys.platform == 'win32' or (os.name == 'java' and os._name == 'nt'):
+            return WindowsScriptWriter.best()
+        else:
+            return cls
+
+    @classmethod
+    def _get_script_args(cls, type_, name, header, script_text):
+        # Simply write the stub with no extension.
+        yield (name, header + script_text)
+
+    @classmethod
+    def get_header(cls, script_text="", executable=None):
+        """Create a #! line, getting options (if any) from script_text"""
+        cmd = cls.command_spec_class.best().from_param(executable)
+        cmd.install_options(script_text)
+        return cmd.as_header()
+
+
+class WindowsScriptWriter(ScriptWriter):
+    command_spec_class = WindowsCommandSpec
+
+    @classmethod
+    def best(cls):
+        """
+        Select the best ScriptWriter suitable for Windows
+        """
+        writer_lookup = dict(
+            executable=WindowsExecutableLauncherWriter,
+            natural=cls,
+        )
+        # for compatibility, use the executable launcher by default
+        launcher = os.environ.get('SETUPTOOLS_LAUNCHER', 'executable')
+        return writer_lookup[launcher]
+
+    @classmethod
+    def _get_script_args(cls, type_, name, header, script_text):
+        "For Windows, add a .py extension"
+        ext = dict(console='.pya', gui='.pyw')[type_]
+        if ext not in os.environ['PATHEXT'].lower().split(';'):
+            msg = (
+                "{ext} not listed in PATHEXT; scripts will not be "
+                "recognized as executables."
+            ).format(**locals())
+            SetuptoolsWarning.emit(msg)
+        old = ['.pya', '.py', '-script.py', '.pyc', '.pyo', '.pyw', '.exe']
+        old.remove(ext)
+        header = cls._adjust_header(type_, header)
+        blockers = [name + x for x in old]
+        yield name + ext, header + script_text, 't', blockers
+
+    @classmethod
+    def _adjust_header(cls, type_, orig_header):
+        """
+        Make sure 'pythonw' is used for gui and 'python' is used for
+        console (regardless of what sys.executable is).
+        """
+        pattern = 'pythonw.exe'
+        repl = 'python.exe'
+        if type_ == 'gui':
+            pattern, repl = repl, pattern
+        pattern_ob = re.compile(re.escape(pattern), re.IGNORECASE)
+        new_header = pattern_ob.sub(string=orig_header, repl=repl)
+        return new_header if cls._use_header(new_header) else orig_header
+
+    @staticmethod
+    def _use_header(new_header):
+        """
+        Should _adjust_header use the replaced header?
+
+        On non-windows systems, always use. On
+        Windows systems, only use the replaced header if it resolves
+        to an executable on the system.
+        """
+        clean_header = new_header[2:-1].strip('"')
+        return sys.platform != 'win32' or shutil.which(clean_header)
+
+
+class WindowsExecutableLauncherWriter(WindowsScriptWriter):
+    @classmethod
+    def _get_script_args(cls, type_, name, header, script_text):
+        """
+        For Windows, add a .py extension and an .exe launcher
+        """
+        if type_ == 'gui':
+            launcher_type = 'gui'
+            ext = '-script.pyw'
+            old = ['.pyw']
+        else:
+            launcher_type = 'cli'
+            ext = '-script.py'
+            old = ['.py', '.pyc', '.pyo']
+        hdr = cls._adjust_header(type_, header)
+        blockers = [name + x for x in old]
+        yield (name + ext, hdr + script_text, 't', blockers)
+        yield (
+            name + '.exe',
+            get_win_launcher(launcher_type),
+            'b',  # write in binary mode
+        )
+        if not is_64bit():
+            # install a manifest for the launcher to prevent Windows
+            # from detecting it as an installer (which it will for
+            #  launchers like easy_install.exe). Consider only
+            #  adding a manifest for launchers detected as installers.
+            #  See Distribute #143 for details.
+            m_name = name + '.exe.manifest'
+            yield (m_name, load_launcher_manifest(name), 't')
+
+
+def get_win_launcher(type):
+    """
+    Load the Windows launcher (executable) suitable for launching a script.
+
+    `type` should be either 'cli' or 'gui'
+
+    Returns the executable as a byte string.
+    """
+    launcher_fn = '%s.exe' % type
+    if is_64bit():
+        if get_platform() == "win-arm64":
+            launcher_fn = launcher_fn.replace(".", "-arm64.")
+        else:
+            launcher_fn = launcher_fn.replace(".", "-64.")
+    else:
+        launcher_fn = launcher_fn.replace(".", "-32.")
+    return resource_string('setuptools', launcher_fn)
+
+
+def load_launcher_manifest(name):
+    manifest = pkg_resources.resource_string(__name__, 'launcher manifest.xml')
+    return manifest.decode('utf-8') % vars()
+
+
+def _rmtree(path, ignore_errors=False, onexc=auto_chmod):
+    return py311.shutil_rmtree(path, ignore_errors, onexc)
+
+
+def current_umask():
+    tmp = os.umask(0o022)
+    os.umask(tmp)
+    return tmp
+
+
+def only_strs(values):
+    """
+    Exclude non-str values. Ref #3063.
+    """
+    return filter(lambda val: isinstance(val, str), values)
+
+
+class EasyInstallDeprecationWarning(SetuptoolsDeprecationWarning):
+    _SUMMARY = "easy_install command is deprecated."
+    _DETAILS = """
+    Please avoid running ``setup.py`` and ``easy_install``.
+    Instead, use pypa/build, pypa/installer or other
+    standards-based tools.
+    """
+    _SEE_URL = "https://github.com/pypa/setuptools/issues/917"
+    # _DUE_DATE not defined yet
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/install.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/install.py
new file mode 100644
index 0000000000000000000000000000000000000000..c49fcda939738fa269c4bc2a467ed406e044b25c
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/install.py
@@ -0,0 +1,155 @@
+from distutils.errors import DistutilsArgError
+import inspect
+import glob
+import platform
+import distutils.command.install as orig
+from typing import cast
+
+import setuptools
+from ..warnings import SetuptoolsDeprecationWarning, SetuptoolsWarning
+from .bdist_egg import bdist_egg as bdist_egg_cls
+
+# Prior to numpy 1.9, NumPy relies on the '_install' name, so provide it for
+# now. See https://github.com/pypa/setuptools/issues/199/
+_install = orig.install
+
+
+class install(orig.install):
+    """Use easy_install to install the package, w/dependencies"""
+
+    user_options = orig.install.user_options + [
+        ('old-and-unmanageable', None, "Try not to use this!"),
+        (
+            'single-version-externally-managed',
+            None,
+            "used by system package builders to create 'flat' eggs",
+        ),
+    ]
+    boolean_options = orig.install.boolean_options + [
+        'old-and-unmanageable',
+        'single-version-externally-managed',
+    ]
+    new_commands = [
+        ('install_egg_info', lambda self: True),
+        ('install_scripts', lambda self: True),
+    ]
+    _nc = dict(new_commands)
+
+    def initialize_options(self):
+        SetuptoolsDeprecationWarning.emit(
+            "setup.py install is deprecated.",
+            """
+            Please avoid running ``setup.py`` directly.
+            Instead, use pypa/build, pypa/installer or other
+            standards-based tools.
+            """,
+            see_url="https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html",
+            # TODO: Document how to bootstrap setuptools without install
+            #       (e.g. by unziping the wheel file)
+            #       and then add a due_date to this warning.
+        )
+
+        super().initialize_options()
+        self.old_and_unmanageable = None
+        self.single_version_externally_managed = None
+
+    def finalize_options(self):
+        super().finalize_options()
+        if self.root:
+            self.single_version_externally_managed = True
+        elif self.single_version_externally_managed:
+            if not self.root and not self.record:
+                raise DistutilsArgError(
+                    "You must specify --record or --root when building system"
+                    " packages"
+                )
+
+    def handle_extra_path(self):
+        if self.root or self.single_version_externally_managed:
+            # explicit backward-compatibility mode, allow extra_path to work
+            return orig.install.handle_extra_path(self)
+
+        # Ignore extra_path when installing an egg (or being run by another
+        # command without --root or --single-version-externally-managed
+        self.path_file = None
+        self.extra_dirs = ''
+        return None
+
+    def run(self):
+        # Explicit request for old-style install?  Just do it
+        if self.old_and_unmanageable or self.single_version_externally_managed:
+            return super().run()
+
+        if not self._called_from_setup(inspect.currentframe()):
+            # Run in backward-compatibility mode to support bdist_* commands.
+            super().run()
+        else:
+            self.do_egg_install()
+
+        return None
+
+    @staticmethod
+    def _called_from_setup(run_frame):
+        """
+        Attempt to detect whether run() was called from setup() or by another
+        command.  If called by setup(), the parent caller will be the
+        'run_command' method in 'distutils.dist', and *its* caller will be
+        the 'run_commands' method.  If called any other way, the
+        immediate caller *might* be 'run_command', but it won't have been
+        called by 'run_commands'. Return True in that case or if a call stack
+        is unavailable. Return False otherwise.
+        """
+        if run_frame is None:
+            msg = "Call stack not available. bdist_* commands may fail."
+            SetuptoolsWarning.emit(msg)
+            if platform.python_implementation() == 'IronPython':
+                msg = "For best results, pass -X:Frames to enable call stack."
+                SetuptoolsWarning.emit(msg)
+            return True
+
+        frames = inspect.getouterframes(run_frame)
+        for frame in frames[2:4]:
+            (caller,) = frame[:1]
+            info = inspect.getframeinfo(caller)
+            caller_module = caller.f_globals.get('__name__', '')
+
+            if caller_module == "setuptools.dist" and info.function == "run_command":
+                # Starting from v61.0.0 setuptools overwrites dist.run_command
+                continue
+
+            return caller_module == 'distutils.dist' and info.function == 'run_commands'
+
+        return False
+
+    def do_egg_install(self):
+        easy_install = self.distribution.get_command_class('easy_install')
+
+        cmd = easy_install(
+            self.distribution,
+            args="x",
+            root=self.root,
+            record=self.record,
+        )
+        cmd.ensure_finalized()  # finalize before bdist_egg munges install cmd
+        cmd.always_copy_from = '.'  # make sure local-dir eggs get installed
+
+        # pick up setup-dir .egg files only: no .egg-info
+        cmd.package_index.scan(glob.glob('*.egg'))
+
+        self.run_command('bdist_egg')
+        bdist_egg = cast(bdist_egg_cls, self.distribution.get_command_obj('bdist_egg'))
+        args = [bdist_egg.egg_output]
+
+        if setuptools.bootstrap_install_from:
+            # Bootstrap self-installation of setuptools
+            args.insert(0, setuptools.bootstrap_install_from)
+
+        cmd.args = args
+        cmd.run(show_deprecation=False)
+        setuptools.bootstrap_install_from = None
+
+
+# XXX Python 3.1 doesn't see _nc if this is inside the class
+install.sub_commands = [
+    cmd for cmd in orig.install.sub_commands if cmd[0] not in install._nc
+] + install.new_commands
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/install_egg_info.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/install_egg_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1d2e8184fcc4103c5e0014c64e69c75b25dba60
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/install_egg_info.py
@@ -0,0 +1,57 @@
+from distutils import log, dir_util
+import os
+
+from setuptools import Command
+from setuptools import namespaces
+from setuptools.archive_util import unpack_archive
+from .._path import ensure_directory
+
+
+class install_egg_info(namespaces.Installer, Command):
+    """Install an .egg-info directory for the package"""
+
+    description = "Install an .egg-info directory for the package"
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install to"),
+    ]
+
+    def initialize_options(self):
+        self.install_dir = None
+
+    def finalize_options(self):
+        self.set_undefined_options('install_lib', ('install_dir', 'install_dir'))
+        ei_cmd = self.get_finalized_command("egg_info")
+        basename = f"{ei_cmd._get_egg_basename()}.egg-info"
+        self.source = ei_cmd.egg_info
+        self.target = os.path.join(self.install_dir, basename)
+        self.outputs = []
+
+    def run(self):
+        self.run_command('egg_info')
+        if os.path.isdir(self.target) and not os.path.islink(self.target):
+            dir_util.remove_tree(self.target, dry_run=self.dry_run)
+        elif os.path.exists(self.target):
+            self.execute(os.unlink, (self.target,), "Removing " + self.target)
+        if not self.dry_run:
+            ensure_directory(self.target)
+        self.execute(self.copytree, (), "Copying %s to %s" % (self.source, self.target))
+        self.install_namespaces()
+
+    def get_outputs(self):
+        return self.outputs
+
+    def copytree(self):
+        # Copy the .egg-info tree to site-packages
+        def skimmer(src, dst):
+            # filter out source-control directories; note that 'src' is always
+            # a '/'-separated path, regardless of platform.  'dst' is a
+            # platform-specific path.
+            for skip in '.svn/', 'CVS/':
+                if src.startswith(skip) or '/' + skip in src:
+                    return None
+            self.outputs.append(dst)
+            log.debug("Copying %s to %s", src, dst)
+            return dst
+
+        unpack_archive(self.source, self.target, skimmer)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/saveopts.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/saveopts.py
new file mode 100644
index 0000000000000000000000000000000000000000..f175de1015c2c65a7e67e38af59b0aa9d5b71d91
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/saveopts.py
@@ -0,0 +1,21 @@
+from setuptools.command.setopt import edit_config, option_base
+
+
+class saveopts(option_base):
+    """Save command-line options to a file"""
+
+    description = "save supplied options to setup.cfg or other config file"
+
+    def run(self):
+        dist = self.distribution
+        settings = {}
+
+        for cmd in dist.command_options:
+            if cmd == 'saveopts':
+                continue  # don't save our own options!
+
+            for opt, (src, val) in dist.get_option_dict(cmd).items():
+                if src == "command line":
+                    settings.setdefault(cmd, {})[opt] = val
+
+        edit_config(self.filename, settings, self.dry_run)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/upload.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/upload.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cca47cea9f1fc53fee84744689696176725537c
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/upload.py
@@ -0,0 +1,17 @@
+from distutils import log
+from distutils.command import upload as orig
+
+from setuptools.errors import RemovedCommandError
+
+
+class upload(orig.upload):
+    """Formerly used to upload packages to PyPI."""
+
+    def run(self):
+        msg = (
+            "The upload command has been removed, use twine to upload "
+            "instead (https://pypi.org/p/twine)"
+        )
+
+        self.announce("ERROR: " + msg, log.ERROR)
+        raise RemovedCommandError(msg)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/upload_docs.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/upload_docs.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fbbb62553f1b04be76999bff46a5786624c6d6e
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/command/upload_docs.py
@@ -0,0 +1,221 @@
+"""upload_docs
+
+Implements a Distutils 'upload_docs' subcommand (upload documentation to
+sites other than PyPi such as devpi).
+"""
+
+from base64 import standard_b64encode
+from distutils import log
+from distutils.errors import DistutilsOptionError
+import os
+import zipfile
+import tempfile
+import shutil
+import itertools
+import functools
+import http.client
+import urllib.parse
+
+from .._importlib import metadata
+from ..warnings import SetuptoolsDeprecationWarning
+
+from .upload import upload
+
+
+def _encode(s):
+    return s.encode('utf-8', 'surrogateescape')
+
+
+class upload_docs(upload):
+    # override the default repository as upload_docs isn't
+    # supported by Warehouse (and won't be).
+    DEFAULT_REPOSITORY = 'https://pypi.python.org/pypi/'
+
+    description = 'Upload documentation to sites other than PyPi such as devpi'
+
+    user_options = [
+        (
+            'repository=',
+            'r',
+            "url of repository [default: %s]" % upload.DEFAULT_REPOSITORY,
+        ),
+        ('show-response', None, 'display full response text from server'),
+        ('upload-dir=', None, 'directory to upload'),
+    ]
+    boolean_options = upload.boolean_options
+
+    def has_sphinx(self):
+        return bool(
+            self.upload_dir is None
+            and metadata.entry_points(group='distutils.commands', name='build_sphinx')
+        )
+
+    sub_commands = [('build_sphinx', has_sphinx)]  # type: ignore[list-item] # TODO: Fix in typeshed distutils stubs
+
+    def initialize_options(self):
+        upload.initialize_options(self)
+        self.upload_dir = None
+        self.target_dir = None
+
+    def finalize_options(self):
+        log.warn(
+            "Upload_docs command is deprecated. Use Read the Docs "
+            "(https://readthedocs.org) instead."
+        )
+        upload.finalize_options(self)
+        if self.upload_dir is None:
+            if self.has_sphinx():
+                build_sphinx = self.get_finalized_command('build_sphinx')
+                self.target_dir = dict(build_sphinx.builder_target_dirs)['html']
+            else:
+                build = self.get_finalized_command('build')
+                self.target_dir = os.path.join(build.build_base, 'docs')
+        else:
+            self.ensure_dirname('upload_dir')
+            self.target_dir = self.upload_dir
+        self.announce('Using upload directory %s' % self.target_dir)
+
+    def create_zipfile(self, filename):
+        zip_file = zipfile.ZipFile(filename, "w")
+        try:
+            self.mkpath(self.target_dir)  # just in case
+            for root, dirs, files in os.walk(self.target_dir):
+                if root == self.target_dir and not files:
+                    tmpl = "no files found in upload directory '%s'"
+                    raise DistutilsOptionError(tmpl % self.target_dir)
+                for name in files:
+                    full = os.path.join(root, name)
+                    relative = root[len(self.target_dir) :].lstrip(os.path.sep)
+                    dest = os.path.join(relative, name)
+                    zip_file.write(full, dest)
+        finally:
+            zip_file.close()
+
+    def run(self):
+        SetuptoolsDeprecationWarning.emit(
+            "Deprecated command",
+            """
+            upload_docs is deprecated and will be removed in a future version.
+            Instead, use tools like devpi and Read the Docs; or lower level tools like
+            httpie and curl to interact directly with your hosting service API.
+            """,
+            due_date=(2023, 9, 26),  # warning introduced in 27 Jul 2022
+        )
+
+        # Run sub commands
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        tmp_dir = tempfile.mkdtemp()
+        name = self.distribution.metadata.get_name()
+        zip_file = os.path.join(tmp_dir, "%s.zip" % name)
+        try:
+            self.create_zipfile(zip_file)
+            self.upload_file(zip_file)
+        finally:
+            shutil.rmtree(tmp_dir)
+
+    @staticmethod
+    def _build_part(item, sep_boundary):
+        key, values = item
+        title = '\nContent-Disposition: form-data; name="%s"' % key
+        # handle multiple entries for the same name
+        if not isinstance(values, list):
+            values = [values]
+        for value in values:
+            if isinstance(value, tuple):
+                title += '; filename="%s"' % value[0]
+                value = value[1]
+            else:
+                value = _encode(value)
+            yield sep_boundary
+            yield _encode(title)
+            yield b"\n\n"
+            yield value
+            if value and value[-1:] == b'\r':
+                yield b'\n'  # write an extra newline (lurve Macs)
+
+    @classmethod
+    def _build_multipart(cls, data):
+        """
+        Build up the MIME payload for the POST data
+        """
+        boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
+        sep_boundary = b'\n--' + boundary.encode('ascii')
+        end_boundary = sep_boundary + b'--'
+        end_items = (
+            end_boundary,
+            b"\n",
+        )
+        builder = functools.partial(
+            cls._build_part,
+            sep_boundary=sep_boundary,
+        )
+        part_groups = map(builder, data.items())
+        parts = itertools.chain.from_iterable(part_groups)
+        body_items = itertools.chain(parts, end_items)
+        content_type = 'multipart/form-data; boundary=%s' % boundary
+        return b''.join(body_items), content_type
+
+    def upload_file(self, filename):
+        with open(filename, 'rb') as f:
+            content = f.read()
+        meta = self.distribution.metadata
+        data = {
+            ':action': 'doc_upload',
+            'name': meta.get_name(),
+            'content': (os.path.basename(filename), content),
+        }
+        # set up the authentication
+        credentials = _encode(self.username + ':' + self.password)
+        credentials = standard_b64encode(credentials).decode('ascii')
+        auth = "Basic " + credentials
+
+        body, ct = self._build_multipart(data)
+
+        msg = "Submitting documentation to %s" % (self.repository)
+        self.announce(msg, log.INFO)
+
+        # build the Request
+        # We can't use urllib2 since we need to send the Basic
+        # auth right with the first request
+        schema, netloc, url, params, query, fragments = urllib.parse.urlparse(
+            self.repository
+        )
+        assert not params and not query and not fragments
+        if schema == 'http':
+            conn = http.client.HTTPConnection(netloc)
+        elif schema == 'https':
+            conn = http.client.HTTPSConnection(netloc)
+        else:
+            raise AssertionError("unsupported schema " + schema)
+
+        data = ''
+        try:
+            conn.connect()
+            conn.putrequest("POST", url)
+            content_type = ct
+            conn.putheader('Content-type', content_type)
+            conn.putheader('Content-length', str(len(body)))
+            conn.putheader('Authorization', auth)
+            conn.endheaders()
+            conn.send(body)
+        except OSError as e:
+            self.announce(str(e), log.ERROR)
+            return
+
+        r = conn.getresponse()
+        if r.status == 200:
+            msg = 'Server response (%s): %s' % (r.status, r.reason)
+            self.announce(msg, log.INFO)
+        elif r.status == 301:
+            location = r.getheader('Location')
+            if location is None:
+                location = 'https://pythonhosted.org/%s/' % meta.get_name()
+            msg = 'Upload successful. Visit %s' % location
+            self.announce(msg, log.INFO)
+        else:
+            msg = 'Upload failed (%s): %s' % (r.status, r.reason)
+            self.announce(msg, log.ERROR)
+        if self.show_response:
+            print('-' * 75, r.read(), '-' * 75)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..edbff7efbbaecc3df5ce4ceb8a2236ab250491ae
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py310.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py310.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3475e40198e5bd0a930224df3b6a95620afc6b5
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py310.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py311.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py311.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f82c398a9fca86602f7826ecca1727a40d5a0b46
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py311.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py39.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py39.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2e111118bae67621c6bbe6398a631961d69333d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/__pycache__/py39.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py310.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py310.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7d53d6de949ab473b0b55bea4108511f9567966
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py310.py
@@ -0,0 +1,10 @@
+import sys
+
+
+__all__ = ['tomllib']
+
+
+if sys.version_info >= (3, 11):
+    import tomllib
+else:  # pragma: no cover
+    from setuptools.extern import tomli as tomllib
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py311.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py311.py
new file mode 100644
index 0000000000000000000000000000000000000000..5069c441c462c60b0896b0cd8ad9599a4b49a992
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py311.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+import shutil
+import sys
+from typing import Any, Callable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from _typeshed import StrOrBytesPath, ExcInfo
+
+# Same as shutil._OnExcCallback from typeshed
+_OnExcCallback = Callable[[Callable[..., Any], str, BaseException], object]
+
+
+def shutil_rmtree(
+    path: StrOrBytesPath,
+    ignore_errors: bool = False,
+    onexc: _OnExcCallback | None = None,
+) -> None:
+    if sys.version_info >= (3, 12):
+        return shutil.rmtree(path, ignore_errors, onexc=onexc)
+
+    def _handler(fn: Callable[..., Any], path: str, excinfo: ExcInfo) -> None:
+        if onexc:
+            onexc(fn, path, excinfo[1])
+
+    return shutil.rmtree(path, ignore_errors, onerror=_handler)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py39.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py39.py
new file mode 100644
index 0000000000000000000000000000000000000000..04a4abe5a9e61596f97dc3d1b0b3da93a558158f
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/compat/py39.py
@@ -0,0 +1,9 @@
+import sys
+
+# Explicitly use the ``"locale"`` encoding in versions that support it,
+# otherwise just rely on the implicit handling of ``encoding=None``.
+# Since all platforms that support ``EncodingWarning`` also support
+# ``encoding="locale"``, this can be used to suppress the warning.
+# However, please try to use UTF-8 when possible
+# (.pth files are the notorious exception: python/cpython#77102, pypa/setuptools#3937).
+LOCALE_ENCODING = "locale" if sys.version_info >= (3, 10) else None
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcc7d008d6c9b3ab2610f2a1b05c4c1eeb5e1416
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__init__.py
@@ -0,0 +1,43 @@
+"""For backward compatibility, expose main functions from
+``setuptools.config.setupcfg``
+"""
+
+from functools import wraps
+from typing import Callable, TypeVar, cast
+
+from ..warnings import SetuptoolsDeprecationWarning
+from . import setupcfg
+
+Fn = TypeVar("Fn", bound=Callable)
+
+__all__ = ('parse_configuration', 'read_configuration')
+
+
+def _deprecation_notice(fn: Fn) -> Fn:
+    @wraps(fn)
+    def _wrapper(*args, **kwargs):
+        SetuptoolsDeprecationWarning.emit(
+            "Deprecated API usage.",
+            f"""
+            As setuptools moves its configuration towards `pyproject.toml`,
+            `{__name__}.{fn.__name__}` became deprecated.
+
+            For the time being, you can use the `{setupcfg.__name__}` module
+            to access a backward compatible API, but this module is provisional
+            and might be removed in the future.
+
+            To read project metadata, consider using
+            ``build.util.project_wheel_metadata`` (https://pypi.org/project/build/).
+            For simple scenarios, you can also try parsing the file directly
+            with the help of ``configparser``.
+            """,
+            # due_date not defined yet, because the community still heavily relies on it
+            # Warning introduced in 24 Mar 2022
+        )
+        return fn(*args, **kwargs)
+
+    return cast(Fn, _wrapper)
+
+
+read_configuration = _deprecation_notice(setupcfg.read_configuration)
+parse_configuration = _deprecation_notice(setupcfg.parse_configuration)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b18e0f628985253ba86a7b6ca9f5259fcfa36b95
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/_apply_pyprojecttoml.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/_apply_pyprojecttoml.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10ea92c78dd9283e12965b0ef4c3bdb39f552283
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/_apply_pyprojecttoml.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/expand.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/expand.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ba66ecdc7fb8ff45ce76c1a480e19950fc63e20b
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/expand.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/pyprojecttoml.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/pyprojecttoml.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72e561d03fbac4231d30c634e7f8bc13389644db
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/pyprojecttoml.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/setupcfg.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/setupcfg.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b7435a6c6057592164eb40c72ca0ecbe8280e5c0
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/__pycache__/setupcfg.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_apply_pyprojecttoml.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_apply_pyprojecttoml.py
new file mode 100644
index 0000000000000000000000000000000000000000..f44271c5ddb6010bdc8453de42085a7cba5d88ed
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_apply_pyprojecttoml.py
@@ -0,0 +1,436 @@
+"""Translation layer between pyproject config and setuptools distribution and
+metadata objects.
+
+The distribution and metadata objects are modeled after (an old version of)
+core metadata, therefore configs in the format specified for ``pyproject.toml``
+need to be processed before being applied.
+
+**PRIVATE MODULE**: API reserved for setuptools internal usage only.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from email.headerregistry import Address
+from functools import partial, reduce
+from inspect import cleandoc
+from itertools import chain
+from types import MappingProxyType
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Mapping,
+    Union,
+)
+from .._path import StrPath
+from ..errors import RemovedConfigError
+from ..warnings import SetuptoolsWarning
+
+if TYPE_CHECKING:
+    from distutils.dist import _OptionsList
+    from setuptools._importlib import metadata
+    from setuptools.dist import Distribution
+
+EMPTY: Mapping = MappingProxyType({})  # Immutable dict-like
+_ProjectReadmeValue = Union[str, Dict[str, str]]
+_CorrespFn = Callable[["Distribution", Any, StrPath], None]
+_Correspondence = Union[str, _CorrespFn]
+
+_logger = logging.getLogger(__name__)
+
+
+def apply(dist: Distribution, config: dict, filename: StrPath) -> Distribution:
+    """Apply configuration dict read with :func:`read_configuration`"""
+
+    if not config:
+        return dist  # short-circuit unrelated pyproject.toml file
+
+    root_dir = os.path.dirname(filename) or "."
+
+    _apply_project_table(dist, config, root_dir)
+    _apply_tool_table(dist, config, filename)
+
+    current_directory = os.getcwd()
+    os.chdir(root_dir)
+    try:
+        dist._finalize_requires()
+        dist._finalize_license_files()
+    finally:
+        os.chdir(current_directory)
+
+    return dist
+
+
+def _apply_project_table(dist: Distribution, config: dict, root_dir: StrPath):
+    project_table = config.get("project", {}).copy()
+    if not project_table:
+        return  # short-circuit
+
+    _handle_missing_dynamic(dist, project_table)
+    _unify_entry_points(project_table)
+
+    for field, value in project_table.items():
+        norm_key = json_compatible_key(field)
+        corresp = PYPROJECT_CORRESPONDENCE.get(norm_key, norm_key)
+        if callable(corresp):
+            corresp(dist, value, root_dir)
+        else:
+            _set_config(dist, corresp, value)
+
+
+def _apply_tool_table(dist: Distribution, config: dict, filename: StrPath):
+    tool_table = config.get("tool", {}).get("setuptools", {})
+    if not tool_table:
+        return  # short-circuit
+
+    for field, value in tool_table.items():
+        norm_key = json_compatible_key(field)
+
+        if norm_key in TOOL_TABLE_REMOVALS:
+            suggestion = cleandoc(TOOL_TABLE_REMOVALS[norm_key])
+            msg = f"""
+            The parameter `tool.setuptools.{field}` was long deprecated
+            and has been removed from `pyproject.toml`.
+            """
+            raise RemovedConfigError("\n".join([cleandoc(msg), suggestion]))
+
+        norm_key = TOOL_TABLE_RENAMES.get(norm_key, norm_key)
+        _set_config(dist, norm_key, value)
+
+    _copy_command_options(config, dist, filename)
+
+
+def _handle_missing_dynamic(dist: Distribution, project_table: dict):
+    """Be temporarily forgiving with ``dynamic`` fields not listed in ``dynamic``"""
+    dynamic = set(project_table.get("dynamic", []))
+    for field, getter in _PREVIOUSLY_DEFINED.items():
+        if not (field in project_table or field in dynamic):
+            value = getter(dist)
+            if value:
+                _MissingDynamic.emit(field=field, value=value)
+                project_table[field] = _RESET_PREVIOUSLY_DEFINED.get(field)
+
+
+def json_compatible_key(key: str) -> str:
+    """As defined in :pep:`566#json-compatible-metadata`"""
+    return key.lower().replace("-", "_")
+
+
+def _set_config(dist: Distribution, field: str, value: Any):
+    setter = getattr(dist.metadata, f"set_{field}", None)
+    if setter:
+        setter(value)
+    elif hasattr(dist.metadata, field) or field in SETUPTOOLS_PATCHES:
+        setattr(dist.metadata, field, value)
+    else:
+        setattr(dist, field, value)
+
+
+_CONTENT_TYPES = {
+    ".md": "text/markdown",
+    ".rst": "text/x-rst",
+    ".txt": "text/plain",
+}
+
+
+def _guess_content_type(file: str) -> str | None:
+    _, ext = os.path.splitext(file.lower())
+    if not ext:
+        return None
+
+    if ext in _CONTENT_TYPES:
+        return _CONTENT_TYPES[ext]
+
+    valid = ", ".join(f"{k} ({v})" for k, v in _CONTENT_TYPES.items())
+    msg = f"only the following file extensions are recognized: {valid}."
+    raise ValueError(f"Undefined content type for {file}, {msg}")
+
+
+def _long_description(dist: Distribution, val: _ProjectReadmeValue, root_dir: StrPath):
+    from setuptools.config import expand
+
+    file: str | tuple[()]
+    if isinstance(val, str):
+        file = val
+        text = expand.read_files(file, root_dir)
+        ctype = _guess_content_type(file)
+    else:
+        file = val.get("file") or ()
+        text = val.get("text") or expand.read_files(file, root_dir)
+        ctype = val["content-type"]
+
+    _set_config(dist, "long_description", text)
+
+    if ctype:
+        _set_config(dist, "long_description_content_type", ctype)
+
+    if file:
+        dist._referenced_files.add(file)
+
+
+def _license(dist: Distribution, val: dict, root_dir: StrPath):
+    from setuptools.config import expand
+
+    if "file" in val:
+        _set_config(dist, "license", expand.read_files([val["file"]], root_dir))
+        dist._referenced_files.add(val["file"])
+    else:
+        _set_config(dist, "license", val["text"])
+
+
+def _people(dist: Distribution, val: list[dict], _root_dir: StrPath, kind: str):
+    field = []
+    email_field = []
+    for person in val:
+        if "name" not in person:
+            email_field.append(person["email"])
+        elif "email" not in person:
+            field.append(person["name"])
+        else:
+            addr = Address(display_name=person["name"], addr_spec=person["email"])
+            email_field.append(str(addr))
+
+    if field:
+        _set_config(dist, kind, ", ".join(field))
+    if email_field:
+        _set_config(dist, f"{kind}_email", ", ".join(email_field))
+
+
+def _project_urls(dist: Distribution, val: dict, _root_dir):
+    _set_config(dist, "project_urls", val)
+
+
+def _python_requires(dist: Distribution, val: dict, _root_dir):
+    from setuptools.extern.packaging.specifiers import SpecifierSet
+
+    _set_config(dist, "python_requires", SpecifierSet(val))
+
+
+def _dependencies(dist: Distribution, val: list, _root_dir):
+    if getattr(dist, "install_requires", []):
+        msg = "`install_requires` overwritten in `pyproject.toml` (dependencies)"
+        SetuptoolsWarning.emit(msg)
+    dist.install_requires = val
+
+
+def _optional_dependencies(dist: Distribution, val: dict, _root_dir):
+    existing = getattr(dist, "extras_require", None) or {}
+    dist.extras_require = {**existing, **val}
+
+
+def _unify_entry_points(project_table: dict):
+    project = project_table
+    entry_points = project.pop("entry-points", project.pop("entry_points", {}))
+    renaming = {"scripts": "console_scripts", "gui_scripts": "gui_scripts"}
+    for key, value in list(project.items()):  # eager to allow modifications
+        norm_key = json_compatible_key(key)
+        if norm_key in renaming:
+            # Don't skip even if value is empty (reason: reset missing `dynamic`)
+            entry_points[renaming[norm_key]] = project.pop(key)
+
+    if entry_points:
+        project["entry-points"] = {
+            name: [f"{k} = {v}" for k, v in group.items()]
+            for name, group in entry_points.items()
+            if group  # now we can skip empty groups
+        }
+        # Sometimes this will set `project["entry-points"] = {}`, and that is
+        # intentional (for resetting configurations that are missing `dynamic`).
+
+
+def _copy_command_options(pyproject: dict, dist: Distribution, filename: StrPath):
+    tool_table = pyproject.get("tool", {})
+    cmdclass = tool_table.get("setuptools", {}).get("cmdclass", {})
+    valid_options = _valid_command_options(cmdclass)
+
+    cmd_opts = dist.command_options
+    for cmd, config in pyproject.get("tool", {}).get("distutils", {}).items():
+        cmd = json_compatible_key(cmd)
+        valid = valid_options.get(cmd, set())
+        cmd_opts.setdefault(cmd, {})
+        for key, value in config.items():
+            key = json_compatible_key(key)
+            cmd_opts[cmd][key] = (str(filename), value)
+            if key not in valid:
+                # To avoid removing options that are specified dynamically we
+                # just log a warn...
+                _logger.warning(f"Command option {cmd}.{key} is not defined")
+
+
+def _valid_command_options(cmdclass: Mapping = EMPTY) -> dict[str, set[str]]:
+    from .._importlib import metadata
+    from setuptools.dist import Distribution
+
+    valid_options = {"global": _normalise_cmd_options(Distribution.global_options)}
+
+    unloaded_entry_points = metadata.entry_points(group='distutils.commands')
+    loaded_entry_points = (_load_ep(ep) for ep in unloaded_entry_points)
+    entry_points = (ep for ep in loaded_entry_points if ep)
+    for cmd, cmd_class in chain(entry_points, cmdclass.items()):
+        opts = valid_options.get(cmd, set())
+        opts = opts | _normalise_cmd_options(getattr(cmd_class, "user_options", []))
+        valid_options[cmd] = opts
+
+    return valid_options
+
+
+def _load_ep(ep: metadata.EntryPoint) -> tuple[str, type] | None:
+    # Ignore all the errors
+    try:
+        return (ep.name, ep.load())
+    except Exception as ex:
+        msg = f"{ex.__class__.__name__} while trying to load entry-point {ep.name}"
+        _logger.warning(f"{msg}: {ex}")
+        return None
+
+
+def _normalise_cmd_option_key(name: str) -> str:
+    return json_compatible_key(name).strip("_=")
+
+
+def _normalise_cmd_options(desc: _OptionsList) -> set[str]:
+    return {_normalise_cmd_option_key(fancy_option[0]) for fancy_option in desc}
+
+
+def _get_previous_entrypoints(dist: Distribution) -> dict[str, list]:
+    ignore = ("console_scripts", "gui_scripts")
+    value = getattr(dist, "entry_points", None) or {}
+    return {k: v for k, v in value.items() if k not in ignore}
+
+
+def _get_previous_scripts(dist: Distribution) -> list | None:
+    value = getattr(dist, "entry_points", None) or {}
+    return value.get("console_scripts")
+
+
+def _get_previous_gui_scripts(dist: Distribution) -> list | None:
+    value = getattr(dist, "entry_points", None) or {}
+    return value.get("gui_scripts")
+
+
+def _attrgetter(attr):
+    """
+    Similar to ``operator.attrgetter`` but returns None if ``attr`` is not found
+    >>> from types import SimpleNamespace
+    >>> obj = SimpleNamespace(a=42, b=SimpleNamespace(c=13))
+    >>> _attrgetter("a")(obj)
+    42
+    >>> _attrgetter("b.c")(obj)
+    13
+    >>> _attrgetter("d")(obj) is None
+    True
+    """
+    return partial(reduce, lambda acc, x: getattr(acc, x, None), attr.split("."))
+
+
+def _some_attrgetter(*items):
+    """
+    Return the first "truth-y" attribute or None
+    >>> from types import SimpleNamespace
+    >>> obj = SimpleNamespace(a=42, b=SimpleNamespace(c=13))
+    >>> _some_attrgetter("d", "a", "b.c")(obj)
+    42
+    >>> _some_attrgetter("d", "e", "b.c", "a")(obj)
+    13
+    >>> _some_attrgetter("d", "e", "f")(obj) is None
+    True
+    """
+
+    def _acessor(obj):
+        values = (_attrgetter(i)(obj) for i in items)
+        return next((i for i in values if i is not None), None)
+
+    return _acessor
+
+
+PYPROJECT_CORRESPONDENCE: dict[str, _Correspondence] = {
+    "readme": _long_description,
+    "license": _license,
+    "authors": partial(_people, kind="author"),
+    "maintainers": partial(_people, kind="maintainer"),
+    "urls": _project_urls,
+    "dependencies": _dependencies,
+    "optional_dependencies": _optional_dependencies,
+    "requires_python": _python_requires,
+}
+
+TOOL_TABLE_RENAMES = {"script_files": "scripts"}
+TOOL_TABLE_REMOVALS = {
+    "namespace_packages": """
+        Please migrate to implicit native namespaces instead.
+        See https://packaging.python.org/en/latest/guides/packaging-namespace-packages/.
+        """,
+}
+
+SETUPTOOLS_PATCHES = {
+    "long_description_content_type",
+    "project_urls",
+    "provides_extras",
+    "license_file",
+    "license_files",
+}
+
+_PREVIOUSLY_DEFINED = {
+    "name": _attrgetter("metadata.name"),
+    "version": _attrgetter("metadata.version"),
+    "description": _attrgetter("metadata.description"),
+    "readme": _attrgetter("metadata.long_description"),
+    "requires-python": _some_attrgetter("python_requires", "metadata.python_requires"),
+    "license": _attrgetter("metadata.license"),
+    "authors": _some_attrgetter("metadata.author", "metadata.author_email"),
+    "maintainers": _some_attrgetter("metadata.maintainer", "metadata.maintainer_email"),
+    "keywords": _attrgetter("metadata.keywords"),
+    "classifiers": _attrgetter("metadata.classifiers"),
+    "urls": _attrgetter("metadata.project_urls"),
+    "entry-points": _get_previous_entrypoints,
+    "scripts": _get_previous_scripts,
+    "gui-scripts": _get_previous_gui_scripts,
+    "dependencies": _attrgetter("install_requires"),
+    "optional-dependencies": _attrgetter("extras_require"),
+}
+
+
+_RESET_PREVIOUSLY_DEFINED: dict = {
+    # Fix improper setting: given in `setup.py`, but not listed in `dynamic`
+    # dict: pyproject name => value to which reset
+    "license": {},
+    "authors": [],
+    "maintainers": [],
+    "keywords": [],
+    "classifiers": [],
+    "urls": {},
+    "entry-points": {},
+    "scripts": {},
+    "gui-scripts": {},
+    "dependencies": [],
+    "optional-dependencies": {},
+}
+
+
+class _MissingDynamic(SetuptoolsWarning):
+    _SUMMARY = "`{field}` defined outside of `pyproject.toml` is ignored."
+
+    _DETAILS = """
+    The following seems to be defined outside of `pyproject.toml`:
+
+    `{field} = {value!r}`
+
+    According to the spec (see the link below), however, setuptools CANNOT
+    consider this value unless `{field}` is listed as `dynamic`.
+
+    https://packaging.python.org/en/latest/specifications/pyproject-toml/#declaring-project-metadata-the-project-table
+
+    To prevent this problem, you can list `{field}` under `dynamic` or alternatively
+    remove the `[project]` table from your file and rely entirely on other means of
+    configuration.
+    """
+    # TODO: Consider removing this check in the future?
+    #       There is a trade-off here between improving "debug-ability" and the cost
+    #       of running/testing/maintaining these unnecessary checks...
+
+    @classmethod
+    def details(cls, field: str, value: Any) -> str:
+        return cls._DETAILS.format(field=field, value=value)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f612bd51c4ac1d272318ed6fb96e4aeb57e3e27
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__init__.py
@@ -0,0 +1,34 @@
+from functools import reduce
+from typing import Any, Callable, Dict
+
+from . import formats
+from .error_reporting import detailed_errors, ValidationError
+from .extra_validations import EXTRA_VALIDATIONS
+from .fastjsonschema_exceptions import JsonSchemaException, JsonSchemaValueException
+from .fastjsonschema_validations import validate as _validate
+
+__all__ = [
+    "validate",
+    "FORMAT_FUNCTIONS",
+    "EXTRA_VALIDATIONS",
+    "ValidationError",
+    "JsonSchemaException",
+    "JsonSchemaValueException",
+]
+
+
+FORMAT_FUNCTIONS: Dict[str, Callable[[str], bool]] = {
+    fn.__name__.replace("_", "-"): fn
+    for fn in formats.__dict__.values()
+    if callable(fn) and not fn.__name__.startswith("_")
+}
+
+
+def validate(data: Any) -> bool:
+    """Validate the given ``data`` object using JSON Schema
+    This function raises ``ValidationError`` if ``data`` is invalid.
+    """
+    with detailed_errors():
+        _validate(data, custom_formats=FORMAT_FUNCTIONS)
+        reduce(lambda acc, fn: fn(acc), EXTRA_VALIDATIONS, data)
+    return True
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..046bdec4dfe7ace3169c238271f95024c9c26219
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/error_reporting.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/error_reporting.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b9f5a373f7485ecc51d3d5f48d374c1020def070
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/error_reporting.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/extra_validations.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/extra_validations.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6cfd7c8dafb6356762761e3eac6cc0215a515c6f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/extra_validations.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/fastjsonschema_exceptions.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/fastjsonschema_exceptions.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..197ad77cc78f47cfe30bef4fae9f40d52ce0abdc
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/fastjsonschema_exceptions.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/formats.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/formats.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d70c85280774a8f87a41ed7be8b0f9a312c83479
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/__pycache__/formats.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/error_reporting.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/error_reporting.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6753604f5c95c2b0539bf394b97ecbc013e4818
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/error_reporting.py
@@ -0,0 +1,338 @@
+import io
+import json
+import logging
+import os
+import re
+import typing
+from contextlib import contextmanager
+from textwrap import indent, wrap
+from typing import Any, Dict, Generator, Iterator, List, Optional, Sequence, Union, cast
+
+from .fastjsonschema_exceptions import JsonSchemaValueException
+
+if typing.TYPE_CHECKING:
+    import sys
+
+    if sys.version_info < (3, 11):
+        from typing_extensions import Self
+    else:
+        from typing import Self
+
+_logger = logging.getLogger(__name__)
+
+_MESSAGE_REPLACEMENTS = {
+    "must be named by propertyName definition": "keys must be named by",
+    "one of contains definition": "at least one item that matches",
+    " same as const definition:": "",
+    "only specified items": "only items matching the definition",
+}
+
+_SKIP_DETAILS = (
+    "must not be empty",
+    "is always invalid",
+    "must not be there",
+)
+
+_NEED_DETAILS = {"anyOf", "oneOf", "allOf", "contains", "propertyNames", "not", "items"}
+
+_CAMEL_CASE_SPLITTER = re.compile(r"\W+|([A-Z][^A-Z\W]*)")
+_IDENTIFIER = re.compile(r"^[\w_]+$", re.I)
+
+_TOML_JARGON = {
+    "object": "table",
+    "property": "key",
+    "properties": "keys",
+    "property names": "keys",
+}
+
+_FORMATS_HELP = """
+For more details about `format` see
+https://validate-pyproject.readthedocs.io/en/latest/api/validate_pyproject.formats.html
+"""
+
+
+class ValidationError(JsonSchemaValueException):
+    """Report violations of a given JSON schema.
+
+    This class extends :exc:`~fastjsonschema.JsonSchemaValueException`
+    by adding the following properties:
+
+    - ``summary``: an improved version of the ``JsonSchemaValueException`` error message
+      with only the necessary information)
+
+    - ``details``: more contextual information about the error like the failing schema
+      itself and the value that violates the schema.
+
+    Depending on the level of the verbosity of the ``logging`` configuration
+    the exception message will be only ``summary`` (default) or a combination of
+    ``summary`` and ``details`` (when the logging level is set to :obj:`logging.DEBUG`).
+    """
+
+    summary = ""
+    details = ""
+    _original_message = ""
+
+    @classmethod
+    def _from_jsonschema(cls, ex: JsonSchemaValueException) -> "Self":
+        formatter = _ErrorFormatting(ex)
+        obj = cls(str(formatter), ex.value, formatter.name, ex.definition, ex.rule)
+        debug_code = os.getenv("JSONSCHEMA_DEBUG_CODE_GENERATION", "false").lower()
+        if debug_code != "false":  # pragma: no cover
+            obj.__cause__, obj.__traceback__ = ex.__cause__, ex.__traceback__
+        obj._original_message = ex.message
+        obj.summary = formatter.summary
+        obj.details = formatter.details
+        return obj
+
+
+@contextmanager
+def detailed_errors() -> Generator[None, None, None]:
+    try:
+        yield
+    except JsonSchemaValueException as ex:
+        raise ValidationError._from_jsonschema(ex) from None
+
+
+class _ErrorFormatting:
+    def __init__(self, ex: JsonSchemaValueException):
+        self.ex = ex
+        self.name = f"`{self._simplify_name(ex.name)}`"
+        self._original_message: str = self.ex.message.replace(ex.name, self.name)
+        self._summary = ""
+        self._details = ""
+
+    def __str__(self) -> str:
+        if _logger.getEffectiveLevel() <= logging.DEBUG and self.details:
+            return f"{self.summary}\n\n{self.details}"
+
+        return self.summary
+
+    @property
+    def summary(self) -> str:
+        if not self._summary:
+            self._summary = self._expand_summary()
+
+        return self._summary
+
+    @property
+    def details(self) -> str:
+        if not self._details:
+            self._details = self._expand_details()
+
+        return self._details
+
+    @staticmethod
+    def _simplify_name(name: str) -> str:
+        x = len("data.")
+        return name[x:] if name.startswith("data.") else name
+
+    def _expand_summary(self) -> str:
+        msg = self._original_message
+
+        for bad, repl in _MESSAGE_REPLACEMENTS.items():
+            msg = msg.replace(bad, repl)
+
+        if any(substring in msg for substring in _SKIP_DETAILS):
+            return msg
+
+        schema = self.ex.rule_definition
+        if self.ex.rule in _NEED_DETAILS and schema:
+            summary = _SummaryWriter(_TOML_JARGON)
+            return f"{msg}:\n\n{indent(summary(schema), '    ')}"
+
+        return msg
+
+    def _expand_details(self) -> str:
+        optional = []
+        definition = self.ex.definition or {}
+        desc_lines = definition.pop("$$description", [])
+        desc = definition.pop("description", None) or " ".join(desc_lines)
+        if desc:
+            description = "\n".join(
+                wrap(
+                    desc,
+                    width=80,
+                    initial_indent="    ",
+                    subsequent_indent="    ",
+                    break_long_words=False,
+                )
+            )
+            optional.append(f"DESCRIPTION:\n{description}")
+        schema = json.dumps(definition, indent=4)
+        value = json.dumps(self.ex.value, indent=4)
+        defaults = [
+            f"GIVEN VALUE:\n{indent(value, '    ')}",
+            f"OFFENDING RULE: {self.ex.rule!r}",
+            f"DEFINITION:\n{indent(schema, '    ')}",
+        ]
+        msg = "\n\n".join(optional + defaults)
+        epilog = f"\n{_FORMATS_HELP}" if "format" in msg.lower() else ""
+        return msg + epilog
+
+
+class _SummaryWriter:
+    _IGNORE = frozenset(("description", "default", "title", "examples"))
+
+    def __init__(self, jargon: Optional[Dict[str, str]] = None):
+        self.jargon: Dict[str, str] = jargon or {}
+        # Clarify confusing terms
+        self._terms = {
+            "anyOf": "at least one of the following",
+            "oneOf": "exactly one of the following",
+            "allOf": "all of the following",
+            "not": "(*NOT* the following)",
+            "prefixItems": f"{self._jargon('items')} (in order)",
+            "items": "items",
+            "contains": "contains at least one of",
+            "propertyNames": (
+                f"non-predefined acceptable {self._jargon('property names')}"
+            ),
+            "patternProperties": f"{self._jargon('properties')} named via pattern",
+            "const": "predefined value",
+            "enum": "one of",
+        }
+        # Attributes that indicate that the definition is easy and can be done
+        # inline (e.g. string and number)
+        self._guess_inline_defs = [
+            "enum",
+            "const",
+            "maxLength",
+            "minLength",
+            "pattern",
+            "format",
+            "minimum",
+            "maximum",
+            "exclusiveMinimum",
+            "exclusiveMaximum",
+            "multipleOf",
+        ]
+
+    def _jargon(self, term: Union[str, List[str]]) -> Union[str, List[str]]:
+        if isinstance(term, list):
+            return [self.jargon.get(t, t) for t in term]
+        return self.jargon.get(term, term)
+
+    def __call__(
+        self,
+        schema: Union[dict, List[dict]],
+        prefix: str = "",
+        *,
+        _path: Sequence[str] = (),
+    ) -> str:
+        if isinstance(schema, list):
+            return self._handle_list(schema, prefix, _path)
+
+        filtered = self._filter_unecessary(schema, _path)
+        simple = self._handle_simple_dict(filtered, _path)
+        if simple:
+            return f"{prefix}{simple}"
+
+        child_prefix = self._child_prefix(prefix, "  ")
+        item_prefix = self._child_prefix(prefix, "- ")
+        indent = len(prefix) * " "
+        with io.StringIO() as buffer:
+            for i, (key, value) in enumerate(filtered.items()):
+                child_path = [*_path, key]
+                line_prefix = prefix if i == 0 else indent
+                buffer.write(f"{line_prefix}{self._label(child_path)}:")
+                # ^  just the first item should receive the complete prefix
+                if isinstance(value, dict):
+                    filtered = self._filter_unecessary(value, child_path)
+                    simple = self._handle_simple_dict(filtered, child_path)
+                    buffer.write(
+                        f" {simple}"
+                        if simple
+                        else f"\n{self(value, child_prefix, _path=child_path)}"
+                    )
+                elif isinstance(value, list) and (
+                    key != "type" or self._is_property(child_path)
+                ):
+                    children = self._handle_list(value, item_prefix, child_path)
+                    sep = " " if children.startswith("[") else "\n"
+                    buffer.write(f"{sep}{children}")
+                else:
+                    buffer.write(f" {self._value(value, child_path)}\n")
+            return buffer.getvalue()
+
+    def _is_unecessary(self, path: Sequence[str]) -> bool:
+        if self._is_property(path) or not path:  # empty path => instruction @ root
+            return False
+        key = path[-1]
+        return any(key.startswith(k) for k in "$_") or key in self._IGNORE
+
+    def _filter_unecessary(
+        self, schema: Dict[str, Any], path: Sequence[str]
+    ) -> Dict[str, Any]:
+        return {
+            key: value
+            for key, value in schema.items()
+            if not self._is_unecessary([*path, key])
+        }
+
+    def _handle_simple_dict(self, value: dict, path: Sequence[str]) -> Optional[str]:
+        inline = any(p in value for p in self._guess_inline_defs)
+        simple = not any(isinstance(v, (list, dict)) for v in value.values())
+        if inline or simple:
+            return f"{{{', '.join(self._inline_attrs(value, path))}}}\n"
+        return None
+
+    def _handle_list(
+        self, schemas: list, prefix: str = "", path: Sequence[str] = ()
+    ) -> str:
+        if self._is_unecessary(path):
+            return ""
+
+        repr_ = repr(schemas)
+        if all(not isinstance(e, (dict, list)) for e in schemas) and len(repr_) < 60:
+            return f"{repr_}\n"
+
+        item_prefix = self._child_prefix(prefix, "- ")
+        return "".join(
+            self(v, item_prefix, _path=[*path, f"[{i}]"]) for i, v in enumerate(schemas)
+        )
+
+    def _is_property(self, path: Sequence[str]) -> bool:
+        """Check if the given path can correspond to an arbitrarily named property"""
+        counter = 0
+        for key in path[-2::-1]:
+            if key not in {"properties", "patternProperties"}:
+                break
+            counter += 1
+
+        # If the counter if even, the path correspond to a JSON Schema keyword
+        # otherwise it can be any arbitrary string naming a property
+        return counter % 2 == 1
+
+    def _label(self, path: Sequence[str]) -> str:
+        *parents, key = path
+        if not self._is_property(path):
+            norm_key = _separate_terms(key)
+            return self._terms.get(key) or " ".join(self._jargon(norm_key))
+
+        if parents[-1] == "patternProperties":
+            return f"(regex {key!r})"
+        return repr(key)  # property name
+
+    def _value(self, value: Any, path: Sequence[str]) -> str:
+        if path[-1] == "type" and not self._is_property(path):
+            type_ = self._jargon(value)
+            return (
+                f"[{', '.join(type_)}]" if isinstance(value, list) else cast(str, type_)
+            )
+        return repr(value)
+
+    def _inline_attrs(self, schema: dict, path: Sequence[str]) -> Iterator[str]:
+        for key, value in schema.items():
+            child_path = [*path, key]
+            yield f"{self._label(child_path)}: {self._value(value, child_path)}"
+
+    def _child_prefix(self, parent_prefix: str, child_prefix: str) -> str:
+        return len(parent_prefix) * " " + child_prefix
+
+
+def _separate_terms(word: str) -> List[str]:
+    """
+    >>> _separate_terms("FooBar-foo")
+    ['foo', 'bar', 'foo']
+    """
+    return [w.lower() for w in _CAMEL_CASE_SPLITTER.split(word) if w]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/extra_validations.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/extra_validations.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4ffe651dd0ca44e16fe69726c67a7932bd4b914
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/extra_validations.py
@@ -0,0 +1,52 @@
+"""The purpose of this module is implement PEP 621 validations that are
+difficult to express as a JSON Schema (or that are not supported by the current
+JSON Schema library).
+"""
+
+from inspect import cleandoc
+from typing import Mapping, TypeVar
+
+from .error_reporting import ValidationError
+
+T = TypeVar("T", bound=Mapping)
+
+
+class RedefiningStaticFieldAsDynamic(ValidationError):
+    _DESC = """According to PEP 621:
+
+    Build back-ends MUST raise an error if the metadata specifies a field
+    statically as well as being listed in dynamic.
+    """
+    __doc__ = _DESC
+    _URL = (
+        "https://packaging.python.org/en/latest/specifications/"
+        "pyproject-toml/#dynamic"
+    )
+
+
+def validate_project_dynamic(pyproject: T) -> T:
+    project_table = pyproject.get("project", {})
+    dynamic = project_table.get("dynamic", [])
+
+    for field in dynamic:
+        if field in project_table:
+            raise RedefiningStaticFieldAsDynamic(
+                message=f"You cannot provide a value for `project.{field}` and "
+                "list it under `project.dynamic` at the same time",
+                value={
+                    field: project_table[field],
+                    "...": " # ...",
+                    "dynamic": dynamic,
+                },
+                name=f"data.project.{field}",
+                definition={
+                    "description": cleandoc(RedefiningStaticFieldAsDynamic._DESC),
+                    "see": RedefiningStaticFieldAsDynamic._URL,
+                },
+                rule="PEP 621",
+            )
+
+    return pyproject
+
+
+EXTRA_VALIDATIONS = (validate_project_dynamic,)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2dddd6a106f021a4723c1e8f5953ccc09e55e1f
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py
@@ -0,0 +1,51 @@
+import re
+
+
+SPLIT_RE = re.compile(r'[\.\[\]]+')
+
+
+class JsonSchemaException(ValueError):
+    """
+    Base exception of ``fastjsonschema`` library.
+    """
+
+
+class JsonSchemaValueException(JsonSchemaException):
+    """
+    Exception raised by validation function. Available properties:
+
+     * ``message`` containing human-readable information what is wrong (e.g. ``data.property[index] must be smaller than or equal to 42``),
+     * invalid ``value`` (e.g. ``60``),
+     * ``name`` of a path in the data structure (e.g. ``data.property[index]``),
+     * ``path`` as an array in the data structure (e.g. ``['data', 'property', 'index']``),
+     * the whole ``definition`` which the ``value`` has to fulfil (e.g. ``{'type': 'number', 'maximum': 42}``),
+     * ``rule`` which the ``value`` is breaking (e.g. ``maximum``)
+     * and ``rule_definition`` (e.g. ``42``).
+
+    .. versionchanged:: 2.14.0
+        Added all extra properties.
+    """
+
+    def __init__(self, message, value=None, name=None, definition=None, rule=None):
+        super().__init__(message)
+        self.message = message
+        self.value = value
+        self.name = name
+        self.definition = definition
+        self.rule = rule
+
+    @property
+    def path(self):
+        return [item for item in SPLIT_RE.split(self.name) if item != '']
+
+    @property
+    def rule_definition(self):
+        if not self.rule or not self.definition:
+            return None
+        return self.definition.get(self.rule)
+
+
+class JsonSchemaDefinitionException(JsonSchemaException):
+    """
+    Exception raised by generator of validation function.
+    """
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_validations.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_validations.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c58a55ea84c6b13b7696e235ba92183be9fdf94
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_validations.py
@@ -0,0 +1,1104 @@
+# noqa
+# ruff: noqa
+# flake8: noqa
+# pylint: skip-file
+# mypy: ignore-errors
+# yapf: disable
+# pylama:skip=1
+
+
+# *** PLEASE DO NOT MODIFY DIRECTLY: Automatically generated code *** 
+
+
+VERSION = "2.19.1"
+from decimal import Decimal
+import re
+from .fastjsonschema_exceptions import JsonSchemaValueException
+
+
+REGEX_PATTERNS = {
+    '^.*$': re.compile('^.*$'),
+    '.+': re.compile('.+'),
+    '^.+$': re.compile('^.+$'),
+    'idn-email_re_pattern': re.compile('^[^@]+@[^@]+\\.[^@]+\\Z')
+}
+
+NoneType = type(None)
+
+def validate(data, custom_formats={}, name_prefix=None):
+    validate_https___packaging_python_org_en_latest_specifications_declaring_build_dependencies(data, custom_formats, (name_prefix or "data") + "")
+    return data
+
+def validate_https___packaging_python_org_en_latest_specifications_declaring_build_dependencies(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-build-dependencies/', 'title': 'Data structure for ``pyproject.toml`` files', '$$description': ['File format containing build-time configurations for the Python ecosystem. ', ':pep:`517` initially defined a build-system independent format for source trees', 'which was complemented by :pep:`518` to provide a way of specifying dependencies ', 'for building Python projects.', 'Please notice the ``project`` table (as initially defined in  :pep:`621`) is not included', 'in this schema and should be considered separately.'], 'type': 'object', 'additionalProperties': False, 'properties': {'build-system': {'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, 'tool': {'type': 'object', 'properties': {'distutils': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/deprecated/distutils/configfile.html', 'title': '``tool.distutils`` table', '$$description': ['**EXPERIMENTAL** (NOT OFFICIALLY SUPPORTED): Use ``tool.distutils``', 'subtables to configure arguments for ``distutils`` commands.', 'Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` commands via `distutils configuration files', '`_.', 'See also `the old Python docs _`.'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, 'setuptools': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html', 'title': '``tool.setuptools`` table', '$$description': ['``setuptools``-specific configurations that can be set by users that require', 'customization.', 'These configurations are completely optional and probably can be skipped when', 'creating simple packages. They are equivalent to some of the `Keywords', '`_', 'used by the ``setup.py`` file, and can be set via the ``tool.setuptools`` table.', 'It considers only ``setuptools`` `parameters', '`_', 'that are not covered by :pep:`621`; and intentionally excludes ``dependency_links``', 'and ``setup_requires`` (incompatible with modern workflows/standards).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'$$description': ['Whether the project can be safely installed and run from a zip file.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'boolean'}, 'script-files': {'$$description': ['Legacy way of defining scripts (entry-points are preferred).', 'Equivalent to the ``script`` keyword in ``setup.py``', '(it was renamed to avoid confusion with entry-point based ``project.scripts``', 'defined in :pep:`621`).', '**DISCOURAGED**: generic script wrappers are tricky and may not work properly.', 'Whenever possible, please use ``project.scripts`` instead.'], 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$ref': '#/definitions/package-name'}}, {'$ref': '#/definitions/find-directive'}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$ref': '#/definitions/package-name'}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html', 'description': '**DEPRECATED**: use implicit namespaces instead (:pep:`420`).'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['``dict``-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', '**DISCOURAGED**: please notice this might not work as expected with wheels.', 'Whenever possible, consider using data files inside the package directories', '(or create a new namespace package that only contains data files).', 'See `data files support', '`_.'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', '    cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['**PROVISIONAL**: list of glob patterns for all license files being distributed.', '(likely to become standard with :pep:`639`).', "By default: ``['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*']``"], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'$ref': '#/definitions/attr-directive'}, {'$ref': '#/definitions/file-directive'}]}, 'classifiers': {'$ref': '#/definitions/file-directive'}, 'description': {'$ref': '#/definitions/file-directive'}, 'entry-points': {'$ref': '#/definitions/file-directive'}, 'dependencies': {'$ref': '#/definitions/file-directive-for-dependencies'}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$ref': '#/definitions/file-directive-for-dependencies'}}}, 'readme': {'type': 'object', 'anyOf': [{'$ref': '#/definitions/file-directive'}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'$ref': '#/definitions/file-directive/properties/file'}}, 'additionalProperties': False}], 'required': ['file']}}}}, 'definitions': {'package-name': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, 'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'file-directive-for-dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}}}}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data_keys = set(data.keys())
+        if "build-system" in data_keys:
+            data_keys.remove("build-system")
+            data__buildsystem = data["build-system"]
+            if not isinstance(data__buildsystem, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system must be object", value=data__buildsystem, name="" + (name_prefix or "data") + ".build-system", definition={'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, rule='type')
+            data__buildsystem_is_dict = isinstance(data__buildsystem, dict)
+            if data__buildsystem_is_dict:
+                data__buildsystem__missing_keys = set(['requires']) - data__buildsystem.keys()
+                if data__buildsystem__missing_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system must contain " + (str(sorted(data__buildsystem__missing_keys)) + " properties"), value=data__buildsystem, name="" + (name_prefix or "data") + ".build-system", definition={'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, rule='required')
+                data__buildsystem_keys = set(data__buildsystem.keys())
+                if "requires" in data__buildsystem_keys:
+                    data__buildsystem_keys.remove("requires")
+                    data__buildsystem__requires = data__buildsystem["requires"]
+                    if not isinstance(data__buildsystem__requires, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.requires must be array", value=data__buildsystem__requires, name="" + (name_prefix or "data") + ".build-system.requires", definition={'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, rule='type')
+                    data__buildsystem__requires_is_list = isinstance(data__buildsystem__requires, (list, tuple))
+                    if data__buildsystem__requires_is_list:
+                        data__buildsystem__requires_len = len(data__buildsystem__requires)
+                        for data__buildsystem__requires_x, data__buildsystem__requires_item in enumerate(data__buildsystem__requires):
+                            if not isinstance(data__buildsystem__requires_item, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.requires[{data__buildsystem__requires_x}]".format(**locals()) + " must be string", value=data__buildsystem__requires_item, name="" + (name_prefix or "data") + ".build-system.requires[{data__buildsystem__requires_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if "build-backend" in data__buildsystem_keys:
+                    data__buildsystem_keys.remove("build-backend")
+                    data__buildsystem__buildbackend = data__buildsystem["build-backend"]
+                    if not isinstance(data__buildsystem__buildbackend, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.build-backend must be string", value=data__buildsystem__buildbackend, name="" + (name_prefix or "data") + ".build-system.build-backend", definition={'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, rule='type')
+                    if isinstance(data__buildsystem__buildbackend, str):
+                        if not custom_formats["pep517-backend-reference"](data__buildsystem__buildbackend):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.build-backend must be pep517-backend-reference", value=data__buildsystem__buildbackend, name="" + (name_prefix or "data") + ".build-system.build-backend", definition={'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, rule='format')
+                if "backend-path" in data__buildsystem_keys:
+                    data__buildsystem_keys.remove("backend-path")
+                    data__buildsystem__backendpath = data__buildsystem["backend-path"]
+                    if not isinstance(data__buildsystem__backendpath, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.backend-path must be array", value=data__buildsystem__backendpath, name="" + (name_prefix or "data") + ".build-system.backend-path", definition={'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}, rule='type')
+                    data__buildsystem__backendpath_is_list = isinstance(data__buildsystem__backendpath, (list, tuple))
+                    if data__buildsystem__backendpath_is_list:
+                        data__buildsystem__backendpath_len = len(data__buildsystem__backendpath)
+                        for data__buildsystem__backendpath_x, data__buildsystem__backendpath_item in enumerate(data__buildsystem__backendpath):
+                            if not isinstance(data__buildsystem__backendpath_item, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system.backend-path[{data__buildsystem__backendpath_x}]".format(**locals()) + " must be string", value=data__buildsystem__backendpath_item, name="" + (name_prefix or "data") + ".build-system.backend-path[{data__buildsystem__backendpath_x}]".format(**locals()) + "", definition={'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}, rule='type')
+                if data__buildsystem_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".build-system must not contain "+str(data__buildsystem_keys)+" properties", value=data__buildsystem, name="" + (name_prefix or "data") + ".build-system", definition={'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, rule='additionalProperties')
+        if "project" in data_keys:
+            data_keys.remove("project")
+            data__project = data["project"]
+            validate_https___packaging_python_org_en_latest_specifications_pyproject_toml(data__project, custom_formats, (name_prefix or "data") + ".project")
+        if "tool" in data_keys:
+            data_keys.remove("tool")
+            data__tool = data["tool"]
+            if not isinstance(data__tool, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".tool must be object", value=data__tool, name="" + (name_prefix or "data") + ".tool", definition={'type': 'object', 'properties': {'distutils': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/deprecated/distutils/configfile.html', 'title': '``tool.distutils`` table', '$$description': ['**EXPERIMENTAL** (NOT OFFICIALLY SUPPORTED): Use ``tool.distutils``', 'subtables to configure arguments for ``distutils`` commands.', 'Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` commands via `distutils configuration files', '`_.', 'See also `the old Python docs _`.'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, 'setuptools': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html', 'title': '``tool.setuptools`` table', '$$description': ['``setuptools``-specific configurations that can be set by users that require', 'customization.', 'These configurations are completely optional and probably can be skipped when', 'creating simple packages. They are equivalent to some of the `Keywords', '`_', 'used by the ``setup.py`` file, and can be set via the ``tool.setuptools`` table.', 'It considers only ``setuptools`` `parameters', '`_', 'that are not covered by :pep:`621`; and intentionally excludes ``dependency_links``', 'and ``setup_requires`` (incompatible with modern workflows/standards).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'$$description': ['Whether the project can be safely installed and run from a zip file.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'boolean'}, 'script-files': {'$$description': ['Legacy way of defining scripts (entry-points are preferred).', 'Equivalent to the ``script`` keyword in ``setup.py``', '(it was renamed to avoid confusion with entry-point based ``project.scripts``', 'defined in :pep:`621`).', '**DISCOURAGED**: generic script wrappers are tricky and may not work properly.', 'Whenever possible, please use ``project.scripts`` instead.'], 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$ref': '#/definitions/package-name'}}, {'$ref': '#/definitions/find-directive'}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$ref': '#/definitions/package-name'}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html', 'description': '**DEPRECATED**: use implicit namespaces instead (:pep:`420`).'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['``dict``-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', '**DISCOURAGED**: please notice this might not work as expected with wheels.', 'Whenever possible, consider using data files inside the package directories', '(or create a new namespace package that only contains data files).', 'See `data files support', '`_.'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', '    cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['**PROVISIONAL**: list of glob patterns for all license files being distributed.', '(likely to become standard with :pep:`639`).', "By default: ``['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*']``"], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'$ref': '#/definitions/attr-directive'}, {'$ref': '#/definitions/file-directive'}]}, 'classifiers': {'$ref': '#/definitions/file-directive'}, 'description': {'$ref': '#/definitions/file-directive'}, 'entry-points': {'$ref': '#/definitions/file-directive'}, 'dependencies': {'$ref': '#/definitions/file-directive-for-dependencies'}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$ref': '#/definitions/file-directive-for-dependencies'}}}, 'readme': {'type': 'object', 'anyOf': [{'$ref': '#/definitions/file-directive'}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'$ref': '#/definitions/file-directive/properties/file'}}, 'additionalProperties': False}], 'required': ['file']}}}}, 'definitions': {'package-name': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, 'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'file-directive-for-dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}}}, rule='type')
+            data__tool_is_dict = isinstance(data__tool, dict)
+            if data__tool_is_dict:
+                data__tool_keys = set(data__tool.keys())
+                if "distutils" in data__tool_keys:
+                    data__tool_keys.remove("distutils")
+                    data__tool__distutils = data__tool["distutils"]
+                    validate_https___setuptools_pypa_io_en_latest_deprecated_distutils_configfile_html(data__tool__distutils, custom_formats, (name_prefix or "data") + ".tool.distutils")
+                if "setuptools" in data__tool_keys:
+                    data__tool_keys.remove("setuptools")
+                    data__tool__setuptools = data__tool["setuptools"]
+                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html(data__tool__setuptools, custom_formats, (name_prefix or "data") + ".tool.setuptools")
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/declaring-build-dependencies/', 'title': 'Data structure for ``pyproject.toml`` files', '$$description': ['File format containing build-time configurations for the Python ecosystem. ', ':pep:`517` initially defined a build-system independent format for source trees', 'which was complemented by :pep:`518` to provide a way of specifying dependencies ', 'for building Python projects.', 'Please notice the ``project`` table (as initially defined in  :pep:`621`) is not included', 'in this schema and should be considered separately.'], 'type': 'object', 'additionalProperties': False, 'properties': {'build-system': {'type': 'object', 'description': 'Table used to store build-related data', 'additionalProperties': False, 'properties': {'requires': {'type': 'array', '$$description': ['List of dependencies in the :pep:`508` format required to execute the build', 'system. Please notice that the resulting dependency graph', '**MUST NOT contain cycles**'], 'items': {'type': 'string'}}, 'build-backend': {'type': 'string', 'description': 'Python object that will be used to perform the build according to :pep:`517`', 'format': 'pep517-backend-reference'}, 'backend-path': {'type': 'array', '$$description': ['List of directories to be prepended to ``sys.path`` when loading the', 'back-end, and running its hooks'], 'items': {'type': 'string', '$comment': 'Should be a path (TODO: enforce it with format?)'}}}, 'required': ['requires']}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, 'tool': {'type': 'object', 'properties': {'distutils': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/deprecated/distutils/configfile.html', 'title': '``tool.distutils`` table', '$$description': ['**EXPERIMENTAL** (NOT OFFICIALLY SUPPORTED): Use ``tool.distutils``', 'subtables to configure arguments for ``distutils`` commands.', 'Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` commands via `distutils configuration files', '`_.', 'See also `the old Python docs _`.'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, 'setuptools': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html', 'title': '``tool.setuptools`` table', '$$description': ['``setuptools``-specific configurations that can be set by users that require', 'customization.', 'These configurations are completely optional and probably can be skipped when', 'creating simple packages. They are equivalent to some of the `Keywords', '`_', 'used by the ``setup.py`` file, and can be set via the ``tool.setuptools`` table.', 'It considers only ``setuptools`` `parameters', '`_', 'that are not covered by :pep:`621`; and intentionally excludes ``dependency_links``', 'and ``setup_requires`` (incompatible with modern workflows/standards).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'$$description': ['Whether the project can be safely installed and run from a zip file.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'boolean'}, 'script-files': {'$$description': ['Legacy way of defining scripts (entry-points are preferred).', 'Equivalent to the ``script`` keyword in ``setup.py``', '(it was renamed to avoid confusion with entry-point based ``project.scripts``', 'defined in :pep:`621`).', '**DISCOURAGED**: generic script wrappers are tricky and may not work properly.', 'Whenever possible, please use ``project.scripts`` instead.'], 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$ref': '#/definitions/package-name'}}, {'$ref': '#/definitions/find-directive'}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$ref': '#/definitions/package-name'}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html', 'description': '**DEPRECATED**: use implicit namespaces instead (:pep:`420`).'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['``dict``-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', '**DISCOURAGED**: please notice this might not work as expected with wheels.', 'Whenever possible, consider using data files inside the package directories', '(or create a new namespace package that only contains data files).', 'See `data files support', '`_.'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', '    cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['**PROVISIONAL**: list of glob patterns for all license files being distributed.', '(likely to become standard with :pep:`639`).', "By default: ``['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*']``"], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'$ref': '#/definitions/attr-directive'}, {'$ref': '#/definitions/file-directive'}]}, 'classifiers': {'$ref': '#/definitions/file-directive'}, 'description': {'$ref': '#/definitions/file-directive'}, 'entry-points': {'$ref': '#/definitions/file-directive'}, 'dependencies': {'$ref': '#/definitions/file-directive-for-dependencies'}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'$ref': '#/definitions/file-directive-for-dependencies'}}}, 'readme': {'type': 'object', 'anyOf': [{'$ref': '#/definitions/file-directive'}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'$ref': '#/definitions/file-directive/properties/file'}}, 'additionalProperties': False}], 'required': ['file']}}}}, 'definitions': {'package-name': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, 'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'file-directive-for-dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}}}}, 'project': {'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$ref': '#/definitions/author'}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create command-line wrappers for the given', '`entry points `_.']}, 'gui-scripts': {'$ref': '#/definitions/entry-point-group', '$$description': ['Instruct the installer to create GUI wrappers for the given', '`entry points `_.', 'The difference between ``scripts`` and ``gui-scripts`` is only relevant in', 'Windows.']}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$ref': '#/definitions/entry-point-group'}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$ref': '#/definitions/dependency'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$ref': '#/definitions/dependency'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='additionalProperties')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html', 'title': '``tool.setuptools`` table', '$$description': ['``setuptools``-specific configurations that can be set by users that require', 'customization.', 'These configurations are completely optional and probably can be skipped when', 'creating simple packages. They are equivalent to some of the `Keywords', '`_', 'used by the ``setup.py`` file, and can be set via the ``tool.setuptools`` table.', 'It considers only ``setuptools`` `parameters', '`_', 'that are not covered by :pep:`621`; and intentionally excludes ``dependency_links``', 'and ``setup_requires`` (incompatible with modern workflows/standards).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'$$description': ['Whether the project can be safely installed and run from a zip file.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'boolean'}, 'script-files': {'$$description': ['Legacy way of defining scripts (entry-points are preferred).', 'Equivalent to the ``script`` keyword in ``setup.py``', '(it was renamed to avoid confusion with entry-point based ``project.scripts``', 'defined in :pep:`621`).', '**DISCOURAGED**: generic script wrappers are tricky and may not work properly.', 'Whenever possible, please use ``project.scripts`` instead.'], 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}}, {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html', 'description': '**DEPRECATED**: use implicit namespaces instead (:pep:`420`).'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['``dict``-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', '**DISCOURAGED**: please notice this might not work as expected with wheels.', 'Whenever possible, consider using data files inside the package directories', '(or create a new namespace package that only contains data files).', 'See `data files support', '`_.'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', '    cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['**PROVISIONAL**: list of glob patterns for all license files being distributed.', '(likely to become standard with :pep:`639`).', "By default: ``['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*']``"], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, 'readme': {'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}}}}, 'definitions': {'package-name': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, 'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'file-directive-for-dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data_keys = set(data.keys())
+        if "platforms" in data_keys:
+            data_keys.remove("platforms")
+            data__platforms = data["platforms"]
+            if not isinstance(data__platforms, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".platforms must be array", value=data__platforms, name="" + (name_prefix or "data") + ".platforms", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type')
+            data__platforms_is_list = isinstance(data__platforms, (list, tuple))
+            if data__platforms_is_list:
+                data__platforms_len = len(data__platforms)
+                for data__platforms_x, data__platforms_item in enumerate(data__platforms):
+                    if not isinstance(data__platforms_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".platforms[{data__platforms_x}]".format(**locals()) + " must be string", value=data__platforms_item, name="" + (name_prefix or "data") + ".platforms[{data__platforms_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+        if "provides" in data_keys:
+            data_keys.remove("provides")
+            data__provides = data["provides"]
+            if not isinstance(data__provides, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".provides must be array", value=data__provides, name="" + (name_prefix or "data") + ".provides", definition={'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, rule='type')
+            data__provides_is_list = isinstance(data__provides, (list, tuple))
+            if data__provides_is_list:
+                data__provides_len = len(data__provides)
+                for data__provides_x, data__provides_item in enumerate(data__provides):
+                    if not isinstance(data__provides_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + " must be string", value=data__provides_item, name="" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='type')
+                    if isinstance(data__provides_item, str):
+                        if not custom_formats["pep508-identifier"](data__provides_item):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + " must be pep508-identifier", value=data__provides_item, name="" + (name_prefix or "data") + ".provides[{data__provides_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='format')
+        if "obsoletes" in data_keys:
+            data_keys.remove("obsoletes")
+            data__obsoletes = data["obsoletes"]
+            if not isinstance(data__obsoletes, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".obsoletes must be array", value=data__obsoletes, name="" + (name_prefix or "data") + ".obsoletes", definition={'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, rule='type')
+            data__obsoletes_is_list = isinstance(data__obsoletes, (list, tuple))
+            if data__obsoletes_is_list:
+                data__obsoletes_len = len(data__obsoletes)
+                for data__obsoletes_x, data__obsoletes_item in enumerate(data__obsoletes):
+                    if not isinstance(data__obsoletes_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + " must be string", value=data__obsoletes_item, name="" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='type')
+                    if isinstance(data__obsoletes_item, str):
+                        if not custom_formats["pep508-identifier"](data__obsoletes_item):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + " must be pep508-identifier", value=data__obsoletes_item, name="" + (name_prefix or "data") + ".obsoletes[{data__obsoletes_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='format')
+        if "zip-safe" in data_keys:
+            data_keys.remove("zip-safe")
+            data__zipsafe = data["zip-safe"]
+            if not isinstance(data__zipsafe, (bool)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".zip-safe must be boolean", value=data__zipsafe, name="" + (name_prefix or "data") + ".zip-safe", definition={'$$description': ['Whether the project can be safely installed and run from a zip file.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'boolean'}, rule='type')
+        if "script-files" in data_keys:
+            data_keys.remove("script-files")
+            data__scriptfiles = data["script-files"]
+            if not isinstance(data__scriptfiles, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".script-files must be array", value=data__scriptfiles, name="" + (name_prefix or "data") + ".script-files", definition={'$$description': ['Legacy way of defining scripts (entry-points are preferred).', 'Equivalent to the ``script`` keyword in ``setup.py``', '(it was renamed to avoid confusion with entry-point based ``project.scripts``', 'defined in :pep:`621`).', '**DISCOURAGED**: generic script wrappers are tricky and may not work properly.', 'Whenever possible, please use ``project.scripts`` instead.'], 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, rule='type')
+            data__scriptfiles_is_list = isinstance(data__scriptfiles, (list, tuple))
+            if data__scriptfiles_is_list:
+                data__scriptfiles_len = len(data__scriptfiles)
+                for data__scriptfiles_x, data__scriptfiles_item in enumerate(data__scriptfiles):
+                    if not isinstance(data__scriptfiles_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".script-files[{data__scriptfiles_x}]".format(**locals()) + " must be string", value=data__scriptfiles_item, name="" + (name_prefix or "data") + ".script-files[{data__scriptfiles_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+        if "eager-resources" in data_keys:
+            data_keys.remove("eager-resources")
+            data__eagerresources = data["eager-resources"]
+            if not isinstance(data__eagerresources, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".eager-resources must be array", value=data__eagerresources, name="" + (name_prefix or "data") + ".eager-resources", definition={'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'array', 'items': {'type': 'string'}}, rule='type')
+            data__eagerresources_is_list = isinstance(data__eagerresources, (list, tuple))
+            if data__eagerresources_is_list:
+                data__eagerresources_len = len(data__eagerresources)
+                for data__eagerresources_x, data__eagerresources_item in enumerate(data__eagerresources):
+                    if not isinstance(data__eagerresources_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".eager-resources[{data__eagerresources_x}]".format(**locals()) + " must be string", value=data__eagerresources_item, name="" + (name_prefix or "data") + ".eager-resources[{data__eagerresources_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+        if "packages" in data_keys:
+            data_keys.remove("packages")
+            data__packages = data["packages"]
+            data__packages_one_of_count1 = 0
+            if data__packages_one_of_count1 < 2:
+                try:
+                    if not isinstance(data__packages, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".packages must be array", value=data__packages, name="" + (name_prefix or "data") + ".packages", definition={'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}}, rule='type')
+                    data__packages_is_list = isinstance(data__packages, (list, tuple))
+                    if data__packages_is_list:
+                        data__packages_len = len(data__packages)
+                        for data__packages_x, data__packages_item in enumerate(data__packages):
+                            validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_package_name(data__packages_item, custom_formats, (name_prefix or "data") + ".packages[{data__packages_x}]".format(**locals()))
+                    data__packages_one_of_count1 += 1
+                except JsonSchemaValueException: pass
+            if data__packages_one_of_count1 < 2:
+                try:
+                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_find_directive(data__packages, custom_formats, (name_prefix or "data") + ".packages")
+                    data__packages_one_of_count1 += 1
+                except JsonSchemaValueException: pass
+            if data__packages_one_of_count1 != 1:
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".packages must be valid exactly by one definition" + (" (" + str(data__packages_one_of_count1) + " matches found)"), value=data__packages, name="" + (name_prefix or "data") + ".packages", definition={'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}}, {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}]}, rule='oneOf')
+        if "package-dir" in data_keys:
+            data_keys.remove("package-dir")
+            data__packagedir = data["package-dir"]
+            if not isinstance(data__packagedir, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be object", value=data__packagedir, name="" + (name_prefix or "data") + ".package-dir", definition={'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, rule='type')
+            data__packagedir_is_dict = isinstance(data__packagedir, dict)
+            if data__packagedir_is_dict:
+                data__packagedir_keys = set(data__packagedir.keys())
+                for data__packagedir_key, data__packagedir_val in data__packagedir.items():
+                    if REGEX_PATTERNS['^.*$'].search(data__packagedir_key):
+                        if data__packagedir_key in data__packagedir_keys:
+                            data__packagedir_keys.remove(data__packagedir_key)
+                        if not isinstance(data__packagedir_val, (str)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir.{data__packagedir_key}".format(**locals()) + " must be string", value=data__packagedir_val, name="" + (name_prefix or "data") + ".package-dir.{data__packagedir_key}".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if data__packagedir_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must not contain "+str(data__packagedir_keys)+" properties", value=data__packagedir, name="" + (name_prefix or "data") + ".package-dir", definition={'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, rule='additionalProperties')
+                data__packagedir_len = len(data__packagedir)
+                if data__packagedir_len != 0:
+                    data__packagedir_property_names = True
+                    for data__packagedir_key in data__packagedir:
+                        try:
+                            data__packagedir_key_any_of_count2 = 0
+                            if not data__packagedir_key_any_of_count2:
+                                try:
+                                    if data__packagedir_key != "":
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be same as const definition: ", value=data__packagedir_key, name="" + (name_prefix or "data") + ".package-dir", definition={'const': ''}, rule='const')
+                                    data__packagedir_key_any_of_count2 += 1
+                                except JsonSchemaValueException: pass
+                            if not data__packagedir_key_any_of_count2:
+                                try:
+                                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_package_name(data__packagedir_key, custom_formats, (name_prefix or "data") + ".package-dir")
+                                    data__packagedir_key_any_of_count2 += 1
+                                except JsonSchemaValueException: pass
+                            if not data__packagedir_key_any_of_count2:
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir cannot be validated by any definition", value=data__packagedir_key, name="" + (name_prefix or "data") + ".package-dir", definition={'anyOf': [{'const': ''}, {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}]}, rule='anyOf')
+                        except JsonSchemaValueException:
+                            data__packagedir_property_names = False
+                    if not data__packagedir_property_names:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-dir must be named by propertyName definition", value=data__packagedir, name="" + (name_prefix or "data") + ".package-dir", definition={'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, rule='propertyNames')
+        if "package-data" in data_keys:
+            data_keys.remove("package-data")
+            data__packagedata = data["package-data"]
+            if not isinstance(data__packagedata, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be object", value=data__packagedata, name="" + (name_prefix or "data") + ".package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='type')
+            data__packagedata_is_dict = isinstance(data__packagedata, dict)
+            if data__packagedata_is_dict:
+                data__packagedata_keys = set(data__packagedata.keys())
+                for data__packagedata_key, data__packagedata_val in data__packagedata.items():
+                    if REGEX_PATTERNS['^.*$'].search(data__packagedata_key):
+                        if data__packagedata_key in data__packagedata_keys:
+                            data__packagedata_keys.remove(data__packagedata_key)
+                        if not isinstance(data__packagedata_val, (list, tuple)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data.{data__packagedata_key}".format(**locals()) + " must be array", value=data__packagedata_val, name="" + (name_prefix or "data") + ".package-data.{data__packagedata_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type')
+                        data__packagedata_val_is_list = isinstance(data__packagedata_val, (list, tuple))
+                        if data__packagedata_val_is_list:
+                            data__packagedata_val_len = len(data__packagedata_val)
+                            for data__packagedata_val_x, data__packagedata_val_item in enumerate(data__packagedata_val):
+                                if not isinstance(data__packagedata_val_item, (str)):
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data.{data__packagedata_key}[{data__packagedata_val_x}]".format(**locals()) + " must be string", value=data__packagedata_val_item, name="" + (name_prefix or "data") + ".package-data.{data__packagedata_key}[{data__packagedata_val_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if data__packagedata_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must not contain "+str(data__packagedata_keys)+" properties", value=data__packagedata, name="" + (name_prefix or "data") + ".package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='additionalProperties')
+                data__packagedata_len = len(data__packagedata)
+                if data__packagedata_len != 0:
+                    data__packagedata_property_names = True
+                    for data__packagedata_key in data__packagedata:
+                        try:
+                            data__packagedata_key_any_of_count3 = 0
+                            if not data__packagedata_key_any_of_count3:
+                                try:
+                                    if not isinstance(data__packagedata_key, (str)):
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be string", value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'type': 'string', 'format': 'python-module-name'}, rule='type')
+                                    if isinstance(data__packagedata_key, str):
+                                        if not custom_formats["python-module-name"](data__packagedata_key):
+                                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be python-module-name", value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'type': 'string', 'format': 'python-module-name'}, rule='format')
+                                    data__packagedata_key_any_of_count3 += 1
+                                except JsonSchemaValueException: pass
+                            if not data__packagedata_key_any_of_count3:
+                                try:
+                                    if data__packagedata_key != "*":
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be same as const definition: *", value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'const': '*'}, rule='const')
+                                    data__packagedata_key_any_of_count3 += 1
+                                except JsonSchemaValueException: pass
+                            if not data__packagedata_key_any_of_count3:
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data cannot be validated by any definition", value=data__packagedata_key, name="" + (name_prefix or "data") + ".package-data", definition={'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, rule='anyOf')
+                        except JsonSchemaValueException:
+                            data__packagedata_property_names = False
+                    if not data__packagedata_property_names:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".package-data must be named by propertyName definition", value=data__packagedata, name="" + (name_prefix or "data") + ".package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='propertyNames')
+        if "include-package-data" in data_keys:
+            data_keys.remove("include-package-data")
+            data__includepackagedata = data["include-package-data"]
+            if not isinstance(data__includepackagedata, (bool)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".include-package-data must be boolean", value=data__includepackagedata, name="" + (name_prefix or "data") + ".include-package-data", definition={'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, rule='type')
+        if "exclude-package-data" in data_keys:
+            data_keys.remove("exclude-package-data")
+            data__excludepackagedata = data["exclude-package-data"]
+            if not isinstance(data__excludepackagedata, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be object", value=data__excludepackagedata, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='type')
+            data__excludepackagedata_is_dict = isinstance(data__excludepackagedata, dict)
+            if data__excludepackagedata_is_dict:
+                data__excludepackagedata_keys = set(data__excludepackagedata.keys())
+                for data__excludepackagedata_key, data__excludepackagedata_val in data__excludepackagedata.items():
+                    if REGEX_PATTERNS['^.*$'].search(data__excludepackagedata_key):
+                        if data__excludepackagedata_key in data__excludepackagedata_keys:
+                            data__excludepackagedata_keys.remove(data__excludepackagedata_key)
+                        if not isinstance(data__excludepackagedata_val, (list, tuple)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}".format(**locals()) + " must be array", value=data__excludepackagedata_val, name="" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type')
+                        data__excludepackagedata_val_is_list = isinstance(data__excludepackagedata_val, (list, tuple))
+                        if data__excludepackagedata_val_is_list:
+                            data__excludepackagedata_val_len = len(data__excludepackagedata_val)
+                            for data__excludepackagedata_val_x, data__excludepackagedata_val_item in enumerate(data__excludepackagedata_val):
+                                if not isinstance(data__excludepackagedata_val_item, (str)):
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}[{data__excludepackagedata_val_x}]".format(**locals()) + " must be string", value=data__excludepackagedata_val_item, name="" + (name_prefix or "data") + ".exclude-package-data.{data__excludepackagedata_key}[{data__excludepackagedata_val_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if data__excludepackagedata_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must not contain "+str(data__excludepackagedata_keys)+" properties", value=data__excludepackagedata, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='additionalProperties')
+                data__excludepackagedata_len = len(data__excludepackagedata)
+                if data__excludepackagedata_len != 0:
+                    data__excludepackagedata_property_names = True
+                    for data__excludepackagedata_key in data__excludepackagedata:
+                        try:
+                            data__excludepackagedata_key_any_of_count4 = 0
+                            if not data__excludepackagedata_key_any_of_count4:
+                                try:
+                                    if not isinstance(data__excludepackagedata_key, (str)):
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be string", value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'type': 'string', 'format': 'python-module-name'}, rule='type')
+                                    if isinstance(data__excludepackagedata_key, str):
+                                        if not custom_formats["python-module-name"](data__excludepackagedata_key):
+                                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be python-module-name", value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'type': 'string', 'format': 'python-module-name'}, rule='format')
+                                    data__excludepackagedata_key_any_of_count4 += 1
+                                except JsonSchemaValueException: pass
+                            if not data__excludepackagedata_key_any_of_count4:
+                                try:
+                                    if data__excludepackagedata_key != "*":
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be same as const definition: *", value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'const': '*'}, rule='const')
+                                    data__excludepackagedata_key_any_of_count4 += 1
+                                except JsonSchemaValueException: pass
+                            if not data__excludepackagedata_key_any_of_count4:
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data cannot be validated by any definition", value=data__excludepackagedata_key, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, rule='anyOf')
+                        except JsonSchemaValueException:
+                            data__excludepackagedata_property_names = False
+                    if not data__excludepackagedata_property_names:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".exclude-package-data must be named by propertyName definition", value=data__excludepackagedata, name="" + (name_prefix or "data") + ".exclude-package-data", definition={'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='propertyNames')
+        if "namespace-packages" in data_keys:
+            data_keys.remove("namespace-packages")
+            data__namespacepackages = data["namespace-packages"]
+            if not isinstance(data__namespacepackages, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".namespace-packages must be array", value=data__namespacepackages, name="" + (name_prefix or "data") + ".namespace-packages", definition={'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html', 'description': '**DEPRECATED**: use implicit namespaces instead (:pep:`420`).'}, rule='type')
+            data__namespacepackages_is_list = isinstance(data__namespacepackages, (list, tuple))
+            if data__namespacepackages_is_list:
+                data__namespacepackages_len = len(data__namespacepackages)
+                for data__namespacepackages_x, data__namespacepackages_item in enumerate(data__namespacepackages):
+                    if not isinstance(data__namespacepackages_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + " must be string", value=data__namespacepackages_item, name="" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='type')
+                    if isinstance(data__namespacepackages_item, str):
+                        if not custom_formats["python-module-name"](data__namespacepackages_item):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + " must be python-module-name", value=data__namespacepackages_item, name="" + (name_prefix or "data") + ".namespace-packages[{data__namespacepackages_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='format')
+        if "py-modules" in data_keys:
+            data_keys.remove("py-modules")
+            data__pymodules = data["py-modules"]
+            if not isinstance(data__pymodules, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".py-modules must be array", value=data__pymodules, name="" + (name_prefix or "data") + ".py-modules", definition={'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, rule='type')
+            data__pymodules_is_list = isinstance(data__pymodules, (list, tuple))
+            if data__pymodules_is_list:
+                data__pymodules_len = len(data__pymodules)
+                for data__pymodules_x, data__pymodules_item in enumerate(data__pymodules):
+                    if not isinstance(data__pymodules_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + " must be string", value=data__pymodules_item, name="" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='type')
+                    if isinstance(data__pymodules_item, str):
+                        if not custom_formats["python-module-name"](data__pymodules_item):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + " must be python-module-name", value=data__pymodules_item, name="" + (name_prefix or "data") + ".py-modules[{data__pymodules_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='format')
+        if "data-files" in data_keys:
+            data_keys.remove("data-files")
+            data__datafiles = data["data-files"]
+            if not isinstance(data__datafiles, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".data-files must be object", value=data__datafiles, name="" + (name_prefix or "data") + ".data-files", definition={'$$description': ['``dict``-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', '**DISCOURAGED**: please notice this might not work as expected with wheels.', 'Whenever possible, consider using data files inside the package directories', '(or create a new namespace package that only contains data files).', 'See `data files support', '`_.'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, rule='type')
+            data__datafiles_is_dict = isinstance(data__datafiles, dict)
+            if data__datafiles_is_dict:
+                data__datafiles_keys = set(data__datafiles.keys())
+                for data__datafiles_key, data__datafiles_val in data__datafiles.items():
+                    if REGEX_PATTERNS['^.*$'].search(data__datafiles_key):
+                        if data__datafiles_key in data__datafiles_keys:
+                            data__datafiles_keys.remove(data__datafiles_key)
+                        if not isinstance(data__datafiles_val, (list, tuple)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".data-files.{data__datafiles_key}".format(**locals()) + " must be array", value=data__datafiles_val, name="" + (name_prefix or "data") + ".data-files.{data__datafiles_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type')
+                        data__datafiles_val_is_list = isinstance(data__datafiles_val, (list, tuple))
+                        if data__datafiles_val_is_list:
+                            data__datafiles_val_len = len(data__datafiles_val)
+                            for data__datafiles_val_x, data__datafiles_val_item in enumerate(data__datafiles_val):
+                                if not isinstance(data__datafiles_val_item, (str)):
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".data-files.{data__datafiles_key}[{data__datafiles_val_x}]".format(**locals()) + " must be string", value=data__datafiles_val_item, name="" + (name_prefix or "data") + ".data-files.{data__datafiles_key}[{data__datafiles_val_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+        if "cmdclass" in data_keys:
+            data_keys.remove("cmdclass")
+            data__cmdclass = data["cmdclass"]
+            if not isinstance(data__cmdclass, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".cmdclass must be object", value=data__cmdclass, name="" + (name_prefix or "data") + ".cmdclass", definition={'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', '    cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, rule='type')
+            data__cmdclass_is_dict = isinstance(data__cmdclass, dict)
+            if data__cmdclass_is_dict:
+                data__cmdclass_keys = set(data__cmdclass.keys())
+                for data__cmdclass_key, data__cmdclass_val in data__cmdclass.items():
+                    if REGEX_PATTERNS['^.*$'].search(data__cmdclass_key):
+                        if data__cmdclass_key in data__cmdclass_keys:
+                            data__cmdclass_keys.remove(data__cmdclass_key)
+                        if not isinstance(data__cmdclass_val, (str)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + " must be string", value=data__cmdclass_val, name="" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'python-qualified-identifier'}, rule='type')
+                        if isinstance(data__cmdclass_val, str):
+                            if not custom_formats["python-qualified-identifier"](data__cmdclass_val):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + " must be python-qualified-identifier", value=data__cmdclass_val, name="" + (name_prefix or "data") + ".cmdclass.{data__cmdclass_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'python-qualified-identifier'}, rule='format')
+        if "license-files" in data_keys:
+            data_keys.remove("license-files")
+            data__licensefiles = data["license-files"]
+            if not isinstance(data__licensefiles, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".license-files must be array", value=data__licensefiles, name="" + (name_prefix or "data") + ".license-files", definition={'type': 'array', 'items': {'type': 'string'}, '$$description': ['**PROVISIONAL**: list of glob patterns for all license files being distributed.', '(likely to become standard with :pep:`639`).', "By default: ``['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*']``"], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, rule='type')
+            data__licensefiles_is_list = isinstance(data__licensefiles, (list, tuple))
+            if data__licensefiles_is_list:
+                data__licensefiles_len = len(data__licensefiles)
+                for data__licensefiles_x, data__licensefiles_item in enumerate(data__licensefiles):
+                    if not isinstance(data__licensefiles_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".license-files[{data__licensefiles_x}]".format(**locals()) + " must be string", value=data__licensefiles_item, name="" + (name_prefix or "data") + ".license-files[{data__licensefiles_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+        if "dynamic" in data_keys:
+            data_keys.remove("dynamic")
+            data__dynamic = data["dynamic"]
+            if not isinstance(data__dynamic, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must be object", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, 'readme': {'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}}}, rule='type')
+            data__dynamic_is_dict = isinstance(data__dynamic, dict)
+            if data__dynamic_is_dict:
+                data__dynamic_keys = set(data__dynamic.keys())
+                if "version" in data__dynamic_keys:
+                    data__dynamic_keys.remove("version")
+                    data__dynamic__version = data__dynamic["version"]
+                    data__dynamic__version_one_of_count5 = 0
+                    if data__dynamic__version_one_of_count5 < 2:
+                        try:
+                            validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_attr_directive(data__dynamic__version, custom_formats, (name_prefix or "data") + ".dynamic.version")
+                            data__dynamic__version_one_of_count5 += 1
+                        except JsonSchemaValueException: pass
+                    if data__dynamic__version_one_of_count5 < 2:
+                        try:
+                            validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data__dynamic__version, custom_formats, (name_prefix or "data") + ".dynamic.version")
+                            data__dynamic__version_one_of_count5 += 1
+                        except JsonSchemaValueException: pass
+                    if data__dynamic__version_one_of_count5 != 1:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.version must be valid exactly by one definition" + (" (" + str(data__dynamic__version_one_of_count5) + " matches found)"), value=data__dynamic__version, name="" + (name_prefix or "data") + ".dynamic.version", definition={'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, rule='oneOf')
+                if "classifiers" in data__dynamic_keys:
+                    data__dynamic_keys.remove("classifiers")
+                    data__dynamic__classifiers = data__dynamic["classifiers"]
+                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data__dynamic__classifiers, custom_formats, (name_prefix or "data") + ".dynamic.classifiers")
+                if "description" in data__dynamic_keys:
+                    data__dynamic_keys.remove("description")
+                    data__dynamic__description = data__dynamic["description"]
+                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data__dynamic__description, custom_formats, (name_prefix or "data") + ".dynamic.description")
+                if "entry-points" in data__dynamic_keys:
+                    data__dynamic_keys.remove("entry-points")
+                    data__dynamic__entrypoints = data__dynamic["entry-points"]
+                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data__dynamic__entrypoints, custom_formats, (name_prefix or "data") + ".dynamic.entry-points")
+                if "dependencies" in data__dynamic_keys:
+                    data__dynamic_keys.remove("dependencies")
+                    data__dynamic__dependencies = data__dynamic["dependencies"]
+                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive_for_dependencies(data__dynamic__dependencies, custom_formats, (name_prefix or "data") + ".dynamic.dependencies")
+                if "optional-dependencies" in data__dynamic_keys:
+                    data__dynamic_keys.remove("optional-dependencies")
+                    data__dynamic__optionaldependencies = data__dynamic["optional-dependencies"]
+                    if not isinstance(data__dynamic__optionaldependencies, (dict)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be object", value=data__dynamic__optionaldependencies, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, rule='type')
+                    data__dynamic__optionaldependencies_is_dict = isinstance(data__dynamic__optionaldependencies, dict)
+                    if data__dynamic__optionaldependencies_is_dict:
+                        data__dynamic__optionaldependencies_keys = set(data__dynamic__optionaldependencies.keys())
+                        for data__dynamic__optionaldependencies_key, data__dynamic__optionaldependencies_val in data__dynamic__optionaldependencies.items():
+                            if REGEX_PATTERNS['.+'].search(data__dynamic__optionaldependencies_key):
+                                if data__dynamic__optionaldependencies_key in data__dynamic__optionaldependencies_keys:
+                                    data__dynamic__optionaldependencies_keys.remove(data__dynamic__optionaldependencies_key)
+                                validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive_for_dependencies(data__dynamic__optionaldependencies_val, custom_formats, (name_prefix or "data") + ".dynamic.optional-dependencies.{data__dynamic__optionaldependencies_key}".format(**locals()))
+                        if data__dynamic__optionaldependencies_keys:
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must not contain "+str(data__dynamic__optionaldependencies_keys)+" properties", value=data__dynamic__optionaldependencies, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, rule='additionalProperties')
+                        data__dynamic__optionaldependencies_len = len(data__dynamic__optionaldependencies)
+                        if data__dynamic__optionaldependencies_len != 0:
+                            data__dynamic__optionaldependencies_property_names = True
+                            for data__dynamic__optionaldependencies_key in data__dynamic__optionaldependencies:
+                                try:
+                                    if not isinstance(data__dynamic__optionaldependencies_key, (str)):
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be string", value=data__dynamic__optionaldependencies_key, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='type')
+                                    if isinstance(data__dynamic__optionaldependencies_key, str):
+                                        if not custom_formats["pep508-identifier"](data__dynamic__optionaldependencies_key):
+                                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be pep508-identifier", value=data__dynamic__optionaldependencies_key, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'string', 'format': 'pep508-identifier'}, rule='format')
+                                except JsonSchemaValueException:
+                                    data__dynamic__optionaldependencies_property_names = False
+                            if not data__dynamic__optionaldependencies_property_names:
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.optional-dependencies must be named by propertyName definition", value=data__dynamic__optionaldependencies, name="" + (name_prefix or "data") + ".dynamic.optional-dependencies", definition={'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, rule='propertyNames')
+                if "readme" in data__dynamic_keys:
+                    data__dynamic_keys.remove("readme")
+                    data__dynamic__readme = data__dynamic["readme"]
+                    if not isinstance(data__dynamic__readme, (dict)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme must be object", value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}, rule='type')
+                    data__dynamic__readme_any_of_count6 = 0
+                    if not data__dynamic__readme_any_of_count6:
+                        try:
+                            validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data__dynamic__readme, custom_formats, (name_prefix or "data") + ".dynamic.readme")
+                            data__dynamic__readme_any_of_count6 += 1
+                        except JsonSchemaValueException: pass
+                    if not data__dynamic__readme_any_of_count6:
+                        try:
+                            if not isinstance(data__dynamic__readme, (dict)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme must be object", value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}, rule='type')
+                            data__dynamic__readme_is_dict = isinstance(data__dynamic__readme, dict)
+                            if data__dynamic__readme_is_dict:
+                                data__dynamic__readme_keys = set(data__dynamic__readme.keys())
+                                if "content-type" in data__dynamic__readme_keys:
+                                    data__dynamic__readme_keys.remove("content-type")
+                                    data__dynamic__readme__contenttype = data__dynamic__readme["content-type"]
+                                    if not isinstance(data__dynamic__readme__contenttype, (str)):
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme.content-type must be string", value=data__dynamic__readme__contenttype, name="" + (name_prefix or "data") + ".dynamic.readme.content-type", definition={'type': 'string'}, rule='type')
+                                if "file" in data__dynamic__readme_keys:
+                                    data__dynamic__readme_keys.remove("file")
+                                    data__dynamic__readme__file = data__dynamic__readme["file"]
+                                    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive_properties_file(data__dynamic__readme__file, custom_formats, (name_prefix or "data") + ".dynamic.readme.file")
+                                if data__dynamic__readme_keys:
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme must not contain "+str(data__dynamic__readme_keys)+" properties", value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}, rule='additionalProperties')
+                            data__dynamic__readme_any_of_count6 += 1
+                        except JsonSchemaValueException: pass
+                    if not data__dynamic__readme_any_of_count6:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme cannot be validated by any definition", value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}, rule='anyOf')
+                    data__dynamic__readme_is_dict = isinstance(data__dynamic__readme, dict)
+                    if data__dynamic__readme_is_dict:
+                        data__dynamic__readme__missing_keys = set(['file']) - data__dynamic__readme.keys()
+                        if data__dynamic__readme__missing_keys:
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic.readme must contain " + (str(sorted(data__dynamic__readme__missing_keys)) + " properties"), value=data__dynamic__readme, name="" + (name_prefix or "data") + ".dynamic.readme", definition={'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}, rule='required')
+                if data__dynamic_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must not contain "+str(data__dynamic_keys)+" properties", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, 'readme': {'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}}}, rule='additionalProperties')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html', 'title': '``tool.setuptools`` table', '$$description': ['``setuptools``-specific configurations that can be set by users that require', 'customization.', 'These configurations are completely optional and probably can be skipped when', 'creating simple packages. They are equivalent to some of the `Keywords', '`_', 'used by the ``setup.py`` file, and can be set via the ``tool.setuptools`` table.', 'It considers only ``setuptools`` `parameters', '`_', 'that are not covered by :pep:`621`; and intentionally excludes ``dependency_links``', 'and ``setup_requires`` (incompatible with modern workflows/standards).'], 'type': 'object', 'additionalProperties': False, 'properties': {'platforms': {'type': 'array', 'items': {'type': 'string'}}, 'provides': {'$$description': ['Package and virtual package names contained within this package', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'obsoletes': {'$$description': ['Packages which this package renders obsolete', '**(not supported by pip)**'], 'type': 'array', 'items': {'type': 'string', 'format': 'pep508-identifier'}}, 'zip-safe': {'$$description': ['Whether the project can be safely installed and run from a zip file.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'boolean'}, 'script-files': {'$$description': ['Legacy way of defining scripts (entry-points are preferred).', 'Equivalent to the ``script`` keyword in ``setup.py``', '(it was renamed to avoid confusion with entry-point based ``project.scripts``', 'defined in :pep:`621`).', '**DISCOURAGED**: generic script wrappers are tricky and may not work properly.', 'Whenever possible, please use ``project.scripts`` instead.'], 'type': 'array', 'items': {'type': 'string'}, '$comment': 'TODO: is this field deprecated/should be removed?'}, 'eager-resources': {'$$description': ['Resources that should be extracted together, if any of them is needed,', 'or if any C extensions included in the project are imported.', '**OBSOLETE**: only relevant for ``pkg_resources``, ``easy_install`` and', '``setup.py install`` in the context of ``eggs`` (**DEPRECATED**).'], 'type': 'array', 'items': {'type': 'string'}}, 'packages': {'$$description': ['Packages that should be included in the distribution.', 'It can be given either as a list of package identifiers', 'or as a ``dict``-like structure with a single key ``find``', 'which corresponds to a dynamic call to', '``setuptools.config.expand.find_packages`` function.', 'The ``find`` key is associated with a nested ``dict``-like structure that can', 'contain ``where``, ``include``, ``exclude`` and ``namespaces`` keys,', 'mimicking the keyword arguments of the associated function.'], 'oneOf': [{'title': 'Array of Python package identifiers', 'type': 'array', 'items': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}}, {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}]}, 'package-dir': {'$$description': [':class:`dict`-like structure mapping from package names to directories where their', 'code can be found.', 'The empty string (as key) means that all packages are contained inside', 'the given directory will be included in the distribution.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'const': ''}, {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}]}, 'patternProperties': {'^.*$': {'type': 'string'}}}, 'package-data': {'$$description': ['Mapping from package names to lists of glob patterns.', 'Usually this option is not needed when using ``include-package-data = true``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'include-package-data': {'$$description': ['Automatically include any data files inside the package directories', 'that are specified by ``MANIFEST.in``', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'boolean'}, 'exclude-package-data': {'$$description': ['Mapping from package names to lists of glob patterns that should be excluded', 'For more information on how to include data files, check ``setuptools`` `docs', '`_.'], 'type': 'object', 'additionalProperties': False, 'propertyNames': {'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'const': '*'}]}, 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'namespace-packages': {'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'https://setuptools.pypa.io/en/latest/userguide/package_discovery.html', 'description': '**DEPRECATED**: use implicit namespaces instead (:pep:`420`).'}, 'py-modules': {'description': 'Modules that setuptools will manipulate', 'type': 'array', 'items': {'type': 'string', 'format': 'python-module-name'}, '$comment': 'TODO: clarify the relationship with ``packages``'}, 'data-files': {'$$description': ['``dict``-like structure where each key represents a directory and', 'the value is a list of glob patterns that should be installed in them.', '**DISCOURAGED**: please notice this might not work as expected with wheels.', 'Whenever possible, consider using data files inside the package directories', '(or create a new namespace package that only contains data files).', 'See `data files support', '`_.'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'array', 'items': {'type': 'string'}}}}, 'cmdclass': {'$$description': ['Mapping of distutils-style command names to ``setuptools.Command`` subclasses', 'which in turn should be represented by strings with a qualified class name', '(i.e., "dotted" form with module), e.g.::\n\n', '    cmdclass = {mycmd = "pkg.subpkg.module.CommandClass"}\n\n', 'The command class should be a directly defined at the top-level of the', 'containing module (no class nesting).'], 'type': 'object', 'patternProperties': {'^.*$': {'type': 'string', 'format': 'python-qualified-identifier'}}}, 'license-files': {'type': 'array', 'items': {'type': 'string'}, '$$description': ['**PROVISIONAL**: list of glob patterns for all license files being distributed.', '(likely to become standard with :pep:`639`).', "By default: ``['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*']``"], '$comment': 'TODO: revise if PEP 639 is accepted. Probably ``project.license-files``?'}, 'dynamic': {'type': 'object', 'description': 'Instructions for loading :pep:`621`-related metadata dynamically', 'additionalProperties': False, 'properties': {'version': {'$$description': ['A version dynamically loaded via either the ``attr:`` or ``file:``', 'directives. Please make sure the given file or attribute respects :pep:`440`.', 'Also ensure to set ``project.dynamic`` accordingly.'], 'oneOf': [{'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'classifiers': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'description': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'entry-points': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}, 'optional-dependencies': {'type': 'object', 'propertyNames': {'type': 'string', 'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'.+': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$ref': '#/definitions/file-directive'}]}}}, 'readme': {'type': 'object', 'anyOf': [{'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, {'type': 'object', 'properties': {'content-type': {'type': 'string'}, 'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'additionalProperties': False}], 'required': ['file']}}}}, 'definitions': {'package-name': {'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, 'file-directive': {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, 'file-directive-for-dependencies': {'title': "'file:' directive for dependencies", 'allOf': [{'$$description': ['**BETA**: subset of the ``requirements.txt`` format', 'without ``pip`` flags and options', '(one :pep:`508`-compliant string per line,', 'lines that are blank or start with ``#`` are excluded).', 'See `dynamic metadata', '`_.']}, {'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}]}, 'attr-directive': {'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, 'find-directive': {'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}}}, rule='additionalProperties')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive_properties_file(data, custom_formats={}, name_prefix=None):
+    data_one_of_count7 = 0
+    if data_one_of_count7 < 2:
+        try:
+            if not isinstance(data, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + " must be string", value=data, name="" + (name_prefix or "data") + "", definition={'type': 'string'}, rule='type')
+            data_one_of_count7 += 1
+        except JsonSchemaValueException: pass
+    if data_one_of_count7 < 2:
+        try:
+            if not isinstance(data, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + " must be array", value=data, name="" + (name_prefix or "data") + "", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type')
+            data_is_list = isinstance(data, (list, tuple))
+            if data_is_list:
+                data_len = len(data)
+                for data_x, data_item in enumerate(data):
+                    if not isinstance(data_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + "[{data_x}]".format(**locals()) + " must be string", value=data_item, name="" + (name_prefix or "data") + "[{data_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+            data_one_of_count7 += 1
+        except JsonSchemaValueException: pass
+    if data_one_of_count7 != 1:
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be valid exactly by one definition" + (" (" + str(data_one_of_count7) + " matches found)"), value=data, name="" + (name_prefix or "data") + "", definition={'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}, rule='oneOf')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive_for_dependencies(data, custom_formats={}, name_prefix=None):
+    validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data, custom_formats, (name_prefix or "data") + "")
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_file_directive(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data__missing_keys = set(['file']) - data.keys()
+        if data__missing_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain " + (str(sorted(data__missing_keys)) + " properties"), value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, rule='required')
+        data_keys = set(data.keys())
+        if "file" in data_keys:
+            data_keys.remove("file")
+            data__file = data["file"]
+            data__file_one_of_count8 = 0
+            if data__file_one_of_count8 < 2:
+                try:
+                    if not isinstance(data__file, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".file must be string", value=data__file, name="" + (name_prefix or "data") + ".file", definition={'type': 'string'}, rule='type')
+                    data__file_one_of_count8 += 1
+                except JsonSchemaValueException: pass
+            if data__file_one_of_count8 < 2:
+                try:
+                    if not isinstance(data__file, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".file must be array", value=data__file, name="" + (name_prefix or "data") + ".file", definition={'type': 'array', 'items': {'type': 'string'}}, rule='type')
+                    data__file_is_list = isinstance(data__file, (list, tuple))
+                    if data__file_is_list:
+                        data__file_len = len(data__file)
+                        for data__file_x, data__file_item in enumerate(data__file):
+                            if not isinstance(data__file_item, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".file[{data__file_x}]".format(**locals()) + " must be string", value=data__file_item, name="" + (name_prefix or "data") + ".file[{data__file_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                    data__file_one_of_count8 += 1
+                except JsonSchemaValueException: pass
+            if data__file_one_of_count8 != 1:
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".file must be valid exactly by one definition" + (" (" + str(data__file_one_of_count8) + " matches found)"), value=data__file, name="" + (name_prefix or "data") + ".file", definition={'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}, rule='oneOf')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/file-directive', 'title': "'file:' directive", 'description': 'Value is read from a file (or list of files and then concatenated)', 'type': 'object', 'additionalProperties': False, 'properties': {'file': {'oneOf': [{'type': 'string'}, {'type': 'array', 'items': {'type': 'string'}}]}}, 'required': ['file']}, rule='additionalProperties')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_attr_directive(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data__missing_keys = set(['attr']) - data.keys()
+        if data__missing_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain " + (str(sorted(data__missing_keys)) + " properties"), value=data, name="" + (name_prefix or "data") + "", definition={'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, rule='required')
+        data_keys = set(data.keys())
+        if "attr" in data_keys:
+            data_keys.remove("attr")
+            data__attr = data["attr"]
+            if not isinstance(data__attr, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".attr must be string", value=data__attr, name="" + (name_prefix or "data") + ".attr", definition={'type': 'string', 'format': 'python-qualified-identifier'}, rule='type')
+            if isinstance(data__attr, str):
+                if not custom_formats["python-qualified-identifier"](data__attr):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".attr must be python-qualified-identifier", value=data__attr, name="" + (name_prefix or "data") + ".attr", definition={'type': 'string', 'format': 'python-qualified-identifier'}, rule='format')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'title': "'attr:' directive", '$id': '#/definitions/attr-directive', '$$description': ['Value is read from a module attribute. Supports callables and iterables;', 'unsupported types are cast via ``str()``'], 'type': 'object', 'additionalProperties': False, 'properties': {'attr': {'type': 'string', 'format': 'python-qualified-identifier'}}, 'required': ['attr']}, rule='additionalProperties')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_find_directive(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data_keys = set(data.keys())
+        if "find" in data_keys:
+            data_keys.remove("find")
+            data__find = data["find"]
+            if not isinstance(data__find, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".find must be object", value=data__find, name="" + (name_prefix or "data") + ".find", definition={'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}, rule='type')
+            data__find_is_dict = isinstance(data__find, dict)
+            if data__find_is_dict:
+                data__find_keys = set(data__find.keys())
+                if "where" in data__find_keys:
+                    data__find_keys.remove("where")
+                    data__find__where = data__find["where"]
+                    if not isinstance(data__find__where, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.where must be array", value=data__find__where, name="" + (name_prefix or "data") + ".find.where", definition={'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, rule='type')
+                    data__find__where_is_list = isinstance(data__find__where, (list, tuple))
+                    if data__find__where_is_list:
+                        data__find__where_len = len(data__find__where)
+                        for data__find__where_x, data__find__where_item in enumerate(data__find__where):
+                            if not isinstance(data__find__where_item, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.where[{data__find__where_x}]".format(**locals()) + " must be string", value=data__find__where_item, name="" + (name_prefix or "data") + ".find.where[{data__find__where_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if "exclude" in data__find_keys:
+                    data__find_keys.remove("exclude")
+                    data__find__exclude = data__find["exclude"]
+                    if not isinstance(data__find__exclude, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.exclude must be array", value=data__find__exclude, name="" + (name_prefix or "data") + ".find.exclude", definition={'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, rule='type')
+                    data__find__exclude_is_list = isinstance(data__find__exclude, (list, tuple))
+                    if data__find__exclude_is_list:
+                        data__find__exclude_len = len(data__find__exclude)
+                        for data__find__exclude_x, data__find__exclude_item in enumerate(data__find__exclude):
+                            if not isinstance(data__find__exclude_item, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.exclude[{data__find__exclude_x}]".format(**locals()) + " must be string", value=data__find__exclude_item, name="" + (name_prefix or "data") + ".find.exclude[{data__find__exclude_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if "include" in data__find_keys:
+                    data__find_keys.remove("include")
+                    data__find__include = data__find["include"]
+                    if not isinstance(data__find__include, (list, tuple)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.include must be array", value=data__find__include, name="" + (name_prefix or "data") + ".find.include", definition={'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, rule='type')
+                    data__find__include_is_list = isinstance(data__find__include, (list, tuple))
+                    if data__find__include_is_list:
+                        data__find__include_len = len(data__find__include)
+                        for data__find__include_x, data__find__include_item in enumerate(data__find__include):
+                            if not isinstance(data__find__include_item, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.include[{data__find__include_x}]".format(**locals()) + " must be string", value=data__find__include_item, name="" + (name_prefix or "data") + ".find.include[{data__find__include_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+                if "namespaces" in data__find_keys:
+                    data__find_keys.remove("namespaces")
+                    data__find__namespaces = data__find["namespaces"]
+                    if not isinstance(data__find__namespaces, (bool)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".find.namespaces must be boolean", value=data__find__namespaces, name="" + (name_prefix or "data") + ".find.namespaces", definition={'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}, rule='type')
+                if data__find_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".find must not contain "+str(data__find_keys)+" properties", value=data__find, name="" + (name_prefix or "data") + ".find", definition={'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}, rule='additionalProperties')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/find-directive', 'title': "'find:' directive", 'type': 'object', 'additionalProperties': False, 'properties': {'find': {'type': 'object', '$$description': ['Dynamic `package discovery', '`_.'], 'additionalProperties': False, 'properties': {'where': {'description': 'Directories to be searched for packages (Unix-style relative path)', 'type': 'array', 'items': {'type': 'string'}}, 'exclude': {'type': 'array', '$$description': ['Exclude packages that match the values listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'include': {'type': 'array', '$$description': ['Restrict the found packages to just the ones listed in this field.', "Can container shell-style wildcards (e.g. ``'pkg.*'``)"], 'items': {'type': 'string'}}, 'namespaces': {'type': 'boolean', '$$description': ['When ``True``, directories without a ``__init__.py`` file will also', 'be scanned for :pep:`420`-style implicit namespaces']}}}}}, rule='additionalProperties')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_userguide_pyproject_config_html__definitions_package_name(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (str)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be string", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, rule='type')
+    data_any_of_count9 = 0
+    if not data_any_of_count9:
+        try:
+            if not isinstance(data, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + " must be string", value=data, name="" + (name_prefix or "data") + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='type')
+            if isinstance(data, str):
+                if not custom_formats["python-module-name"](data):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + " must be python-module-name", value=data, name="" + (name_prefix or "data") + "", definition={'type': 'string', 'format': 'python-module-name'}, rule='format')
+            data_any_of_count9 += 1
+        except JsonSchemaValueException: pass
+    if not data_any_of_count9:
+        try:
+            if not isinstance(data, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + " must be string", value=data, name="" + (name_prefix or "data") + "", definition={'type': 'string', 'format': 'pep561-stub-name'}, rule='type')
+            if isinstance(data, str):
+                if not custom_formats["pep561-stub-name"](data):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + " must be pep561-stub-name", value=data, name="" + (name_prefix or "data") + "", definition={'type': 'string', 'format': 'pep561-stub-name'}, rule='format')
+            data_any_of_count9 += 1
+        except JsonSchemaValueException: pass
+    if not data_any_of_count9:
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " cannot be validated by any definition", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/package-name', 'title': 'Valid package name', 'description': 'Valid package name (importable or :pep:`561`).', 'type': 'string', 'anyOf': [{'type': 'string', 'format': 'python-module-name'}, {'type': 'string', 'format': 'pep561-stub-name'}]}, rule='anyOf')
+    return data
+
+def validate_https___setuptools_pypa_io_en_latest_deprecated_distutils_configfile_html(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://setuptools.pypa.io/en/latest/deprecated/distutils/configfile.html', 'title': '``tool.distutils`` table', '$$description': ['**EXPERIMENTAL** (NOT OFFICIALLY SUPPORTED): Use ``tool.distutils``', 'subtables to configure arguments for ``distutils`` commands.', 'Originally, ``distutils`` allowed developers to configure arguments for', '``setup.py`` commands via `distutils configuration files', '`_.', 'See also `the old Python docs _`.'], 'type': 'object', 'properties': {'global': {'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}}, 'patternProperties': {'.+': {'type': 'object'}}, '$comment': 'TODO: Is there a practical way of making this schema more specific?'}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data_keys = set(data.keys())
+        if "global" in data_keys:
+            data_keys.remove("global")
+            data__global = data["global"]
+            if not isinstance(data__global, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".global must be object", value=data__global, name="" + (name_prefix or "data") + ".global", definition={'type': 'object', 'description': 'Global options applied to all ``distutils`` commands'}, rule='type')
+        for data_key, data_val in data.items():
+            if REGEX_PATTERNS['.+'].search(data_key):
+                if data_key in data_keys:
+                    data_keys.remove(data_key)
+                if not isinstance(data_val, (dict)):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".{data_key}".format(**locals()) + " must be object", value=data_val, name="" + (name_prefix or "data") + ".{data_key}".format(**locals()) + "", definition={'type': 'object'}, rule='type')
+    return data
+
+def validate_https___packaging_python_org_en_latest_specifications_pyproject_toml(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'gui-scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data__missing_keys = set(['name']) - data.keys()
+        if data__missing_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain " + (str(sorted(data__missing_keys)) + " properties"), value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'gui-scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, rule='required')
+        data_keys = set(data.keys())
+        if "name" in data_keys:
+            data_keys.remove("name")
+            data__name = data["name"]
+            if not isinstance(data__name, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".name must be string", value=data__name, name="" + (name_prefix or "data") + ".name", definition={'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, rule='type')
+            if isinstance(data__name, str):
+                if not custom_formats["pep508-identifier"](data__name):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".name must be pep508-identifier", value=data__name, name="" + (name_prefix or "data") + ".name", definition={'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, rule='format')
+        if "version" in data_keys:
+            data_keys.remove("version")
+            data__version = data["version"]
+            if not isinstance(data__version, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".version must be string", value=data__version, name="" + (name_prefix or "data") + ".version", definition={'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, rule='type')
+            if isinstance(data__version, str):
+                if not custom_formats["pep440"](data__version):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".version must be pep440", value=data__version, name="" + (name_prefix or "data") + ".version", definition={'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, rule='format')
+        if "description" in data_keys:
+            data_keys.remove("description")
+            data__description = data["description"]
+            if not isinstance(data__description, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".description must be string", value=data__description, name="" + (name_prefix or "data") + ".description", definition={'type': 'string', '$$description': ['The `summary description of the project', '`_']}, rule='type')
+        if "readme" in data_keys:
+            data_keys.remove("readme")
+            data__readme = data["readme"]
+            data__readme_one_of_count10 = 0
+            if data__readme_one_of_count10 < 2:
+                try:
+                    if not isinstance(data__readme, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must be string", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, rule='type')
+                    data__readme_one_of_count10 += 1
+                except JsonSchemaValueException: pass
+            if data__readme_one_of_count10 < 2:
+                try:
+                    if not isinstance(data__readme, (dict)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must be object", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}, rule='type')
+                    data__readme_any_of_count11 = 0
+                    if not data__readme_any_of_count11:
+                        try:
+                            data__readme_is_dict = isinstance(data__readme, dict)
+                            if data__readme_is_dict:
+                                data__readme__missing_keys = set(['file']) - data__readme.keys()
+                                if data__readme__missing_keys:
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must contain " + (str(sorted(data__readme__missing_keys)) + " properties"), value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, rule='required')
+                                data__readme_keys = set(data__readme.keys())
+                                if "file" in data__readme_keys:
+                                    data__readme_keys.remove("file")
+                                    data__readme__file = data__readme["file"]
+                                    if not isinstance(data__readme__file, (str)):
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme.file must be string", value=data__readme__file, name="" + (name_prefix or "data") + ".readme.file", definition={'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}, rule='type')
+                            data__readme_any_of_count11 += 1
+                        except JsonSchemaValueException: pass
+                    if not data__readme_any_of_count11:
+                        try:
+                            data__readme_is_dict = isinstance(data__readme, dict)
+                            if data__readme_is_dict:
+                                data__readme__missing_keys = set(['text']) - data__readme.keys()
+                                if data__readme__missing_keys:
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must contain " + (str(sorted(data__readme__missing_keys)) + " properties"), value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}, rule='required')
+                                data__readme_keys = set(data__readme.keys())
+                                if "text" in data__readme_keys:
+                                    data__readme_keys.remove("text")
+                                    data__readme__text = data__readme["text"]
+                                    if not isinstance(data__readme__text, (str)):
+                                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme.text must be string", value=data__readme__text, name="" + (name_prefix or "data") + ".readme.text", definition={'type': 'string', 'description': 'Full text describing the project.'}, rule='type')
+                            data__readme_any_of_count11 += 1
+                        except JsonSchemaValueException: pass
+                    if not data__readme_any_of_count11:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme cannot be validated by any definition", value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, rule='anyOf')
+                    data__readme_is_dict = isinstance(data__readme, dict)
+                    if data__readme_is_dict:
+                        data__readme__missing_keys = set(['content-type']) - data__readme.keys()
+                        if data__readme__missing_keys:
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must contain " + (str(sorted(data__readme__missing_keys)) + " properties"), value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}, rule='required')
+                        data__readme_keys = set(data__readme.keys())
+                        if "content-type" in data__readme_keys:
+                            data__readme_keys.remove("content-type")
+                            data__readme__contenttype = data__readme["content-type"]
+                            if not isinstance(data__readme__contenttype, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme.content-type must be string", value=data__readme__contenttype, name="" + (name_prefix or "data") + ".readme.content-type", definition={'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}, rule='type')
+                    data__readme_one_of_count10 += 1
+                except JsonSchemaValueException: pass
+            if data__readme_one_of_count10 != 1:
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".readme must be valid exactly by one definition" + (" (" + str(data__readme_one_of_count10) + " matches found)"), value=data__readme, name="" + (name_prefix or "data") + ".readme", definition={'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, rule='oneOf')
+        if "requires-python" in data_keys:
+            data_keys.remove("requires-python")
+            data__requirespython = data["requires-python"]
+            if not isinstance(data__requirespython, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".requires-python must be string", value=data__requirespython, name="" + (name_prefix or "data") + ".requires-python", definition={'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, rule='type')
+            if isinstance(data__requirespython, str):
+                if not custom_formats["pep508-versionspec"](data__requirespython):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".requires-python must be pep508-versionspec", value=data__requirespython, name="" + (name_prefix or "data") + ".requires-python", definition={'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, rule='format')
+        if "license" in data_keys:
+            data_keys.remove("license")
+            data__license = data["license"]
+            data__license_one_of_count12 = 0
+            if data__license_one_of_count12 < 2:
+                try:
+                    data__license_is_dict = isinstance(data__license, dict)
+                    if data__license_is_dict:
+                        data__license__missing_keys = set(['file']) - data__license.keys()
+                        if data__license__missing_keys:
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".license must contain " + (str(sorted(data__license__missing_keys)) + " properties"), value=data__license, name="" + (name_prefix or "data") + ".license", definition={'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, rule='required')
+                        data__license_keys = set(data__license.keys())
+                        if "file" in data__license_keys:
+                            data__license_keys.remove("file")
+                            data__license__file = data__license["file"]
+                            if not isinstance(data__license__file, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".license.file must be string", value=data__license__file, name="" + (name_prefix or "data") + ".license.file", definition={'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}, rule='type')
+                    data__license_one_of_count12 += 1
+                except JsonSchemaValueException: pass
+            if data__license_one_of_count12 < 2:
+                try:
+                    data__license_is_dict = isinstance(data__license, dict)
+                    if data__license_is_dict:
+                        data__license__missing_keys = set(['text']) - data__license.keys()
+                        if data__license__missing_keys:
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".license must contain " + (str(sorted(data__license__missing_keys)) + " properties"), value=data__license, name="" + (name_prefix or "data") + ".license", definition={'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}, rule='required')
+                        data__license_keys = set(data__license.keys())
+                        if "text" in data__license_keys:
+                            data__license_keys.remove("text")
+                            data__license__text = data__license["text"]
+                            if not isinstance(data__license__text, (str)):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".license.text must be string", value=data__license__text, name="" + (name_prefix or "data") + ".license.text", definition={'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}, rule='type')
+                    data__license_one_of_count12 += 1
+                except JsonSchemaValueException: pass
+            if data__license_one_of_count12 != 1:
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".license must be valid exactly by one definition" + (" (" + str(data__license_one_of_count12) + " matches found)"), value=data__license, name="" + (name_prefix or "data") + ".license", definition={'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, rule='oneOf')
+        if "authors" in data_keys:
+            data_keys.remove("authors")
+            data__authors = data["authors"]
+            if not isinstance(data__authors, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".authors must be array", value=data__authors, name="" + (name_prefix or "data") + ".authors", definition={'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, rule='type')
+            data__authors_is_list = isinstance(data__authors, (list, tuple))
+            if data__authors_is_list:
+                data__authors_len = len(data__authors)
+                for data__authors_x, data__authors_item in enumerate(data__authors):
+                    validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_author(data__authors_item, custom_formats, (name_prefix or "data") + ".authors[{data__authors_x}]".format(**locals()))
+        if "maintainers" in data_keys:
+            data_keys.remove("maintainers")
+            data__maintainers = data["maintainers"]
+            if not isinstance(data__maintainers, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".maintainers must be array", value=data__maintainers, name="" + (name_prefix or "data") + ".maintainers", definition={'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, rule='type')
+            data__maintainers_is_list = isinstance(data__maintainers, (list, tuple))
+            if data__maintainers_is_list:
+                data__maintainers_len = len(data__maintainers)
+                for data__maintainers_x, data__maintainers_item in enumerate(data__maintainers):
+                    validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_author(data__maintainers_item, custom_formats, (name_prefix or "data") + ".maintainers[{data__maintainers_x}]".format(**locals()))
+        if "keywords" in data_keys:
+            data_keys.remove("keywords")
+            data__keywords = data["keywords"]
+            if not isinstance(data__keywords, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".keywords must be array", value=data__keywords, name="" + (name_prefix or "data") + ".keywords", definition={'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, rule='type')
+            data__keywords_is_list = isinstance(data__keywords, (list, tuple))
+            if data__keywords_is_list:
+                data__keywords_len = len(data__keywords)
+                for data__keywords_x, data__keywords_item in enumerate(data__keywords):
+                    if not isinstance(data__keywords_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".keywords[{data__keywords_x}]".format(**locals()) + " must be string", value=data__keywords_item, name="" + (name_prefix or "data") + ".keywords[{data__keywords_x}]".format(**locals()) + "", definition={'type': 'string'}, rule='type')
+        if "classifiers" in data_keys:
+            data_keys.remove("classifiers")
+            data__classifiers = data["classifiers"]
+            if not isinstance(data__classifiers, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".classifiers must be array", value=data__classifiers, name="" + (name_prefix or "data") + ".classifiers", definition={'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, rule='type')
+            data__classifiers_is_list = isinstance(data__classifiers, (list, tuple))
+            if data__classifiers_is_list:
+                data__classifiers_len = len(data__classifiers)
+                for data__classifiers_x, data__classifiers_item in enumerate(data__classifiers):
+                    if not isinstance(data__classifiers_item, (str)):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + " must be string", value=data__classifiers_item, name="" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, rule='type')
+                    if isinstance(data__classifiers_item, str):
+                        if not custom_formats["trove-classifier"](data__classifiers_item):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + " must be trove-classifier", value=data__classifiers_item, name="" + (name_prefix or "data") + ".classifiers[{data__classifiers_x}]".format(**locals()) + "", definition={'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, rule='format')
+        if "urls" in data_keys:
+            data_keys.remove("urls")
+            data__urls = data["urls"]
+            if not isinstance(data__urls, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls must be object", value=data__urls, name="" + (name_prefix or "data") + ".urls", definition={'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, rule='type')
+            data__urls_is_dict = isinstance(data__urls, dict)
+            if data__urls_is_dict:
+                data__urls_keys = set(data__urls.keys())
+                for data__urls_key, data__urls_val in data__urls.items():
+                    if REGEX_PATTERNS['^.+$'].search(data__urls_key):
+                        if data__urls_key in data__urls_keys:
+                            data__urls_keys.remove(data__urls_key)
+                        if not isinstance(data__urls_val, (str)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + " must be string", value=data__urls_val, name="" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'url'}, rule='type')
+                        if isinstance(data__urls_val, str):
+                            if not custom_formats["url"](data__urls_val):
+                                raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + " must be url", value=data__urls_val, name="" + (name_prefix or "data") + ".urls.{data__urls_key}".format(**locals()) + "", definition={'type': 'string', 'format': 'url'}, rule='format')
+                if data__urls_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".urls must not contain "+str(data__urls_keys)+" properties", value=data__urls, name="" + (name_prefix or "data") + ".urls", definition={'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, rule='additionalProperties')
+        if "scripts" in data_keys:
+            data_keys.remove("scripts")
+            data__scripts = data["scripts"]
+            validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_entry_point_group(data__scripts, custom_formats, (name_prefix or "data") + ".scripts")
+        if "gui-scripts" in data_keys:
+            data_keys.remove("gui-scripts")
+            data__guiscripts = data["gui-scripts"]
+            validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_entry_point_group(data__guiscripts, custom_formats, (name_prefix or "data") + ".gui-scripts")
+        if "entry-points" in data_keys:
+            data_keys.remove("entry-points")
+            data__entrypoints = data["entry-points"]
+            data__entrypoints_is_dict = isinstance(data__entrypoints, dict)
+            if data__entrypoints_is_dict:
+                data__entrypoints_keys = set(data__entrypoints.keys())
+                for data__entrypoints_key, data__entrypoints_val in data__entrypoints.items():
+                    if REGEX_PATTERNS['^.+$'].search(data__entrypoints_key):
+                        if data__entrypoints_key in data__entrypoints_keys:
+                            data__entrypoints_keys.remove(data__entrypoints_key)
+                        validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_entry_point_group(data__entrypoints_val, custom_formats, (name_prefix or "data") + ".entry-points.{data__entrypoints_key}".format(**locals()))
+                if data__entrypoints_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".entry-points must not contain "+str(data__entrypoints_keys)+" properties", value=data__entrypoints, name="" + (name_prefix or "data") + ".entry-points", definition={'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, rule='additionalProperties')
+                data__entrypoints_len = len(data__entrypoints)
+                if data__entrypoints_len != 0:
+                    data__entrypoints_property_names = True
+                    for data__entrypoints_key in data__entrypoints:
+                        try:
+                            if isinstance(data__entrypoints_key, str):
+                                if not custom_formats["python-entrypoint-group"](data__entrypoints_key):
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".entry-points must be python-entrypoint-group", value=data__entrypoints_key, name="" + (name_prefix or "data") + ".entry-points", definition={'format': 'python-entrypoint-group'}, rule='format')
+                        except JsonSchemaValueException:
+                            data__entrypoints_property_names = False
+                    if not data__entrypoints_property_names:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".entry-points must be named by propertyName definition", value=data__entrypoints, name="" + (name_prefix or "data") + ".entry-points", definition={'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, rule='propertyNames')
+        if "dependencies" in data_keys:
+            data_keys.remove("dependencies")
+            data__dependencies = data["dependencies"]
+            if not isinstance(data__dependencies, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".dependencies must be array", value=data__dependencies, name="" + (name_prefix or "data") + ".dependencies", definition={'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, rule='type')
+            data__dependencies_is_list = isinstance(data__dependencies, (list, tuple))
+            if data__dependencies_is_list:
+                data__dependencies_len = len(data__dependencies)
+                for data__dependencies_x, data__dependencies_item in enumerate(data__dependencies):
+                    validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_dependency(data__dependencies_item, custom_formats, (name_prefix or "data") + ".dependencies[{data__dependencies_x}]".format(**locals()))
+        if "optional-dependencies" in data_keys:
+            data_keys.remove("optional-dependencies")
+            data__optionaldependencies = data["optional-dependencies"]
+            if not isinstance(data__optionaldependencies, (dict)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must be object", value=data__optionaldependencies, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='type')
+            data__optionaldependencies_is_dict = isinstance(data__optionaldependencies, dict)
+            if data__optionaldependencies_is_dict:
+                data__optionaldependencies_keys = set(data__optionaldependencies.keys())
+                for data__optionaldependencies_key, data__optionaldependencies_val in data__optionaldependencies.items():
+                    if REGEX_PATTERNS['^.+$'].search(data__optionaldependencies_key):
+                        if data__optionaldependencies_key in data__optionaldependencies_keys:
+                            data__optionaldependencies_keys.remove(data__optionaldependencies_key)
+                        if not isinstance(data__optionaldependencies_val, (list, tuple)):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies.{data__optionaldependencies_key}".format(**locals()) + " must be array", value=data__optionaldependencies_val, name="" + (name_prefix or "data") + ".optional-dependencies.{data__optionaldependencies_key}".format(**locals()) + "", definition={'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, rule='type')
+                        data__optionaldependencies_val_is_list = isinstance(data__optionaldependencies_val, (list, tuple))
+                        if data__optionaldependencies_val_is_list:
+                            data__optionaldependencies_val_len = len(data__optionaldependencies_val)
+                            for data__optionaldependencies_val_x, data__optionaldependencies_val_item in enumerate(data__optionaldependencies_val):
+                                validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_dependency(data__optionaldependencies_val_item, custom_formats, (name_prefix or "data") + ".optional-dependencies.{data__optionaldependencies_key}[{data__optionaldependencies_val_x}]".format(**locals()))
+                if data__optionaldependencies_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must not contain "+str(data__optionaldependencies_keys)+" properties", value=data__optionaldependencies, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='additionalProperties')
+                data__optionaldependencies_len = len(data__optionaldependencies)
+                if data__optionaldependencies_len != 0:
+                    data__optionaldependencies_property_names = True
+                    for data__optionaldependencies_key in data__optionaldependencies:
+                        try:
+                            if isinstance(data__optionaldependencies_key, str):
+                                if not custom_formats["pep508-identifier"](data__optionaldependencies_key):
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must be pep508-identifier", value=data__optionaldependencies_key, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'format': 'pep508-identifier'}, rule='format')
+                        except JsonSchemaValueException:
+                            data__optionaldependencies_property_names = False
+                    if not data__optionaldependencies_property_names:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".optional-dependencies must be named by propertyName definition", value=data__optionaldependencies, name="" + (name_prefix or "data") + ".optional-dependencies", definition={'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, rule='propertyNames')
+        if "dynamic" in data_keys:
+            data_keys.remove("dynamic")
+            data__dynamic = data["dynamic"]
+            if not isinstance(data__dynamic, (list, tuple)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must be array", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}, rule='type')
+            data__dynamic_is_list = isinstance(data__dynamic, (list, tuple))
+            if data__dynamic_is_list:
+                data__dynamic_len = len(data__dynamic)
+                for data__dynamic_x, data__dynamic_item in enumerate(data__dynamic):
+                    if data__dynamic_item not in ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']:
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic[{data__dynamic_x}]".format(**locals()) + " must be one of ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']", value=data__dynamic_item, name="" + (name_prefix or "data") + ".dynamic[{data__dynamic_x}]".format(**locals()) + "", definition={'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}, rule='enum')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$schema': 'http://json-schema.org/draft-07/schema#', '$id': 'https://packaging.python.org/en/latest/specifications/pyproject-toml/', 'title': 'Package metadata stored in the ``project`` table', '$$description': ['Data structure for the **project** table inside ``pyproject.toml``', '(as initially defined in :pep:`621`)'], 'type': 'object', 'properties': {'name': {'type': 'string', 'description': 'The name (primary identifier) of the project. MUST be statically defined.', 'format': 'pep508-identifier'}, 'version': {'type': 'string', 'description': 'The version of the project as supported by :pep:`440`.', 'format': 'pep440'}, 'description': {'type': 'string', '$$description': ['The `summary description of the project', '`_']}, 'readme': {'$$description': ['`Full/detailed description of the project in the form of a README', '`_', "with meaning similar to the one defined in `core metadata's Description", '`_'], 'oneOf': [{'type': 'string', '$$description': ['Relative path to a text file (UTF-8) containing the full description', 'of the project. If the file path ends in case-insensitive ``.md`` or', '``.rst`` suffixes, then the content-type is respectively', '``text/markdown`` or ``text/x-rst``']}, {'type': 'object', 'allOf': [{'anyOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to a text file containing the full description', 'of the project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', 'description': 'Full text describing the project.'}}, 'required': ['text']}]}, {'properties': {'content-type': {'type': 'string', '$$description': ['Content-type (:rfc:`1341`) of the full description', '(e.g. ``text/markdown``). The ``charset`` parameter is assumed', 'UTF-8 when not present.'], '$comment': 'TODO: add regex pattern or format?'}}, 'required': ['content-type']}]}]}, 'requires-python': {'type': 'string', 'format': 'pep508-versionspec', '$$description': ['`The Python version requirements of the project', '`_.']}, 'license': {'description': '`Project license `_.', 'oneOf': [{'properties': {'file': {'type': 'string', '$$description': ['Relative path to the file (UTF-8) which contains the license for the', 'project.']}}, 'required': ['file']}, {'properties': {'text': {'type': 'string', '$$description': ['The license of the project whose meaning is that of the', '`License field from the core metadata', '`_.']}}, 'required': ['text']}]}, 'authors': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'authors' of the project.", 'The exact meaning is open to interpretation (e.g. original or primary authors,', 'current maintainers, or owners of the package).']}, 'maintainers': {'type': 'array', 'items': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, '$$description': ["The people or organizations considered to be the 'maintainers' of the project.", 'Similarly to ``authors``, the exact meaning is open to interpretation.']}, 'keywords': {'type': 'array', 'items': {'type': 'string'}, 'description': 'List of keywords to assist searching for the distribution in a larger catalog.'}, 'classifiers': {'type': 'array', 'items': {'type': 'string', 'format': 'trove-classifier', 'description': '`PyPI classifier `_.'}, '$$description': ['`Trove classifiers `_', 'which apply to the project.']}, 'urls': {'type': 'object', 'description': 'URLs associated with the project in the form ``label => value``.', 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', 'format': 'url'}}}, 'scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'gui-scripts': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'entry-points': {'$$description': ['Instruct the installer to expose the given modules/functions via', '``entry-point`` discovery mechanism (useful for plugins).', 'More information available in the `Python packaging guide', '`_.'], 'propertyNames': {'format': 'python-entrypoint-group'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}}}, 'dependencies': {'type': 'array', 'description': 'Project (mandatory) dependencies.', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}, 'optional-dependencies': {'type': 'object', 'description': 'Optional dependency for the project', 'propertyNames': {'format': 'pep508-identifier'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'array', 'items': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}}, 'dynamic': {'type': 'array', '$$description': ['Specifies which fields are intentionally unspecified and expected to be', 'dynamically provided by build tools'], 'items': {'enum': ['version', 'description', 'readme', 'requires-python', 'license', 'authors', 'maintainers', 'keywords', 'classifiers', 'urls', 'scripts', 'gui-scripts', 'entry-points', 'dependencies', 'optional-dependencies']}}}, 'required': ['name'], 'additionalProperties': False, 'if': {'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, 'then': {'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, 'definitions': {'author': {'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, 'entry-point-group': {'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, 'dependency': {'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}}}, rule='additionalProperties')
+    try:
+        try:
+            data_is_dict = isinstance(data, dict)
+            if data_is_dict:
+                data__missing_keys = set(['dynamic']) - data.keys()
+                if data__missing_keys:
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain " + (str(sorted(data__missing_keys)) + " properties"), value=data, name="" + (name_prefix or "data") + "", definition={'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, rule='required')
+                data_keys = set(data.keys())
+                if "dynamic" in data_keys:
+                    data_keys.remove("dynamic")
+                    data__dynamic = data["dynamic"]
+                    data__dynamic_is_list = isinstance(data__dynamic, (list, tuple))
+                    if data__dynamic_is_list:
+                        data__dynamic_contains = False
+                        for data__dynamic_key in data__dynamic:
+                            try:
+                                if data__dynamic_key != "version":
+                                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must be same as const definition: version", value=data__dynamic_key, name="" + (name_prefix or "data") + ".dynamic", definition={'const': 'version'}, rule='const')
+                                data__dynamic_contains = True
+                                break
+                            except JsonSchemaValueException: pass
+                        if not data__dynamic_contains:
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + ".dynamic must contain one of contains definition", value=data__dynamic, name="" + (name_prefix or "data") + ".dynamic", definition={'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}, rule='contains')
+        except JsonSchemaValueException: pass
+        else:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must NOT match a disallowed definition", value=data, name="" + (name_prefix or "data") + "", definition={'not': {'required': ['dynamic'], 'properties': {'dynamic': {'contains': {'const': 'version'}, '$$description': ['version is listed in ``dynamic``']}}}, '$$comment': ['According to :pep:`621`:', '    If the core metadata specification lists a field as "Required", then', '    the metadata MUST specify the field statically or list it in dynamic', 'In turn, `core metadata`_ defines:', '    The required fields are: Metadata-Version, Name, Version.', '    All the other fields are optional.', 'Since ``Metadata-Version`` is defined by the build back-end, ``name`` and', '``version`` are the only mandatory information in ``pyproject.toml``.', '.. _core metadata: https://packaging.python.org/specifications/core-metadata/']}, rule='not')
+    except JsonSchemaValueException:
+        pass
+    else:
+        data_is_dict = isinstance(data, dict)
+        if data_is_dict:
+            data__missing_keys = set(['version']) - data.keys()
+            if data__missing_keys:
+                raise JsonSchemaValueException("" + (name_prefix or "data") + " must contain " + (str(sorted(data__missing_keys)) + " properties"), value=data, name="" + (name_prefix or "data") + "", definition={'required': ['version'], '$$description': ['version should be statically defined in the ``version`` field']}, rule='required')
+    return data
+
+def validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_dependency(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (str)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be string", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}, rule='type')
+    if isinstance(data, str):
+        if not custom_formats["pep508"](data):
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must be pep508", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/dependency', 'title': 'Dependency', 'type': 'string', 'description': 'Project dependency specification according to PEP 508', 'format': 'pep508'}, rule='format')
+    return data
+
+def validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_entry_point_group(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data_keys = set(data.keys())
+        for data_key, data_val in data.items():
+            if REGEX_PATTERNS['^.+$'].search(data_key):
+                if data_key in data_keys:
+                    data_keys.remove(data_key)
+                if not isinstance(data_val, (str)):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".{data_key}".format(**locals()) + " must be string", value=data_val, name="" + (name_prefix or "data") + ".{data_key}".format(**locals()) + "", definition={'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}, rule='type')
+                if isinstance(data_val, str):
+                    if not custom_formats["python-entrypoint-reference"](data_val):
+                        raise JsonSchemaValueException("" + (name_prefix or "data") + ".{data_key}".format(**locals()) + " must be python-entrypoint-reference", value=data_val, name="" + (name_prefix or "data") + ".{data_key}".format(**locals()) + "", definition={'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}, rule='format')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, rule='additionalProperties')
+        data_len = len(data)
+        if data_len != 0:
+            data_property_names = True
+            for data_key in data:
+                try:
+                    if isinstance(data_key, str):
+                        if not custom_formats["python-entrypoint-name"](data_key):
+                            raise JsonSchemaValueException("" + (name_prefix or "data") + " must be python-entrypoint-name", value=data_key, name="" + (name_prefix or "data") + "", definition={'format': 'python-entrypoint-name'}, rule='format')
+                except JsonSchemaValueException:
+                    data_property_names = False
+            if not data_property_names:
+                raise JsonSchemaValueException("" + (name_prefix or "data") + " must be named by propertyName definition", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/entry-point-group', 'title': 'Entry-points', 'type': 'object', '$$description': ['Entry-points are grouped together to indicate what sort of capabilities they', 'provide.', 'See the `packaging guides', '`_', 'and `setuptools docs', '`_', 'for more information.'], 'propertyNames': {'format': 'python-entrypoint-name'}, 'additionalProperties': False, 'patternProperties': {'^.+$': {'type': 'string', '$$description': ['Reference to a Python object. It is either in the form', '``importable.module``, or ``importable.module:object.attr``.'], 'format': 'python-entrypoint-reference', '$comment': 'https://packaging.python.org/specifications/entry-points/'}}}, rule='propertyNames')
+    return data
+
+def validate_https___packaging_python_org_en_latest_specifications_pyproject_toml___definitions_author(data, custom_formats={}, name_prefix=None):
+    if not isinstance(data, (dict)):
+        raise JsonSchemaValueException("" + (name_prefix or "data") + " must be object", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, rule='type')
+    data_is_dict = isinstance(data, dict)
+    if data_is_dict:
+        data_keys = set(data.keys())
+        if "name" in data_keys:
+            data_keys.remove("name")
+            data__name = data["name"]
+            if not isinstance(data__name, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".name must be string", value=data__name, name="" + (name_prefix or "data") + ".name", definition={'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, rule='type')
+        if "email" in data_keys:
+            data_keys.remove("email")
+            data__email = data["email"]
+            if not isinstance(data__email, (str)):
+                raise JsonSchemaValueException("" + (name_prefix or "data") + ".email must be string", value=data__email, name="" + (name_prefix or "data") + ".email", definition={'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}, rule='type')
+            if isinstance(data__email, str):
+                if not REGEX_PATTERNS["idn-email_re_pattern"].match(data__email):
+                    raise JsonSchemaValueException("" + (name_prefix or "data") + ".email must be idn-email", value=data__email, name="" + (name_prefix or "data") + ".email", definition={'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}, rule='format')
+        if data_keys:
+            raise JsonSchemaValueException("" + (name_prefix or "data") + " must not contain "+str(data_keys)+" properties", value=data, name="" + (name_prefix or "data") + "", definition={'$id': '#/definitions/author', 'title': 'Author or Maintainer', '$comment': 'https://peps.python.org/pep-0621/#authors-maintainers', 'type': 'object', 'additionalProperties': False, 'properties': {'name': {'type': 'string', '$$description': ['MUST be a valid email name, i.e. whatever can be put as a name, before an', 'email, in :rfc:`822`.']}, 'email': {'type': 'string', 'format': 'idn-email', 'description': 'MUST be a valid email address'}}}, rule='additionalProperties')
+    return data
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/formats.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/formats.py
new file mode 100644
index 0000000000000000000000000000000000000000..aacf4092b02b73738e8e5b93f33e362daaac2b63
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/_validate_pyproject/formats.py
@@ -0,0 +1,354 @@
+"""
+The functions in this module are used to validate schemas with the
+`format JSON Schema keyword
+`_.
+
+The correspondence is given by replacing the ``_`` character in the name of the
+function with a ``-`` to obtain the format name and vice versa.
+"""
+
+import builtins
+import logging
+import os
+import re
+import string
+import typing
+from itertools import chain as _chain
+
+if typing.TYPE_CHECKING:
+    from typing_extensions import Literal
+
+_logger = logging.getLogger(__name__)
+
+# -------------------------------------------------------------------------------------
+# PEP 440
+
+VERSION_PATTERN = r"""
+    v?
+    (?:
+        (?:(?P[0-9]+)!)?                           # epoch
+        (?P[0-9]+(?:\.[0-9]+)*)                  # release segment
+        (?P
                                          # pre-release
+            [-_\.]?
+            (?Palpha|a|beta|b|preview|pre|c|rc)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+VERSION_REGEX = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.X | re.I)
+
+
+def pep440(version: str) -> bool:
+    """See :ref:`PyPA's version specification `
+    (initially introduced in :pep:`440`).
+    """
+    return VERSION_REGEX.match(version) is not None
+
+
+# -------------------------------------------------------------------------------------
+# PEP 508
+
+PEP508_IDENTIFIER_PATTERN = r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])"
+PEP508_IDENTIFIER_REGEX = re.compile(f"^{PEP508_IDENTIFIER_PATTERN}$", re.I)
+
+
+def pep508_identifier(name: str) -> bool:
+    """See :ref:`PyPA's name specification `
+    (initially introduced in :pep:`508#names`).
+    """
+    return PEP508_IDENTIFIER_REGEX.match(name) is not None
+
+
+try:
+    try:
+        from packaging import requirements as _req
+    except ImportError:  # pragma: no cover
+        # let's try setuptools vendored version
+        from setuptools._vendor.packaging import requirements as _req  # type: ignore
+
+    def pep508(value: str) -> bool:
+        """See :ref:`PyPA's dependency specifiers `
+        (initially introduced in :pep:`508`).
+        """
+        try:
+            _req.Requirement(value)
+        except _req.InvalidRequirement:
+            return False
+        return True
+
+except ImportError:  # pragma: no cover
+    _logger.warning(
+        "Could not find an installation of `packaging`. Requirements, dependencies and "
+        "versions might not be validated. "
+        "To enforce validation, please install `packaging`."
+    )
+
+    def pep508(value: str) -> bool:
+        return True
+
+
+def pep508_versionspec(value: str) -> bool:
+    """Expression that can be used to specify/lock versions (including ranges)
+    See ``versionspec`` in :ref:`PyPA's dependency specifiers
+    ` (initially introduced in :pep:`508`).
+    """
+    if any(c in value for c in (";", "]", "@")):
+        # In PEP 508:
+        # conditional markers, extras and URL specs are not included in the
+        # versionspec
+        return False
+    # Let's pretend we have a dependency called `requirement` with the given
+    # version spec, then we can reuse the pep508 function for validation:
+    return pep508(f"requirement{value}")
+
+
+# -------------------------------------------------------------------------------------
+# PEP 517
+
+
+def pep517_backend_reference(value: str) -> bool:
+    """See PyPA's specification for defining build-backend references
+    introduced in :pep:`517#source-trees`.
+
+    This is similar to an entry-point reference (e.g., ``package.module:object``).
+    """
+    module, _, obj = value.partition(":")
+    identifiers = (i.strip() for i in _chain(module.split("."), obj.split(".")))
+    return all(python_identifier(i) for i in identifiers if i)
+
+
+# -------------------------------------------------------------------------------------
+# Classifiers - PEP 301
+
+
+def _download_classifiers() -> str:
+    import ssl
+    from email.message import Message
+    from urllib.request import urlopen
+
+    url = "https://pypi.org/pypi?:action=list_classifiers"
+    context = ssl.create_default_context()
+    with urlopen(url, context=context) as response:  # noqa: S310 (audit URLs)
+        headers = Message()
+        headers["content_type"] = response.getheader("content-type", "text/plain")
+        return response.read().decode(headers.get_param("charset", "utf-8"))  # type: ignore[no-any-return]
+
+
+class _TroveClassifier:
+    """The ``trove_classifiers`` package is the official way of validating classifiers,
+    however this package might not be always available.
+    As a workaround we can still download a list from PyPI.
+    We also don't want to be over strict about it, so simply skipping silently is an
+    option (classifiers will be validated anyway during the upload to PyPI).
+    """
+
+    downloaded: typing.Union[None, "Literal[False]", typing.Set[str]]
+
+    def __init__(self) -> None:
+        self.downloaded = None
+        self._skip_download = False
+        # None => not cached yet
+        # False => cache not available
+        self.__name__ = "trove_classifier"  # Emulate a public function
+
+    def _disable_download(self) -> None:
+        # This is a private API. Only setuptools has the consent of using it.
+        self._skip_download = True
+
+    def __call__(self, value: str) -> bool:
+        if self.downloaded is False or self._skip_download is True:
+            return True
+
+        if os.getenv("NO_NETWORK") or os.getenv("VALIDATE_PYPROJECT_NO_NETWORK"):
+            self.downloaded = False
+            msg = (
+                "Install ``trove-classifiers`` to ensure proper validation. "
+                "Skipping download of classifiers list from PyPI (NO_NETWORK)."
+            )
+            _logger.debug(msg)
+            return True
+
+        if self.downloaded is None:
+            msg = (
+                "Install ``trove-classifiers`` to ensure proper validation. "
+                "Meanwhile a list of classifiers will be downloaded from PyPI."
+            )
+            _logger.debug(msg)
+            try:
+                self.downloaded = set(_download_classifiers().splitlines())
+            except Exception:
+                self.downloaded = False
+                _logger.debug("Problem with download, skipping validation")
+                return True
+
+        return value in self.downloaded or value.lower().startswith("private ::")
+
+
+try:
+    from trove_classifiers import classifiers as _trove_classifiers
+
+    def trove_classifier(value: str) -> bool:
+        """See https://pypi.org/classifiers/"""
+        return value in _trove_classifiers or value.lower().startswith("private ::")
+
+except ImportError:  # pragma: no cover
+    trove_classifier = _TroveClassifier()
+
+
+# -------------------------------------------------------------------------------------
+# Stub packages - PEP 561
+
+
+def pep561_stub_name(value: str) -> bool:
+    """Name of a directory containing type stubs.
+    It must follow the name scheme ``-stubs`` as defined in
+    :pep:`561#stub-only-packages`.
+    """
+    top, *children = value.split(".")
+    if not top.endswith("-stubs"):
+        return False
+    return python_module_name(".".join([top[: -len("-stubs")], *children]))
+
+
+# -------------------------------------------------------------------------------------
+# Non-PEP related
+
+
+def url(value: str) -> bool:
+    """Valid URL (validation uses :obj:`urllib.parse`).
+    For maximum compatibility please make sure to include a ``scheme`` prefix
+    in your URL (e.g. ``http://``).
+    """
+    from urllib.parse import urlparse
+
+    try:
+        parts = urlparse(value)
+        if not parts.scheme:
+            _logger.warning(
+                "For maximum compatibility please make sure to include a "
+                "`scheme` prefix in your URL (e.g. 'http://'). "
+                f"Given value: {value}"
+            )
+            if not (value.startswith("/") or value.startswith("\\") or "@" in value):
+                parts = urlparse(f"http://{value}")
+
+        return bool(parts.scheme and parts.netloc)
+    except Exception:
+        return False
+
+
+# https://packaging.python.org/specifications/entry-points/
+ENTRYPOINT_PATTERN = r"[^\[\s=]([^=]*[^\s=])?"
+ENTRYPOINT_REGEX = re.compile(f"^{ENTRYPOINT_PATTERN}$", re.I)
+RECOMMEDED_ENTRYPOINT_PATTERN = r"[\w.-]+"
+RECOMMEDED_ENTRYPOINT_REGEX = re.compile(f"^{RECOMMEDED_ENTRYPOINT_PATTERN}$", re.I)
+ENTRYPOINT_GROUP_PATTERN = r"\w+(\.\w+)*"
+ENTRYPOINT_GROUP_REGEX = re.compile(f"^{ENTRYPOINT_GROUP_PATTERN}$", re.I)
+
+
+def python_identifier(value: str) -> bool:
+    """Can be used as identifier in Python.
+    (Validation uses :obj:`str.isidentifier`).
+    """
+    return value.isidentifier()
+
+
+def python_qualified_identifier(value: str) -> bool:
+    """
+    Python "dotted identifier", i.e. a sequence of :obj:`python_identifier`
+    concatenated with ``"."`` (e.g.: ``package.module.submodule``).
+    """
+    if value.startswith(".") or value.endswith("."):
+        return False
+    return all(python_identifier(m) for m in value.split("."))
+
+
+def python_module_name(value: str) -> bool:
+    """Module name that can be used in an ``import``-statement in Python.
+    See :obj:`python_qualified_identifier`.
+    """
+    return python_qualified_identifier(value)
+
+
+def python_entrypoint_group(value: str) -> bool:
+    """See ``Data model > group`` in the :ref:`PyPA's entry-points specification
+    `.
+    """
+    return ENTRYPOINT_GROUP_REGEX.match(value) is not None
+
+
+def python_entrypoint_name(value: str) -> bool:
+    """See ``Data model > name`` in the :ref:`PyPA's entry-points specification
+    `.
+    """
+    if not ENTRYPOINT_REGEX.match(value):
+        return False
+    if not RECOMMEDED_ENTRYPOINT_REGEX.match(value):
+        msg = f"Entry point `{value}` does not follow recommended pattern: "
+        msg += RECOMMEDED_ENTRYPOINT_PATTERN
+        _logger.warning(msg)
+    return True
+
+
+def python_entrypoint_reference(value: str) -> bool:
+    """Reference to a Python object using in the format::
+
+        importable.module:object.attr
+
+    See ``Data model >object reference`` in the :ref:`PyPA's entry-points specification
+    `.
+    """
+    module, _, rest = value.partition(":")
+    if "[" in rest:
+        obj, _, extras_ = rest.partition("[")
+        if extras_.strip()[-1] != "]":
+            return False
+        extras = (x.strip() for x in extras_.strip(string.whitespace + "[]").split(","))
+        if not all(pep508_identifier(e) for e in extras):
+            return False
+        _logger.warning(f"`{value}` - using extras for entry points is not recommended")
+    else:
+        obj = rest
+
+    module_parts = module.split(".")
+    identifiers = _chain(module_parts, obj.split(".")) if rest else module_parts
+    return all(python_identifier(i.strip()) for i in identifiers)
+
+
+def uint8(value: builtins.int) -> bool:
+    r"""Unsigned 8-bit integer (:math:`0 \leq x < 2^8`)"""
+    return 0 <= value < 2**8
+
+
+def uint16(value: builtins.int) -> bool:
+    r"""Unsigned 16-bit integer (:math:`0 \leq x < 2^{16}`)"""
+    return 0 <= value < 2**16
+
+
+def uint(value: builtins.int) -> bool:
+    r"""Unsigned 64-bit integer (:math:`0 \leq x < 2^{64}`)"""
+    return 0 <= value < 2**64
+
+
+def int(value: builtins.int) -> bool:
+    r"""Signed 64-bit integer (:math:`-2^{63} \leq x < 2^{63}`)"""
+    return -(2**63) <= value < 2**63
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/expand.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/expand.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5f5dc586e84039b36e14e06dc857b8bb84592e8
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/expand.py
@@ -0,0 +1,442 @@
+"""Utility functions to expand configuration directives or special values
+(such glob patterns).
+
+We can split the process of interpreting configuration files into 2 steps:
+
+1. The parsing the file contents from strings to value objects
+   that can be understand by Python (for example a string with a comma
+   separated list of keywords into an actual Python list of strings).
+
+2. The expansion (or post-processing) of these values according to the
+   semantics ``setuptools`` assign to them (for example a configuration field
+   with the ``file:`` directive should be expanded from a list of file paths to
+   a single string with the contents of those files concatenated)
+
+This module focus on the second step, and therefore allow sharing the expansion
+functions among several configuration file formats.
+
+**PRIVATE MODULE**: API reserved for setuptools internal usage only.
+"""
+
+from __future__ import annotations
+
+import ast
+import importlib
+import os
+import pathlib
+import sys
+from glob import iglob
+from configparser import ConfigParser
+from importlib.machinery import ModuleSpec, all_suffixes
+from itertools import chain
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Iterable,
+    Iterator,
+    Mapping,
+    TypeVar,
+)
+from pathlib import Path
+from types import ModuleType
+
+from distutils.errors import DistutilsOptionError
+
+from .._path import same_path as _same_path, StrPath
+from ..discovery import find_package_path
+from ..warnings import SetuptoolsWarning
+
+if TYPE_CHECKING:
+    from setuptools.dist import Distribution
+
+_K = TypeVar("_K")
+_V = TypeVar("_V", covariant=True)
+
+
+class StaticModule:
+    """Proxy to a module object that avoids executing arbitrary code."""
+
+    def __init__(self, name: str, spec: ModuleSpec):
+        module = ast.parse(pathlib.Path(spec.origin).read_bytes())  # type: ignore[arg-type] # Let it raise an error on None
+        vars(self).update(locals())
+        del self.self
+
+    def _find_assignments(self) -> Iterator[tuple[ast.AST, ast.AST]]:
+        for statement in self.module.body:
+            if isinstance(statement, ast.Assign):
+                yield from ((target, statement.value) for target in statement.targets)
+            elif isinstance(statement, ast.AnnAssign) and statement.value:
+                yield (statement.target, statement.value)
+
+    def __getattr__(self, attr):
+        """Attempt to load an attribute "statically", via :func:`ast.literal_eval`."""
+        try:
+            return next(
+                ast.literal_eval(value)
+                for target, value in self._find_assignments()
+                if isinstance(target, ast.Name) and target.id == attr
+            )
+        except Exception as e:
+            raise AttributeError(f"{self.name} has no attribute {attr}") from e
+
+
+def glob_relative(
+    patterns: Iterable[str], root_dir: StrPath | None = None
+) -> list[str]:
+    """Expand the list of glob patterns, but preserving relative paths.
+
+    :param list[str] patterns: List of glob patterns
+    :param str root_dir: Path to which globs should be relative
+                         (current directory by default)
+    :rtype: list
+    """
+    glob_characters = {'*', '?', '[', ']', '{', '}'}
+    expanded_values = []
+    root_dir = root_dir or os.getcwd()
+    for value in patterns:
+        # Has globby characters?
+        if any(char in value for char in glob_characters):
+            # then expand the glob pattern while keeping paths *relative*:
+            glob_path = os.path.abspath(os.path.join(root_dir, value))
+            expanded_values.extend(
+                sorted(
+                    os.path.relpath(path, root_dir).replace(os.sep, "/")
+                    for path in iglob(glob_path, recursive=True)
+                )
+            )
+
+        else:
+            # take the value as-is
+            path = os.path.relpath(value, root_dir).replace(os.sep, "/")
+            expanded_values.append(path)
+
+    return expanded_values
+
+
+def read_files(
+    filepaths: StrPath | Iterable[StrPath], root_dir: StrPath | None = None
+) -> str:
+    """Return the content of the files concatenated using ``\n`` as str
+
+    This function is sandboxed and won't reach anything outside ``root_dir``
+
+    (By default ``root_dir`` is the current directory).
+    """
+    from setuptools.extern.more_itertools import always_iterable
+
+    root_dir = os.path.abspath(root_dir or os.getcwd())
+    _filepaths = (os.path.join(root_dir, path) for path in always_iterable(filepaths))
+    return '\n'.join(
+        _read_file(path)
+        for path in _filter_existing_files(_filepaths)
+        if _assert_local(path, root_dir)
+    )
+
+
+def _filter_existing_files(filepaths: Iterable[StrPath]) -> Iterator[StrPath]:
+    for path in filepaths:
+        if os.path.isfile(path):
+            yield path
+        else:
+            SetuptoolsWarning.emit(f"File {path!r} cannot be found")
+
+
+def _read_file(filepath: bytes | StrPath) -> str:
+    with open(filepath, encoding='utf-8') as f:
+        return f.read()
+
+
+def _assert_local(filepath: StrPath, root_dir: str):
+    if Path(os.path.abspath(root_dir)) not in Path(os.path.abspath(filepath)).parents:
+        msg = f"Cannot access {filepath!r} (or anything outside {root_dir!r})"
+        raise DistutilsOptionError(msg)
+
+    return True
+
+
+def read_attr(
+    attr_desc: str,
+    package_dir: Mapping[str, str] | None = None,
+    root_dir: StrPath | None = None,
+):
+    """Reads the value of an attribute from a module.
+
+    This function will try to read the attributed statically first
+    (via :func:`ast.literal_eval`), and only evaluate the module if it fails.
+
+    Examples:
+        read_attr("package.attr")
+        read_attr("package.module.attr")
+
+    :param str attr_desc: Dot-separated string describing how to reach the
+        attribute (see examples above)
+    :param dict[str, str] package_dir: Mapping of package names to their
+        location in disk (represented by paths relative to ``root_dir``).
+    :param str root_dir: Path to directory containing all the packages in
+        ``package_dir`` (current directory by default).
+    :rtype: str
+    """
+    root_dir = root_dir or os.getcwd()
+    attrs_path = attr_desc.strip().split('.')
+    attr_name = attrs_path.pop()
+    module_name = '.'.join(attrs_path)
+    module_name = module_name or '__init__'
+    path = _find_module(module_name, package_dir, root_dir)
+    spec = _find_spec(module_name, path)
+
+    try:
+        return getattr(StaticModule(module_name, spec), attr_name)
+    except Exception:
+        # fallback to evaluate module
+        module = _load_spec(spec, module_name)
+        return getattr(module, attr_name)
+
+
+def _find_spec(module_name: str, module_path: StrPath | None) -> ModuleSpec:
+    spec = importlib.util.spec_from_file_location(module_name, module_path)
+    spec = spec or importlib.util.find_spec(module_name)
+
+    if spec is None:
+        raise ModuleNotFoundError(module_name)
+
+    return spec
+
+
+def _load_spec(spec: ModuleSpec, module_name: str) -> ModuleType:
+    name = getattr(spec, "__name__", module_name)
+    if name in sys.modules:
+        return sys.modules[name]
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[name] = module  # cache (it also ensures `==` works on loaded items)
+    spec.loader.exec_module(module)  # type: ignore
+    return module
+
+
+def _find_module(
+    module_name: str, package_dir: Mapping[str, str] | None, root_dir: StrPath
+) -> str | None:
+    """Find the path to the module named ``module_name``,
+    considering the ``package_dir`` in the build configuration and ``root_dir``.
+
+    >>> tmp = getfixture('tmpdir')
+    >>> _ = tmp.ensure("a/b/c.py")
+    >>> _ = tmp.ensure("a/b/d/__init__.py")
+    >>> r = lambda x: x.replace(str(tmp), "tmp").replace(os.sep, "/")
+    >>> r(_find_module("a.b.c", None, tmp))
+    'tmp/a/b/c.py'
+    >>> r(_find_module("f.g.h", {"": "1", "f": "2", "f.g": "3", "f.g.h": "a/b/d"}, tmp))
+    'tmp/a/b/d/__init__.py'
+    """
+    path_start = find_package_path(module_name, package_dir or {}, root_dir)
+    candidates = chain.from_iterable(
+        (f"{path_start}{ext}", os.path.join(path_start, f"__init__{ext}"))
+        for ext in all_suffixes()
+    )
+    return next((x for x in candidates if os.path.isfile(x)), None)
+
+
+def resolve_class(
+    qualified_class_name: str,
+    package_dir: Mapping[str, str] | None = None,
+    root_dir: StrPath | None = None,
+) -> Callable:
+    """Given a qualified class name, return the associated class object"""
+    root_dir = root_dir or os.getcwd()
+    idx = qualified_class_name.rfind('.')
+    class_name = qualified_class_name[idx + 1 :]
+    pkg_name = qualified_class_name[:idx]
+
+    path = _find_module(pkg_name, package_dir, root_dir)
+    module = _load_spec(_find_spec(pkg_name, path), pkg_name)
+    return getattr(module, class_name)
+
+
+def cmdclass(
+    values: dict[str, str],
+    package_dir: Mapping[str, str] | None = None,
+    root_dir: StrPath | None = None,
+) -> dict[str, Callable]:
+    """Given a dictionary mapping command names to strings for qualified class
+    names, apply :func:`resolve_class` to the dict values.
+    """
+    return {k: resolve_class(v, package_dir, root_dir) for k, v in values.items()}
+
+
+def find_packages(
+    *,
+    namespaces=True,
+    fill_package_dir: dict[str, str] | None = None,
+    root_dir: StrPath | None = None,
+    **kwargs,
+) -> list[str]:
+    """Works similarly to :func:`setuptools.find_packages`, but with all
+    arguments given as keyword arguments. Moreover, ``where`` can be given
+    as a list (the results will be simply concatenated).
+
+    When the additional keyword argument ``namespaces`` is ``True``, it will
+    behave like :func:`setuptools.find_namespace_packages`` (i.e. include
+    implicit namespaces as per :pep:`420`).
+
+    The ``where`` argument will be considered relative to ``root_dir`` (or the current
+    working directory when ``root_dir`` is not given).
+
+    If the ``fill_package_dir`` argument is passed, this function will consider it as a
+    similar data structure to the ``package_dir`` configuration parameter add fill-in
+    any missing package location.
+
+    :rtype: list
+    """
+    from setuptools.discovery import construct_package_dir
+    from setuptools.extern.more_itertools import unique_everseen, always_iterable
+
+    if namespaces:
+        from setuptools.discovery import PEP420PackageFinder as PackageFinder
+    else:
+        from setuptools.discovery import PackageFinder  # type: ignore
+
+    root_dir = root_dir or os.curdir
+    where = kwargs.pop('where', ['.'])
+    packages: list[str] = []
+    fill_package_dir = {} if fill_package_dir is None else fill_package_dir
+    search = list(unique_everseen(always_iterable(where)))
+
+    if len(search) == 1 and all(not _same_path(search[0], x) for x in (".", root_dir)):
+        fill_package_dir.setdefault("", search[0])
+
+    for path in search:
+        package_path = _nest_path(root_dir, path)
+        pkgs = PackageFinder.find(package_path, **kwargs)
+        packages.extend(pkgs)
+        if pkgs and not (
+            fill_package_dir.get("") == path or os.path.samefile(package_path, root_dir)
+        ):
+            fill_package_dir.update(construct_package_dir(pkgs, path))
+
+    return packages
+
+
+def _nest_path(parent: StrPath, path: StrPath) -> str:
+    path = parent if path in {".", ""} else os.path.join(parent, path)
+    return os.path.normpath(path)
+
+
+def version(value: Callable | Iterable[str | int] | str) -> str:
+    """When getting the version directly from an attribute,
+    it should be normalised to string.
+    """
+    _value = value() if callable(value) else value
+
+    if isinstance(_value, str):
+        return _value
+    if hasattr(_value, '__iter__'):
+        return '.'.join(map(str, _value))
+    return '%s' % _value
+
+
+def canonic_package_data(package_data: dict) -> dict:
+    if "*" in package_data:
+        package_data[""] = package_data.pop("*")
+    return package_data
+
+
+def canonic_data_files(
+    data_files: list | dict, root_dir: StrPath | None = None
+) -> list[tuple[str, list[str]]]:
+    """For compatibility with ``setup.py``, ``data_files`` should be a list
+    of pairs instead of a dict.
+
+    This function also expands glob patterns.
+    """
+    if isinstance(data_files, list):
+        return data_files
+
+    return [
+        (dest, glob_relative(patterns, root_dir))
+        for dest, patterns in data_files.items()
+    ]
+
+
+def entry_points(text: str, text_source="entry-points") -> dict[str, dict]:
+    """Given the contents of entry-points file,
+    process it into a 2-level dictionary (``dict[str, dict[str, str]]``).
+    The first level keys are entry-point groups, the second level keys are
+    entry-point names, and the second level values are references to objects
+    (that correspond to the entry-point value).
+    """
+    parser = ConfigParser(default_section=None, delimiters=("=",))  # type: ignore
+    parser.optionxform = str  # case sensitive
+    parser.read_string(text, text_source)
+    groups = {k: dict(v.items()) for k, v in parser.items()}
+    groups.pop(parser.default_section, None)
+    return groups
+
+
+class EnsurePackagesDiscovered:
+    """Some expand functions require all the packages to already be discovered before
+    they run, e.g. :func:`read_attr`, :func:`resolve_class`, :func:`cmdclass`.
+
+    Therefore in some cases we will need to run autodiscovery during the evaluation of
+    the configuration. However, it is better to postpone calling package discovery as
+    much as possible, because some parameters can influence it (e.g. ``package_dir``),
+    and those might not have been processed yet.
+    """
+
+    def __init__(self, distribution: Distribution):
+        self._dist = distribution
+        self._called = False
+
+    def __call__(self):
+        """Trigger the automatic package discovery, if it is still necessary."""
+        if not self._called:
+            self._called = True
+            self._dist.set_defaults(name=False)  # Skip name, we can still be parsing
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, _exc_type, _exc_value, _traceback):
+        if self._called:
+            self._dist.set_defaults.analyse_name()  # Now we can set a default name
+
+    def _get_package_dir(self) -> Mapping[str, str]:
+        self()
+        pkg_dir = self._dist.package_dir
+        return {} if pkg_dir is None else pkg_dir
+
+    @property
+    def package_dir(self) -> Mapping[str, str]:
+        """Proxy to ``package_dir`` that may trigger auto-discovery when used."""
+        return LazyMappingProxy(self._get_package_dir)
+
+
+class LazyMappingProxy(Mapping[_K, _V]):
+    """Mapping proxy that delays resolving the target object, until really needed.
+
+    >>> def obtain_mapping():
+    ...     print("Running expensive function!")
+    ...     return {"key": "value", "other key": "other value"}
+    >>> mapping = LazyMappingProxy(obtain_mapping)
+    >>> mapping["key"]
+    Running expensive function!
+    'value'
+    >>> mapping["other key"]
+    'other value'
+    """
+
+    def __init__(self, obtain_mapping_value: Callable[[], Mapping[_K, _V]]):
+        self._obtain = obtain_mapping_value
+        self._value: Mapping[_K, _V] | None = None
+
+    def _target(self) -> Mapping[_K, _V]:
+        if self._value is None:
+            self._value = self._obtain()
+        return self._value
+
+    def __getitem__(self, key: _K) -> _V:
+        return self._target()[key]
+
+    def __len__(self) -> int:
+        return len(self._target())
+
+    def __iter__(self) -> Iterator[_K]:
+        return iter(self._target())
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/pyprojecttoml.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/pyprojecttoml.py
new file mode 100644
index 0000000000000000000000000000000000000000..d41c956cbd49f0b8e3d7007c23c0a207fa0d39be
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/pyprojecttoml.py
@@ -0,0 +1,453 @@
+"""
+Load setuptools configuration from ``pyproject.toml`` files.
+
+**PRIVATE MODULE**: API reserved for setuptools internal usage only.
+
+To read project metadata, consider using
+``build.util.project_wheel_metadata`` (https://pypi.org/project/build/).
+For simple scenarios, you can also try parsing the file directly
+with the help of ``tomllib`` or ``tomli``.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from contextlib import contextmanager
+from functools import partial
+from typing import TYPE_CHECKING, Callable, Mapping
+
+from .._path import StrPath
+from ..errors import FileError, InvalidConfigError
+from ..warnings import SetuptoolsWarning
+from . import expand as _expand
+from ._apply_pyprojecttoml import _PREVIOUSLY_DEFINED, _MissingDynamic
+from ._apply_pyprojecttoml import apply as _apply
+
+if TYPE_CHECKING:
+    from setuptools.dist import Distribution
+    from typing_extensions import Self
+
+_logger = logging.getLogger(__name__)
+
+
+def load_file(filepath: StrPath) -> dict:
+    from ..compat.py310 import tomllib
+
+    with open(filepath, "rb") as file:
+        return tomllib.load(file)
+
+
+def validate(config: dict, filepath: StrPath) -> bool:
+    from . import _validate_pyproject as validator
+
+    trove_classifier = validator.FORMAT_FUNCTIONS.get("trove-classifier")
+    if hasattr(trove_classifier, "_disable_download"):
+        # Improve reproducibility by default. See issue 31 for validate-pyproject.
+        trove_classifier._disable_download()  # type: ignore
+
+    try:
+        return validator.validate(config)
+    except validator.ValidationError as ex:
+        summary = f"configuration error: {ex.summary}"
+        if ex.name.strip("`") != "project":
+            # Probably it is just a field missing/misnamed, not worthy the verbosity...
+            _logger.debug(summary)
+            _logger.debug(ex.details)
+
+        error = f"invalid pyproject.toml config: {ex.name}."
+        raise ValueError(f"{error}\n{summary}") from None
+
+
+def apply_configuration(
+    dist: Distribution,
+    filepath: StrPath,
+    ignore_option_errors=False,
+) -> Distribution:
+    """Apply the configuration from a ``pyproject.toml`` file into an existing
+    distribution object.
+    """
+    config = read_configuration(filepath, True, ignore_option_errors, dist)
+    return _apply(dist, config, filepath)
+
+
+def read_configuration(
+    filepath: StrPath,
+    expand=True,
+    ignore_option_errors=False,
+    dist: Distribution | None = None,
+):
+    """Read given configuration file and returns options from it as a dict.
+
+    :param str|unicode filepath: Path to configuration file in the ``pyproject.toml``
+        format.
+
+    :param bool expand: Whether to expand directives and other computed values
+        (i.e. post-process the given configuration)
+
+    :param bool ignore_option_errors: Whether to silently ignore
+        options, values of which could not be resolved (e.g. due to exceptions
+        in directives such as file:, attr:, etc.).
+        If False exceptions are propagated as expected.
+
+    :param Distribution|None: Distribution object to which the configuration refers.
+        If not given a dummy object will be created and discarded after the
+        configuration is read. This is used for auto-discovery of packages and in the
+        case a dynamic configuration (e.g. ``attr`` or ``cmdclass``) is expanded.
+        When ``expand=False`` this object is simply ignored.
+
+    :rtype: dict
+    """
+    filepath = os.path.abspath(filepath)
+
+    if not os.path.isfile(filepath):
+        raise FileError(f"Configuration file {filepath!r} does not exist.")
+
+    asdict = load_file(filepath) or {}
+    project_table = asdict.get("project", {})
+    tool_table = asdict.get("tool", {})
+    setuptools_table = tool_table.get("setuptools", {})
+    if not asdict or not (project_table or setuptools_table):
+        return {}  # User is not using pyproject to configure setuptools
+
+    if "setuptools" in asdict.get("tools", {}):
+        # let the user know they probably have a typo in their metadata
+        _ToolsTypoInMetadata.emit()
+
+    if "distutils" in tool_table:
+        _ExperimentalConfiguration.emit(subject="[tool.distutils]")
+
+    # There is an overall sense in the community that making include_package_data=True
+    # the default would be an improvement.
+    # `ini2toml` backfills include_package_data=False when nothing is explicitly given,
+    # therefore setting a default here is backwards compatible.
+    if dist and getattr(dist, "include_package_data", None) is not None:
+        setuptools_table.setdefault("include-package-data", dist.include_package_data)
+    else:
+        setuptools_table.setdefault("include-package-data", True)
+    # Persist changes:
+    asdict["tool"] = tool_table
+    tool_table["setuptools"] = setuptools_table
+
+    with _ignore_errors(ignore_option_errors):
+        # Don't complain about unrelated errors (e.g. tools not using the "tool" table)
+        subset = {"project": project_table, "tool": {"setuptools": setuptools_table}}
+        validate(subset, filepath)
+
+    if expand:
+        root_dir = os.path.dirname(filepath)
+        return expand_configuration(asdict, root_dir, ignore_option_errors, dist)
+
+    return asdict
+
+
+def expand_configuration(
+    config: dict,
+    root_dir: StrPath | None = None,
+    ignore_option_errors: bool = False,
+    dist: Distribution | None = None,
+) -> dict:
+    """Given a configuration with unresolved fields (e.g. dynamic, cmdclass, ...)
+    find their final values.
+
+    :param dict config: Dict containing the configuration for the distribution
+    :param str root_dir: Top-level directory for the distribution/project
+        (the same directory where ``pyproject.toml`` is place)
+    :param bool ignore_option_errors: see :func:`read_configuration`
+    :param Distribution|None: Distribution object to which the configuration refers.
+        If not given a dummy object will be created and discarded after the
+        configuration is read. Used in the case a dynamic configuration
+        (e.g. ``attr`` or ``cmdclass``).
+
+    :rtype: dict
+    """
+    return _ConfigExpander(config, root_dir, ignore_option_errors, dist).expand()
+
+
+class _ConfigExpander:
+    def __init__(
+        self,
+        config: dict,
+        root_dir: StrPath | None = None,
+        ignore_option_errors: bool = False,
+        dist: Distribution | None = None,
+    ):
+        self.config = config
+        self.root_dir = root_dir or os.getcwd()
+        self.project_cfg = config.get("project", {})
+        self.dynamic = self.project_cfg.get("dynamic", [])
+        self.setuptools_cfg = config.get("tool", {}).get("setuptools", {})
+        self.dynamic_cfg = self.setuptools_cfg.get("dynamic", {})
+        self.ignore_option_errors = ignore_option_errors
+        self._dist = dist
+        self._referenced_files: set[str] = set()
+
+    def _ensure_dist(self) -> Distribution:
+        from setuptools.dist import Distribution
+
+        attrs = {"src_root": self.root_dir, "name": self.project_cfg.get("name", None)}
+        return self._dist or Distribution(attrs)
+
+    def _process_field(self, container: dict, field: str, fn: Callable):
+        if field in container:
+            with _ignore_errors(self.ignore_option_errors):
+                container[field] = fn(container[field])
+
+    def _canonic_package_data(self, field="package-data"):
+        package_data = self.setuptools_cfg.get(field, {})
+        return _expand.canonic_package_data(package_data)
+
+    def expand(self):
+        self._expand_packages()
+        self._canonic_package_data()
+        self._canonic_package_data("exclude-package-data")
+
+        # A distribution object is required for discovering the correct package_dir
+        dist = self._ensure_dist()
+        ctx = _EnsurePackagesDiscovered(dist, self.project_cfg, self.setuptools_cfg)
+        with ctx as ensure_discovered:
+            package_dir = ensure_discovered.package_dir
+            self._expand_data_files()
+            self._expand_cmdclass(package_dir)
+            self._expand_all_dynamic(dist, package_dir)
+
+        dist._referenced_files.update(self._referenced_files)
+        return self.config
+
+    def _expand_packages(self):
+        packages = self.setuptools_cfg.get("packages")
+        if packages is None or isinstance(packages, (list, tuple)):
+            return
+
+        find = packages.get("find")
+        if isinstance(find, dict):
+            find["root_dir"] = self.root_dir
+            find["fill_package_dir"] = self.setuptools_cfg.setdefault("package-dir", {})
+            with _ignore_errors(self.ignore_option_errors):
+                self.setuptools_cfg["packages"] = _expand.find_packages(**find)
+
+    def _expand_data_files(self):
+        data_files = partial(_expand.canonic_data_files, root_dir=self.root_dir)
+        self._process_field(self.setuptools_cfg, "data-files", data_files)
+
+    def _expand_cmdclass(self, package_dir: Mapping[str, str]):
+        root_dir = self.root_dir
+        cmdclass = partial(_expand.cmdclass, package_dir=package_dir, root_dir=root_dir)
+        self._process_field(self.setuptools_cfg, "cmdclass", cmdclass)
+
+    def _expand_all_dynamic(self, dist: Distribution, package_dir: Mapping[str, str]):
+        special = (  # need special handling
+            "version",
+            "readme",
+            "entry-points",
+            "scripts",
+            "gui-scripts",
+            "classifiers",
+            "dependencies",
+            "optional-dependencies",
+        )
+        # `_obtain` functions are assumed to raise appropriate exceptions/warnings.
+        obtained_dynamic = {
+            field: self._obtain(dist, field, package_dir)
+            for field in self.dynamic
+            if field not in special
+        }
+        obtained_dynamic.update(
+            self._obtain_entry_points(dist, package_dir) or {},
+            version=self._obtain_version(dist, package_dir),
+            readme=self._obtain_readme(dist),
+            classifiers=self._obtain_classifiers(dist),
+            dependencies=self._obtain_dependencies(dist),
+            optional_dependencies=self._obtain_optional_dependencies(dist),
+        )
+        # `None` indicates there is nothing in `tool.setuptools.dynamic` but the value
+        # might have already been set by setup.py/extensions, so avoid overwriting.
+        updates = {k: v for k, v in obtained_dynamic.items() if v is not None}
+        self.project_cfg.update(updates)
+
+    def _ensure_previously_set(self, dist: Distribution, field: str):
+        previous = _PREVIOUSLY_DEFINED[field](dist)
+        if previous is None and not self.ignore_option_errors:
+            msg = (
+                f"No configuration found for dynamic {field!r}.\n"
+                "Some dynamic fields need to be specified via `tool.setuptools.dynamic`"
+                "\nothers must be specified via the equivalent attribute in `setup.py`."
+            )
+            raise InvalidConfigError(msg)
+
+    def _expand_directive(
+        self, specifier: str, directive, package_dir: Mapping[str, str]
+    ):
+        from setuptools.extern.more_itertools import always_iterable
+
+        with _ignore_errors(self.ignore_option_errors):
+            root_dir = self.root_dir
+            if "file" in directive:
+                self._referenced_files.update(always_iterable(directive["file"]))
+                return _expand.read_files(directive["file"], root_dir)
+            if "attr" in directive:
+                return _expand.read_attr(directive["attr"], package_dir, root_dir)
+            raise ValueError(f"invalid `{specifier}`: {directive!r}")
+        return None
+
+    def _obtain(self, dist: Distribution, field: str, package_dir: Mapping[str, str]):
+        if field in self.dynamic_cfg:
+            return self._expand_directive(
+                f"tool.setuptools.dynamic.{field}",
+                self.dynamic_cfg[field],
+                package_dir,
+            )
+        self._ensure_previously_set(dist, field)
+        return None
+
+    def _obtain_version(self, dist: Distribution, package_dir: Mapping[str, str]):
+        # Since plugins can set version, let's silently skip if it cannot be obtained
+        if "version" in self.dynamic and "version" in self.dynamic_cfg:
+            return _expand.version(self._obtain(dist, "version", package_dir))
+        return None
+
+    def _obtain_readme(self, dist: Distribution) -> dict[str, str] | None:
+        if "readme" not in self.dynamic:
+            return None
+
+        dynamic_cfg = self.dynamic_cfg
+        if "readme" in dynamic_cfg:
+            return {
+                "text": self._obtain(dist, "readme", {}),
+                "content-type": dynamic_cfg["readme"].get("content-type", "text/x-rst"),
+            }
+
+        self._ensure_previously_set(dist, "readme")
+        return None
+
+    def _obtain_entry_points(
+        self, dist: Distribution, package_dir: Mapping[str, str]
+    ) -> dict[str, dict] | None:
+        fields = ("entry-points", "scripts", "gui-scripts")
+        if not any(field in self.dynamic for field in fields):
+            return None
+
+        text = self._obtain(dist, "entry-points", package_dir)
+        if text is None:
+            return None
+
+        groups = _expand.entry_points(text)
+        expanded = {"entry-points": groups}
+
+        def _set_scripts(field: str, group: str):
+            if group in groups:
+                value = groups.pop(group)
+                if field not in self.dynamic:
+                    raise InvalidConfigError(_MissingDynamic.details(field, value))
+                expanded[field] = value
+
+        _set_scripts("scripts", "console_scripts")
+        _set_scripts("gui-scripts", "gui_scripts")
+
+        return expanded
+
+    def _obtain_classifiers(self, dist: Distribution):
+        if "classifiers" in self.dynamic:
+            value = self._obtain(dist, "classifiers", {})
+            if value:
+                return value.splitlines()
+        return None
+
+    def _obtain_dependencies(self, dist: Distribution):
+        if "dependencies" in self.dynamic:
+            value = self._obtain(dist, "dependencies", {})
+            if value:
+                return _parse_requirements_list(value)
+        return None
+
+    def _obtain_optional_dependencies(self, dist: Distribution):
+        if "optional-dependencies" not in self.dynamic:
+            return None
+        if "optional-dependencies" in self.dynamic_cfg:
+            optional_dependencies_map = self.dynamic_cfg["optional-dependencies"]
+            assert isinstance(optional_dependencies_map, dict)
+            return {
+                group: _parse_requirements_list(
+                    self._expand_directive(
+                        f"tool.setuptools.dynamic.optional-dependencies.{group}",
+                        directive,
+                        {},
+                    )
+                )
+                for group, directive in optional_dependencies_map.items()
+            }
+        self._ensure_previously_set(dist, "optional-dependencies")
+        return None
+
+
+def _parse_requirements_list(value):
+    return [
+        line
+        for line in value.splitlines()
+        if line.strip() and not line.strip().startswith("#")
+    ]
+
+
+@contextmanager
+def _ignore_errors(ignore_option_errors: bool):
+    if not ignore_option_errors:
+        yield
+        return
+
+    try:
+        yield
+    except Exception as ex:
+        _logger.debug(f"ignored error: {ex.__class__.__name__} - {ex}")
+
+
+class _EnsurePackagesDiscovered(_expand.EnsurePackagesDiscovered):
+    def __init__(
+        self, distribution: Distribution, project_cfg: dict, setuptools_cfg: dict
+    ):
+        super().__init__(distribution)
+        self._project_cfg = project_cfg
+        self._setuptools_cfg = setuptools_cfg
+
+    def __enter__(self) -> Self:
+        """When entering the context, the values of ``packages``, ``py_modules`` and
+        ``package_dir`` that are missing in ``dist`` are copied from ``setuptools_cfg``.
+        """
+        dist, cfg = self._dist, self._setuptools_cfg
+        package_dir: dict[str, str] = cfg.setdefault("package-dir", {})
+        package_dir.update(dist.package_dir or {})
+        dist.package_dir = package_dir  # needs to be the same object
+
+        dist.set_defaults._ignore_ext_modules()  # pyproject.toml-specific behaviour
+
+        # Set `name`, `py_modules` and `packages` in dist to short-circuit
+        # auto-discovery, but avoid overwriting empty lists purposefully set by users.
+        if dist.metadata.name is None:
+            dist.metadata.name = self._project_cfg.get("name")
+        if dist.py_modules is None:
+            dist.py_modules = cfg.get("py-modules")
+        if dist.packages is None:
+            dist.packages = cfg.get("packages")
+
+        return super().__enter__()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """When exiting the context, if values of ``packages``, ``py_modules`` and
+        ``package_dir`` are missing in ``setuptools_cfg``, copy from ``dist``.
+        """
+        # If anything was discovered set them back, so they count in the final config.
+        self._setuptools_cfg.setdefault("packages", self._dist.packages)
+        self._setuptools_cfg.setdefault("py-modules", self._dist.py_modules)
+        return super().__exit__(exc_type, exc_value, traceback)
+
+
+class _ExperimentalConfiguration(SetuptoolsWarning):
+    _SUMMARY = (
+        "`{subject}` in `pyproject.toml` is still *experimental* "
+        "and likely to change in future releases."
+    )
+
+
+class _ToolsTypoInMetadata(SetuptoolsWarning):
+    _SUMMARY = (
+        "Ignoring [tools.setuptools] in pyproject.toml, did you mean [tool.setuptools]?"
+    )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/setupcfg.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/setupcfg.py
new file mode 100644
index 0000000000000000000000000000000000000000..80ebe3d9bd53e337d013d1012670f9aad458a473
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/config/setupcfg.py
@@ -0,0 +1,775 @@
+"""
+Load setuptools configuration from ``setup.cfg`` files.
+
+**API will be made private in the future**
+
+To read project metadata, consider using
+``build.util.project_wheel_metadata`` (https://pypi.org/project/build/).
+For simple scenarios, you can also try parsing the file directly
+with the help of ``configparser``.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import functools
+import os
+from collections import defaultdict
+from functools import partial
+from functools import wraps
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Any,
+    Dict,
+    Generic,
+    Iterable,
+    Tuple,
+    TypeVar,
+    Union,
+)
+
+from .._path import StrPath
+from ..errors import FileError, OptionError
+from ..extern.packaging.markers import default_environment as marker_env
+from ..extern.packaging.requirements import InvalidRequirement, Requirement
+from ..extern.packaging.specifiers import SpecifierSet
+from ..extern.packaging.version import InvalidVersion, Version
+from ..warnings import SetuptoolsDeprecationWarning
+from . import expand
+
+if TYPE_CHECKING:
+    from distutils.dist import DistributionMetadata
+
+    from setuptools.dist import Distribution
+
+SingleCommandOptions = Dict["str", Tuple["str", Any]]
+"""Dict that associate the name of the options of a particular command to a
+tuple. The first element of the tuple indicates the origin of the option value
+(e.g. the name of the configuration file where it was read from),
+while the second element of the tuple is the option value itself
+"""
+AllCommandOptions = Dict["str", SingleCommandOptions]  # cmd name => its options
+Target = TypeVar("Target", bound=Union["Distribution", "DistributionMetadata"])
+
+
+def read_configuration(
+    filepath: StrPath, find_others=False, ignore_option_errors=False
+) -> dict:
+    """Read given configuration file and returns options from it as a dict.
+
+    :param str|unicode filepath: Path to configuration file
+        to get options from.
+
+    :param bool find_others: Whether to search for other configuration files
+        which could be on in various places.
+
+    :param bool ignore_option_errors: Whether to silently ignore
+        options, values of which could not be resolved (e.g. due to exceptions
+        in directives such as file:, attr:, etc.).
+        If False exceptions are propagated as expected.
+
+    :rtype: dict
+    """
+    from setuptools.dist import Distribution
+
+    dist = Distribution()
+    filenames = dist.find_config_files() if find_others else []
+    handlers = _apply(dist, filepath, filenames, ignore_option_errors)
+    return configuration_to_dict(handlers)
+
+
+def apply_configuration(dist: Distribution, filepath: StrPath) -> Distribution:
+    """Apply the configuration from a ``setup.cfg`` file into an existing
+    distribution object.
+    """
+    _apply(dist, filepath)
+    dist._finalize_requires()
+    return dist
+
+
+def _apply(
+    dist: Distribution,
+    filepath: StrPath,
+    other_files: Iterable[StrPath] = (),
+    ignore_option_errors: bool = False,
+) -> tuple[ConfigHandler, ...]:
+    """Read configuration from ``filepath`` and applies to the ``dist`` object."""
+    from setuptools.dist import _Distribution
+
+    filepath = os.path.abspath(filepath)
+
+    if not os.path.isfile(filepath):
+        raise FileError(f'Configuration file {filepath} does not exist.')
+
+    current_directory = os.getcwd()
+    os.chdir(os.path.dirname(filepath))
+    filenames = [*other_files, filepath]
+
+    try:
+        _Distribution.parse_config_files(dist, filenames=filenames)  # type: ignore[arg-type] # TODO: fix in distutils stubs
+        handlers = parse_configuration(
+            dist, dist.command_options, ignore_option_errors=ignore_option_errors
+        )
+        dist._finalize_license_files()
+    finally:
+        os.chdir(current_directory)
+
+    return handlers
+
+
+def _get_option(target_obj: Target, key: str):
+    """
+    Given a target object and option key, get that option from
+    the target object, either through a get_{key} method or
+    from an attribute directly.
+    """
+    getter_name = f'get_{key}'
+    by_attribute = functools.partial(getattr, target_obj, key)
+    getter = getattr(target_obj, getter_name, by_attribute)
+    return getter()
+
+
+def configuration_to_dict(handlers: tuple[ConfigHandler, ...]) -> dict:
+    """Returns configuration data gathered by given handlers as a dict.
+
+    :param list[ConfigHandler] handlers: Handlers list,
+        usually from parse_configuration()
+
+    :rtype: dict
+    """
+    config_dict: dict = defaultdict(dict)
+
+    for handler in handlers:
+        for option in handler.set_options:
+            value = _get_option(handler.target_obj, option)
+            config_dict[handler.section_prefix][option] = value
+
+    return config_dict
+
+
+def parse_configuration(
+    distribution: Distribution,
+    command_options: AllCommandOptions,
+    ignore_option_errors=False,
+) -> tuple[ConfigMetadataHandler, ConfigOptionsHandler]:
+    """Performs additional parsing of configuration options
+    for a distribution.
+
+    Returns a list of used option handlers.
+
+    :param Distribution distribution:
+    :param dict command_options:
+    :param bool ignore_option_errors: Whether to silently ignore
+        options, values of which could not be resolved (e.g. due to exceptions
+        in directives such as file:, attr:, etc.).
+        If False exceptions are propagated as expected.
+    :rtype: list
+    """
+    with expand.EnsurePackagesDiscovered(distribution) as ensure_discovered:
+        options = ConfigOptionsHandler(
+            distribution,
+            command_options,
+            ignore_option_errors,
+            ensure_discovered,
+        )
+
+        options.parse()
+        if not distribution.package_dir:
+            distribution.package_dir = options.package_dir  # Filled by `find_packages`
+
+        meta = ConfigMetadataHandler(
+            distribution.metadata,
+            command_options,
+            ignore_option_errors,
+            ensure_discovered,
+            distribution.package_dir,
+            distribution.src_root,
+        )
+        meta.parse()
+        distribution._referenced_files.update(
+            options._referenced_files, meta._referenced_files
+        )
+
+    return meta, options
+
+
+def _warn_accidental_env_marker_misconfig(label: str, orig_value: str, parsed: list):
+    """Because users sometimes misinterpret this configuration:
+
+    [options.extras_require]
+    foo = bar;python_version<"4"
+
+    It looks like one requirement with an environment marker
+    but because there is no newline, it's parsed as two requirements
+    with a semicolon as separator.
+
+    Therefore, if:
+        * input string does not contain a newline AND
+        * parsed result contains two requirements AND
+        * parsing of the two parts from the result (";")
+        leads in a valid Requirement with a valid marker
+    a UserWarning is shown to inform the user about the possible problem.
+    """
+    if "\n" in orig_value or len(parsed) != 2:
+        return
+
+    markers = marker_env().keys()
+
+    try:
+        req = Requirement(parsed[1])
+        if req.name in markers:
+            _AmbiguousMarker.emit(field=label, req=parsed[1])
+    except InvalidRequirement as ex:
+        if any(parsed[1].startswith(marker) for marker in markers):
+            msg = _AmbiguousMarker.message(field=label, req=parsed[1])
+            raise InvalidRequirement(msg) from ex
+
+
+class ConfigHandler(Generic[Target]):
+    """Handles metadata supplied in configuration files."""
+
+    section_prefix: str
+    """Prefix for config sections handled by this handler.
+    Must be provided by class heirs.
+
+    """
+
+    aliases: dict[str, str] = {}
+    """Options aliases.
+    For compatibility with various packages. E.g.: d2to1 and pbr.
+    Note: `-` in keys is replaced with `_` by config parser.
+
+    """
+
+    def __init__(
+        self,
+        target_obj: Target,
+        options: AllCommandOptions,
+        ignore_option_errors,
+        ensure_discovered: expand.EnsurePackagesDiscovered,
+    ):
+        self.ignore_option_errors = ignore_option_errors
+        self.target_obj = target_obj
+        self.sections = dict(self._section_options(options))
+        self.set_options: list[str] = []
+        self.ensure_discovered = ensure_discovered
+        self._referenced_files: set[str] = set()
+        """After parsing configurations, this property will enumerate
+        all files referenced by the "file:" directive. Private API for setuptools only.
+        """
+
+    @classmethod
+    def _section_options(cls, options: AllCommandOptions):
+        for full_name, value in options.items():
+            pre, sep, name = full_name.partition(cls.section_prefix)
+            if pre:
+                continue
+            yield name.lstrip('.'), value
+
+    @property
+    def parsers(self):
+        """Metadata item name to parser function mapping."""
+        raise NotImplementedError(
+            '%s must provide .parsers property' % self.__class__.__name__
+        )
+
+    def __setitem__(self, option_name, value):
+        target_obj = self.target_obj
+
+        # Translate alias into real name.
+        option_name = self.aliases.get(option_name, option_name)
+
+        try:
+            current_value = getattr(target_obj, option_name)
+        except AttributeError as e:
+            raise KeyError(option_name) from e
+
+        if current_value:
+            # Already inhabited. Skipping.
+            return
+
+        try:
+            parsed = self.parsers.get(option_name, lambda x: x)(value)
+        except (Exception,) * self.ignore_option_errors:
+            return
+
+        simple_setter = functools.partial(target_obj.__setattr__, option_name)
+        setter = getattr(target_obj, 'set_%s' % option_name, simple_setter)
+        setter(parsed)
+
+        self.set_options.append(option_name)
+
+    @classmethod
+    def _parse_list(cls, value, separator=','):
+        """Represents value as a list.
+
+        Value is split either by separator (defaults to comma) or by lines.
+
+        :param value:
+        :param separator: List items separator character.
+        :rtype: list
+        """
+        if isinstance(value, list):  # _get_parser_compound case
+            return value
+
+        if '\n' in value:
+            value = value.splitlines()
+        else:
+            value = value.split(separator)
+
+        return [chunk.strip() for chunk in value if chunk.strip()]
+
+    @classmethod
+    def _parse_dict(cls, value):
+        """Represents value as a dict.
+
+        :param value:
+        :rtype: dict
+        """
+        separator = '='
+        result = {}
+        for line in cls._parse_list(value):
+            key, sep, val = line.partition(separator)
+            if sep != separator:
+                raise OptionError(f"Unable to parse option value to dict: {value}")
+            result[key.strip()] = val.strip()
+
+        return result
+
+    @classmethod
+    def _parse_bool(cls, value):
+        """Represents value as boolean.
+
+        :param value:
+        :rtype: bool
+        """
+        value = value.lower()
+        return value in ('1', 'true', 'yes')
+
+    @classmethod
+    def _exclude_files_parser(cls, key):
+        """Returns a parser function to make sure field inputs
+        are not files.
+
+        Parses a value after getting the key so error messages are
+        more informative.
+
+        :param key:
+        :rtype: callable
+        """
+
+        def parser(value):
+            exclude_directive = 'file:'
+            if value.startswith(exclude_directive):
+                raise ValueError(
+                    'Only strings are accepted for the {0} field, '
+                    'files are not accepted'.format(key)
+                )
+            return value
+
+        return parser
+
+    def _parse_file(self, value, root_dir: StrPath):
+        """Represents value as a string, allowing including text
+        from nearest files using `file:` directive.
+
+        Directive is sandboxed and won't reach anything outside
+        directory with setup.py.
+
+        Examples:
+            file: README.rst, CHANGELOG.md, src/file.txt
+
+        :param str value:
+        :rtype: str
+        """
+        include_directive = 'file:'
+
+        if not isinstance(value, str):
+            return value
+
+        if not value.startswith(include_directive):
+            return value
+
+        spec = value[len(include_directive) :]
+        filepaths = [path.strip() for path in spec.split(',')]
+        self._referenced_files.update(filepaths)
+        return expand.read_files(filepaths, root_dir)
+
+    def _parse_attr(self, value, package_dir, root_dir: StrPath):
+        """Represents value as a module attribute.
+
+        Examples:
+            attr: package.attr
+            attr: package.module.attr
+
+        :param str value:
+        :rtype: str
+        """
+        attr_directive = 'attr:'
+        if not value.startswith(attr_directive):
+            return value
+
+        attr_desc = value.replace(attr_directive, '')
+
+        # Make sure package_dir is populated correctly, so `attr:` directives can work
+        package_dir.update(self.ensure_discovered.package_dir)
+        return expand.read_attr(attr_desc, package_dir, root_dir)
+
+    @classmethod
+    def _get_parser_compound(cls, *parse_methods):
+        """Returns parser function to represents value as a list.
+
+        Parses a value applying given methods one after another.
+
+        :param parse_methods:
+        :rtype: callable
+        """
+
+        def parse(value):
+            parsed = value
+
+            for method in parse_methods:
+                parsed = method(parsed)
+
+            return parsed
+
+        return parse
+
+    @classmethod
+    def _parse_section_to_dict_with_key(cls, section_options, values_parser):
+        """Parses section options into a dictionary.
+
+        Applies a given parser to each option in a section.
+
+        :param dict section_options:
+        :param callable values_parser: function with 2 args corresponding to key, value
+        :rtype: dict
+        """
+        value = {}
+        for key, (_, val) in section_options.items():
+            value[key] = values_parser(key, val)
+        return value
+
+    @classmethod
+    def _parse_section_to_dict(cls, section_options, values_parser=None):
+        """Parses section options into a dictionary.
+
+        Optionally applies a given parser to each value.
+
+        :param dict section_options:
+        :param callable values_parser: function with 1 arg corresponding to option value
+        :rtype: dict
+        """
+        parser = (lambda _, v: values_parser(v)) if values_parser else (lambda _, v: v)
+        return cls._parse_section_to_dict_with_key(section_options, parser)
+
+    def parse_section(self, section_options):
+        """Parses configuration file section.
+
+        :param dict section_options:
+        """
+        for name, (_, value) in section_options.items():
+            with contextlib.suppress(KeyError):
+                # Keep silent for a new option may appear anytime.
+                self[name] = value
+
+    def parse(self) -> None:
+        """Parses configuration file items from one
+        or more related sections.
+
+        """
+        for section_name, section_options in self.sections.items():
+            method_postfix = ''
+            if section_name:  # [section.option] variant
+                method_postfix = '_%s' % section_name
+
+            section_parser_method: Callable | None = getattr(
+                self,
+                # Dots in section names are translated into dunderscores.
+                ('parse_section%s' % method_postfix).replace('.', '__'),
+                None,
+            )
+
+            if section_parser_method is None:
+                raise OptionError(
+                    "Unsupported distribution option section: "
+                    f"[{self.section_prefix}.{section_name}]"
+                )
+
+            section_parser_method(section_options)
+
+    def _deprecated_config_handler(self, func, msg, **kw):
+        """this function will wrap around parameters that are deprecated
+
+        :param msg: deprecation message
+        :param func: function to be wrapped around
+        """
+
+        @wraps(func)
+        def config_handler(*args, **kwargs):
+            kw.setdefault("stacklevel", 2)
+            _DeprecatedConfig.emit("Deprecated config in `setup.cfg`", msg, **kw)
+            return func(*args, **kwargs)
+
+        return config_handler
+
+
+class ConfigMetadataHandler(ConfigHandler["DistributionMetadata"]):
+    section_prefix = 'metadata'
+
+    aliases = {
+        'home_page': 'url',
+        'summary': 'description',
+        'classifier': 'classifiers',
+        'platform': 'platforms',
+    }
+
+    strict_mode = False
+    """We need to keep it loose, to be partially compatible with
+    `pbr` and `d2to1` packages which also uses `metadata` section.
+
+    """
+
+    def __init__(
+        self,
+        target_obj: DistributionMetadata,
+        options: AllCommandOptions,
+        ignore_option_errors: bool,
+        ensure_discovered: expand.EnsurePackagesDiscovered,
+        package_dir: dict | None = None,
+        root_dir: StrPath = os.curdir,
+    ):
+        super().__init__(target_obj, options, ignore_option_errors, ensure_discovered)
+        self.package_dir = package_dir
+        self.root_dir = root_dir
+
+    @property
+    def parsers(self):
+        """Metadata item name to parser function mapping."""
+        parse_list = self._parse_list
+        parse_file = partial(self._parse_file, root_dir=self.root_dir)
+        parse_dict = self._parse_dict
+        exclude_files_parser = self._exclude_files_parser
+
+        return {
+            'platforms': parse_list,
+            'keywords': parse_list,
+            'provides': parse_list,
+            'obsoletes': parse_list,
+            'classifiers': self._get_parser_compound(parse_file, parse_list),
+            'license': exclude_files_parser('license'),
+            'license_files': parse_list,
+            'description': parse_file,
+            'long_description': parse_file,
+            'version': self._parse_version,
+            'project_urls': parse_dict,
+        }
+
+    def _parse_version(self, value):
+        """Parses `version` option value.
+
+        :param value:
+        :rtype: str
+
+        """
+        version = self._parse_file(value, self.root_dir)
+
+        if version != value:
+            version = version.strip()
+            # Be strict about versions loaded from file because it's easy to
+            # accidentally include newlines and other unintended content
+            try:
+                Version(version)
+            except InvalidVersion as e:
+                raise OptionError(
+                    f'Version loaded from {value} does not '
+                    f'comply with PEP 440: {version}'
+                ) from e
+
+            return version
+
+        return expand.version(self._parse_attr(value, self.package_dir, self.root_dir))
+
+
+class ConfigOptionsHandler(ConfigHandler["Distribution"]):
+    section_prefix = 'options'
+
+    def __init__(
+        self,
+        target_obj: Distribution,
+        options: AllCommandOptions,
+        ignore_option_errors: bool,
+        ensure_discovered: expand.EnsurePackagesDiscovered,
+    ):
+        super().__init__(target_obj, options, ignore_option_errors, ensure_discovered)
+        self.root_dir = target_obj.src_root
+        self.package_dir: dict[str, str] = {}  # To be filled by `find_packages`
+
+    @classmethod
+    def _parse_list_semicolon(cls, value):
+        return cls._parse_list(value, separator=';')
+
+    def _parse_file_in_root(self, value):
+        return self._parse_file(value, root_dir=self.root_dir)
+
+    def _parse_requirements_list(self, label: str, value: str):
+        # Parse a requirements list, either by reading in a `file:`, or a list.
+        parsed = self._parse_list_semicolon(self._parse_file_in_root(value))
+        _warn_accidental_env_marker_misconfig(label, value, parsed)
+        # Filter it to only include lines that are not comments. `parse_list`
+        # will have stripped each line and filtered out empties.
+        return [line for line in parsed if not line.startswith("#")]
+
+    @property
+    def parsers(self):
+        """Metadata item name to parser function mapping."""
+        parse_list = self._parse_list
+        parse_bool = self._parse_bool
+        parse_dict = self._parse_dict
+        parse_cmdclass = self._parse_cmdclass
+
+        return {
+            'zip_safe': parse_bool,
+            'include_package_data': parse_bool,
+            'package_dir': parse_dict,
+            'scripts': parse_list,
+            'eager_resources': parse_list,
+            'dependency_links': parse_list,
+            'namespace_packages': self._deprecated_config_handler(
+                parse_list,
+                "The namespace_packages parameter is deprecated, "
+                "consider using implicit namespaces instead (PEP 420).",
+                # TODO: define due date, see setuptools.dist:check_nsp.
+            ),
+            'install_requires': partial(
+                self._parse_requirements_list, "install_requires"
+            ),
+            'setup_requires': self._parse_list_semicolon,
+            'tests_require': self._parse_list_semicolon,
+            'packages': self._parse_packages,
+            'entry_points': self._parse_file_in_root,
+            'py_modules': parse_list,
+            'python_requires': SpecifierSet,
+            'cmdclass': parse_cmdclass,
+        }
+
+    def _parse_cmdclass(self, value):
+        package_dir = self.ensure_discovered.package_dir
+        return expand.cmdclass(self._parse_dict(value), package_dir, self.root_dir)
+
+    def _parse_packages(self, value):
+        """Parses `packages` option value.
+
+        :param value:
+        :rtype: list
+        """
+        find_directives = ['find:', 'find_namespace:']
+        trimmed_value = value.strip()
+
+        if trimmed_value not in find_directives:
+            return self._parse_list(value)
+
+        # Read function arguments from a dedicated section.
+        find_kwargs = self.parse_section_packages__find(
+            self.sections.get('packages.find', {})
+        )
+
+        find_kwargs.update(
+            namespaces=(trimmed_value == find_directives[1]),
+            root_dir=self.root_dir,
+            fill_package_dir=self.package_dir,
+        )
+
+        return expand.find_packages(**find_kwargs)
+
+    def parse_section_packages__find(self, section_options):
+        """Parses `packages.find` configuration file section.
+
+        To be used in conjunction with _parse_packages().
+
+        :param dict section_options:
+        """
+        section_data = self._parse_section_to_dict(section_options, self._parse_list)
+
+        valid_keys = ['where', 'include', 'exclude']
+
+        find_kwargs = dict([
+            (k, v) for k, v in section_data.items() if k in valid_keys and v
+        ])
+
+        where = find_kwargs.get('where')
+        if where is not None:
+            find_kwargs['where'] = where[0]  # cast list to single val
+
+        return find_kwargs
+
+    def parse_section_entry_points(self, section_options):
+        """Parses `entry_points` configuration file section.
+
+        :param dict section_options:
+        """
+        parsed = self._parse_section_to_dict(section_options, self._parse_list)
+        self['entry_points'] = parsed
+
+    def _parse_package_data(self, section_options):
+        package_data = self._parse_section_to_dict(section_options, self._parse_list)
+        return expand.canonic_package_data(package_data)
+
+    def parse_section_package_data(self, section_options):
+        """Parses `package_data` configuration file section.
+
+        :param dict section_options:
+        """
+        self['package_data'] = self._parse_package_data(section_options)
+
+    def parse_section_exclude_package_data(self, section_options):
+        """Parses `exclude_package_data` configuration file section.
+
+        :param dict section_options:
+        """
+        self['exclude_package_data'] = self._parse_package_data(section_options)
+
+    def parse_section_extras_require(self, section_options):
+        """Parses `extras_require` configuration file section.
+
+        :param dict section_options:
+        """
+        parsed = self._parse_section_to_dict_with_key(
+            section_options,
+            lambda k, v: self._parse_requirements_list(f"extras_require[{k}]", v),
+        )
+
+        self['extras_require'] = parsed
+
+    def parse_section_data_files(self, section_options):
+        """Parses `data_files` configuration file section.
+
+        :param dict section_options:
+        """
+        parsed = self._parse_section_to_dict(section_options, self._parse_list)
+        self['data_files'] = expand.canonic_data_files(parsed, self.root_dir)
+
+
+class _AmbiguousMarker(SetuptoolsDeprecationWarning):
+    _SUMMARY = "Ambiguous requirement marker."
+    _DETAILS = """
+    One of the parsed requirements in `{field}` looks like a valid environment marker:
+
+        {req!r}
+
+    Please make sure that the configuration file is correct.
+    You can use dangling lines to avoid this problem.
+    """
+    _SEE_DOCS = "userguide/declarative_config.html#opt-2"
+    # TODO: should we include due_date here? Initially introduced in 6 Aug 2022.
+    # Does this make sense with latest version of packaging?
+
+    @classmethod
+    def message(cls, **kw):
+        docs = f"https://setuptools.pypa.io/en/latest/{cls._SEE_DOCS}"
+        return cls._format(cls._SUMMARY, cls._DETAILS, see_url=docs, format_args=kw)
+
+
+class _DeprecatedConfig(SetuptoolsDeprecationWarning):
+    _SEE_DOCS = "userguide/declarative_config.html"
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/extern/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/extern/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9b6eea70db876b6814c0eb585da57bf41ee57ca
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/extern/__init__.py
@@ -0,0 +1,92 @@
+import importlib.util
+import sys
+
+
+class VendorImporter:
+    """
+    A PEP 302 meta path importer for finding optionally-vendored
+    or otherwise naturally-installed packages from root_name.
+    """
+
+    def __init__(self, root_name, vendored_names=(), vendor_pkg=None):
+        self.root_name = root_name
+        self.vendored_names = set(vendored_names)
+        self.vendor_pkg = vendor_pkg or root_name.replace('extern', '_vendor')
+
+    @property
+    def search_path(self):
+        """
+        Search first the vendor package then as a natural package.
+        """
+        yield self.vendor_pkg + '.'
+        yield ''
+
+    def _module_matches_namespace(self, fullname):
+        """Figure out if the target module is vendored."""
+        root, base, target = fullname.partition(self.root_name + '.')
+        return not root and any(map(target.startswith, self.vendored_names))
+
+    def load_module(self, fullname):
+        """
+        Iterate over the search path to locate and load fullname.
+        """
+        root, base, target = fullname.partition(self.root_name + '.')
+        for prefix in self.search_path:
+            extant = prefix + target
+            try:
+                __import__(extant)
+            except ImportError:
+                continue
+            mod = sys.modules[extant]
+            sys.modules[fullname] = mod
+            return mod
+        else:
+            raise ImportError(
+                "The '{target}' package is required; "
+                "normally this is bundled with this package so if you get "
+                "this warning, consult the packager of your "
+                "distribution.".format(**locals())
+            )
+
+    def create_module(self, spec):
+        return self.load_module(spec.name)
+
+    def exec_module(self, module):
+        pass
+
+    def find_spec(self, fullname, path=None, target=None):
+        """Return a module spec for vendored names."""
+        return (
+            importlib.util.spec_from_loader(fullname, self)
+            if self._module_matches_namespace(fullname)
+            else None
+        )
+
+    def install(self):
+        """
+        Install this importer into sys.meta_path if not already present.
+        """
+        if self not in sys.meta_path:
+            sys.meta_path.append(self)
+
+
+# [[[cog
+# import cog
+# from tools.vendored import yield_top_level
+# names = "\n".join(f"    {x!r}," for x in yield_top_level('setuptools'))
+# cog.outl(f"names = (\n{names}\n)")
+# ]]]
+names = (
+    'backports',
+    'importlib_metadata',
+    'importlib_resources',
+    'jaraco',
+    'more_itertools',
+    'ordered_set',
+    'packaging',
+    'tomli',
+    'wheel',
+    'zipp',
+)
+# [[[end]]]
+VendorImporter(__name__, names, 'setuptools._vendor').install()
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/extern/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/extern/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..17721a8eb1e0bd7d5e0e6802dd2e84a2e182cf17
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/setuptools/extern/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..92eb4a9f16dcdb59fa6be3aefbaa4c47206ba462
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4bfc36d5befee0dd43ee880b0f4bf5159328b5b2
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..81628691c9156855944d1611783429c396687b97
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a76f8d1c72ed682c7ae991beafa8aabc94504d2e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__pycache__/_nvshmem_triton.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__pycache__/_nvshmem_triton.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b0bad9aa22fbf2347319f116d6cd84773d6152ac
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__pycache__/_nvshmem_triton.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b8bb60342d83fabaec5086b810511d0d3ab77089
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_api.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_api.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4f64d59d6cdea001c99fec0db9ca2b78dad6a34b
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_api.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_argmin_argmax.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_argmin_argmax.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5086eea1c3f20bfd951056ac635f7cd55ccf2884
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_argmin_argmax.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_collective_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_collective_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e475dc13c8fda84a27eafa34eb6afecb237a72eb
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_collective_utils.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_dispatch.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_dispatch.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e60a11b0d23516a7317690921f3ad354be05742
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_dispatch.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_dtensor_spec.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_dtensor_spec.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..810816b718ab5f73157f46781ed5e76a365e2f4a
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_dtensor_spec.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_op_schema.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_op_schema.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..019abea359f60ac66632875b7f24eb78eb0c43a4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_op_schema.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_random.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_random.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ca44994018d3cf4e076330110c233f75e333ecfc
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_random.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_redistribute.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_redistribute.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..de4152ff7a493672ae379cf234400d84bec5d824
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_redistribute.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_sharding_prop.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_sharding_prop.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6eb09bd3c5593676aeb35cfc7ae77a57dbc15ca1
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_sharding_prop.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_shards_wrapper.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_shards_wrapper.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..16b518d0ddd502fbf0760381f04d2bac455137ad
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_shards_wrapper.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_tp_conv.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_tp_conv.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4b34b28a614ae65a25061bf8cac31580165a8696
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_tp_conv.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..08a45de7714a0120c4320ccfb23f54704c761b5f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/_utils.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/device_mesh.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/device_mesh.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..623c8d7685e3236d877578cbe56cc7dd826911f6
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/device_mesh.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/placement_types.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/placement_types.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3ad741f531f78ba3ec942875d5b627935ba67327
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/__pycache__/placement_types.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..85313673e0f3e23390c30e0fcdb23e4a0317338d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_common_rules.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_common_rules.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9a650d461ed84ebe023001408596f1827cebdfb1
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_common_rules.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_conv_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_conv_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4c1bfd4b4659af70eeb44d78d023de52bbcf4c56
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_conv_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_einsum_strategy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_einsum_strategy.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a9008622b61da339e1a4d4ea13bfaebe083de6ef
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_einsum_strategy.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_embedding_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_embedding_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2b0ff9ece9a4300160ac8f5a86aca561006564ba
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_embedding_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_mask_buffer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_mask_buffer.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6c5f7c98aabc10d35dcc90927f2f9df5ad90e560
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_mask_buffer.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_math_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_math_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf85aab773427cc6685d1630ce747fa53941ce8f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_math_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_matrix_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_matrix_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9ad31d1fa853c96f5df6f37e3a9ec01adbb85a26
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_matrix_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_pointwise_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_pointwise_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..52661bbc244b86c1ace7d0c68e3a16208cb03bbc
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_pointwise_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_random_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_random_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b88e0a510a4211c4d4235f50fc2349def4a8a7cf
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_random_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_tensor_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_tensor_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3753611e6f20b51ddcfd958f6136d0f9bd34588e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_tensor_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_view_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_view_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3fea1423041a38b6f335b20b1c8cfe1dca811af3
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/_view_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/registration.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/registration.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..482eb3dced58b2e54b29d5e006209283e8db15a8
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/registration.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c768a61c7b4e1ca184b0eda0f5bda183e78bf885
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/__pycache__/utils.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_common_rules.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_common_rules.py
new file mode 100644
index 0000000000000000000000000000000000000000..2312f8e56c554b03e04a2d93fe45b899cf948916
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_common_rules.py
@@ -0,0 +1,285 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+import string
+from typing import cast
+
+import torch
+from torch.distributed.tensor._dtensor_spec import DTensorSpec, TensorMeta
+from torch.distributed.tensor._op_schema import OpSchema, OutputSharding
+from torch.distributed.tensor._ops.utils import prod
+from torch.distributed.tensor._utils import compute_local_shape_and_global_offset
+
+
+def _replace_char_in_str(string: str, new_char: str, idx: int) -> str:
+    return string[:idx] + new_char + string[idx + 1 :]
+
+
+def _gen_reshard_suggestions(
+    op_schema: OpSchema,
+    input_dims: list[str],
+    input_specs: tuple[DTensorSpec, ...],
+    dim_to_sharding: dict[str, int],
+    pending_sum: list[int],
+) -> OutputSharding:
+    suggested_arg_specs: list[DTensorSpec] = []
+    for input_dim, input_spec in zip(input_dims, input_specs):
+        dim_map = [dim_to_sharding[dim] for dim in input_dim]
+        suggested_arg_specs.append(
+            DTensorSpec.from_dim_map(
+                mesh=input_spec.mesh,
+                dim_map=dim_map,
+                sums=pending_sum,
+                tensor_meta=input_spec.tensor_meta,
+            )
+        )
+    suggested_schema = OpSchema(op_schema.op, tuple(suggested_arg_specs), {})
+    suggested_schema._inplace_rewrap_schema_suggestion(op_schema)
+    return OutputSharding(
+        None,
+        redistribute_schema=suggested_schema,
+    )
+
+
+def einop_rule(
+    equation: str,
+    op_schema: OpSchema,
+    *,
+    linearity: bool = False,
+    enforce_sharding: dict[str, int] | None = None,
+) -> OutputSharding:
+    """
+    Propagate the sharding of inputs to output for ops whose data moves according to einsum notation.
+
+    This is mostly borrowed from @zdevito's sharding simulator. Examples:
+        mk,kn->mn - einsum
+        ij,ij->ij - addition
+        ij,j->ij - broadcasted addition
+        ij->i - reduction
+    Other ops could use this propagation algorithm when applied, note
+    that einsum propagation only deal with list of specs (DTensor specs)
+    as it only works on list of tensors!
+
+    linearity in einop_rule means that the calling op `f` follows this rule:
+        f(a + b) = f(a) + f(b)
+
+    In this case we can propagate the partial sum, note that linearity in einop
+    only applies to partial sum, not other operations like min/max (which are
+    associative but not linear).
+    """
+    # parse einop equation and extract arg specs
+    inputs, outputs = equation.split("->")
+    input_dims, output_dims = inputs.split(","), outputs.split(",")
+    input_specs = op_schema.args_spec
+    # NOTE: only support single output unless needed in future
+    output_dim = output_dims[0]
+
+    dim_to_sharding: dict[str, int] = {}
+    dim_to_size: dict[str, int] = {}
+    # record pending sum, key is mesh dimension, value is pending sum
+    # counter across input specs
+    pending_sums_counter: dict[int, int] = {}
+    seen_shardings: dict[int, str] = {}
+    needs_reshard = False
+
+    def merge_sharding(dim: str, a: int, b: int) -> int:
+        # merge the sharding of inputs if it's able to merge, i.e. we can merge
+        # replicate and shard to shard, but this will trigger an reshard operation
+        if a != b:
+            if a == -1 or b == -1:
+                # reshard the replicate to match the sharded one
+                nonlocal needs_reshard
+                needs_reshard = True
+                return a if a != -1 else b
+            else:
+                # TODO: further merge the sharding properly (i.e. reshard one input to replicate)
+                raise RuntimeError(
+                    f"{equation}: dim {dim} sharded two different ways: {a} and {b}"
+                )
+        else:
+            return a
+
+    for input_dim, input_spec in zip(input_dims, input_specs):
+        # deal with partial sums
+        input_sums = input_spec.sums
+        for sum_dim in input_sums:
+            if sum_dim not in pending_sums_counter:
+                seen_shardings[sum_dim] = "+"
+            # update pending sum counter for pending sum mesh
+            # dimension with the occurrence from each input
+            pending_sums_counter[sum_dim] = pending_sums_counter.get(sum_dim, 0) + 1
+
+        for idx, (dim, mesh_dim) in enumerate(zip(input_dim, input_spec.dim_map)):
+            if enforce_sharding and dim in enforce_sharding:
+                if enforce_sharding[dim] != mesh_dim:
+                    needs_reshard = True
+                dim_to_sharding[dim] = enforce_sharding[dim]
+                dim_to_size[dim] = input_spec.shape[idx]
+            elif dim not in dim_to_sharding:
+                dim_to_sharding[dim] = mesh_dim
+                dim_to_size[dim] = input_spec.shape[idx]
+            else:
+                dim_to_sharding[dim] = merge_sharding(
+                    dim, dim_to_sharding[dim], mesh_dim
+                )
+                assert dim_to_size[dim] == input_spec.shape[idx]
+
+            # after merging sharding, we check if there're multiple
+            # sharding on the same mesh dim.
+            merged_sharding_for_dim = dim_to_sharding[dim]
+            if merged_sharding_for_dim != -1:
+                if (
+                    merged_sharding_for_dim in seen_shardings
+                    and dim != seen_shardings[merged_sharding_for_dim]
+                ):
+                    needs_reshard = True
+                    seen_shardings[merged_sharding_for_dim] += dim
+                else:
+                    seen_shardings[merged_sharding_for_dim] = dim
+
+    if pending_sums_counter and not linearity:
+        # return reshard suggestion with no pending sum, because we already properly
+        # merge the sharding, this reshard suggestion is legit to use
+        return _gen_reshard_suggestions(
+            op_schema, input_dims, input_specs, dim_to_sharding, []
+        )
+    else:
+        # It's a op that support linearity, but not all input arguments are partial
+        # we fail the sharding propagation with suggestion to make all inputs be
+        # partial on the corresponding mesh dim (all inputs should be partial for
+        # the mesh dims in order to execute locally and delay the sum reduction)
+        for value in pending_sums_counter.values():
+            if value != len(input_specs):
+                needs_reshard = True
+
+    for mesh_dim, dims in seen_shardings.items():
+        if len(dims) > 1:
+            # we found different input dims are being sharded on the same mesh dim
+            # in order to perform local op computation, we need to reshard inputs
+            # base on some simple heuristics, now we simply pick the one with least comm
+            # volume. (i.e. the input with least size)
+            # TODO: consider a more advanced heuristic to pick the best sharding
+            costs = []
+            for d in dims:
+                cost = 0
+                for input_dim, input_spec in zip(input_dims, input_specs):
+                    if (
+                        d in input_dim
+                        and input_spec.dim_map[input_dim.index(d)] == mesh_dim
+                    ):
+                        assert input_spec.tensor_meta is not None
+                        global_shape = input_spec.tensor_meta.shape
+                        local_shape, _ = compute_local_shape_and_global_offset(
+                            global_shape,
+                            input_spec.mesh,
+                            input_spec.placements,
+                            skip_offset=True,
+                        )
+                        cost += prod(local_shape) * input_spec.mesh.size(mesh_dim)
+                # pyrefly: ignore [bad-argument-type]
+                costs.append(cost)
+            d_to_keep_sharding = dims[costs.index(max(costs))]
+            for d in dims:
+                # update dim_to_sharding to keep the sharding of the dim with
+                # highest comm and make the rest of the dims to replicate
+                if d != d_to_keep_sharding:
+                    dim_to_sharding[d] = -1
+
+    pending_sums = list(pending_sums_counter.keys())
+    if needs_reshard:
+        return _gen_reshard_suggestions(
+            op_schema, input_dims, input_specs, dim_to_sharding, pending_sums
+        )
+
+    # generate output pending sum if a dim is sharded, and it appears in input
+    # but not output
+    for dim, shard_on_mesh in dim_to_sharding.items():
+        if dim not in output_dims[0] and shard_on_mesh != -1:
+            pending_sums.append(shard_on_mesh)
+
+    # if no need to reshard, we directly generate the output sharding
+    output_dim_map = []
+    output_shape = []
+    for dim in output_dim:
+        if dim == "1":
+            # find output dim that is a singleton dimension, mark sharding and shape
+            output_dim_map.append(-1)
+            output_shape.append(1)
+        else:
+            output_dim_map.append(dim_to_sharding[dim])
+            output_shape.append(dim_to_size[dim])
+
+    # XXX: since we still need to have intermediate shape calculation, we need
+    # to pass in the shape here. We should remove this once sharding decomp works
+    # for ops like addmm
+    assert input_specs[0].tensor_meta is not None
+    tensor_meta = TensorMeta(
+        torch.Size(output_shape),
+        input_specs[0].tensor_meta.stride,
+        input_specs[0].tensor_meta.dtype,
+    )
+    return OutputSharding(
+        DTensorSpec.from_dim_map(
+            input_specs[0].mesh,
+            output_dim_map,
+            pending_sums,
+            tensor_meta=tensor_meta,
+        )
+    )
+
+
+def pointwise_rule(op_schema: OpSchema, linearity: bool = False) -> OutputSharding:
+    """
+    Propagate the sharding for pointwise operations.
+
+    Examples:
+        ij,ij->ij - addition/mul
+        ij,j->ij - broadcasted addition
+    """
+    alphabet = string.ascii_lowercase
+    # find the max_dim first in case we need to broadcasting
+    input_specs = op_schema.args_spec
+    max_dim = max(input.ndim for input in input_specs)
+    dimchars = []
+    singleton_counter: list[int] = [0] * max_dim
+    for input in input_specs:
+        start_dim = max_dim - input.ndim
+        p = alphabet[start_dim:max_dim]
+        # handle the "broadcasting to a common shape case"
+        # see https://pytorch.org/docs/stable/notes/broadcasting.html
+        # If any of the dimensions is singleton dimension (i.e. 1).
+        # we mark the dim char as a special "1" to distinguish with
+        # the non-singleton dimension, so that sharding propagation
+        # should just ignore the singleton dimension.
+        if len(input_specs) > 1:
+            for i in range(max_dim):
+                if i < start_dim:
+                    # treat the leading miss dim chars as singleton
+                    singleton_counter[i] += 1
+                elif input.shape[i - start_dim] == 1:
+                    # mark singleton dim char as a special "1" in einop rule
+                    singleton_counter[i] += 1
+                    p = _replace_char_in_str(p, "1", (i - start_dim))
+
+        dimchars.append(p)
+    out_dimchars = alphabet[:max_dim]
+    # check if we replace the all inputs dim char with singleton dimension,
+    # if we replace all inputs, we also need to replace the output dimension.
+    for output_dim_idx in range(len(out_dimchars)):
+        if singleton_counter[output_dim_idx] == len(input_specs):
+            out_dimchars = _replace_char_in_str(out_dimchars, "1", output_dim_idx)
+
+    fmt = f"{','.join(p for p in dimchars)}->{out_dimchars}"
+
+    enforce_sharding: dict[str, int] = {}
+    if op_schema.is_inplace_op():
+        follow_spec = op_schema.args_spec[0]
+        enforce_sharding.update(zip(out_dimchars, follow_spec.dim_map))
+    elif op_schema.is_out_variant_op():
+        follow_spec = cast(DTensorSpec, op_schema.kwargs_schema["out"])
+        enforce_sharding.update(zip(out_dimchars, follow_spec.dim_map))
+
+    return einop_rule(
+        fmt,
+        op_schema,
+        linearity=linearity,
+        enforce_sharding=enforce_sharding,
+    )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_mask_buffer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_mask_buffer.py
new file mode 100644
index 0000000000000000000000000000000000000000..26b0a713db42c89043c01ae148a4dfd4e1b62d1a
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_mask_buffer.py
@@ -0,0 +1,43 @@
+# mypy: allow-untyped-defs
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from dataclasses import dataclass
+
+import torch
+
+
+@dataclass
+class MaskBuffer:
+    data: torch.Tensor | None = None
+    # refcount allows shared usage of the MaskBuffer, as long as all users have the same data
+    refcount: int = 0
+
+    def materialize_mask(self, mask):
+        if self.refcount == 0:
+            self.data = mask
+        else:
+            assert self.data is not None
+            if not torch.equal(self.data, mask):
+                raise RuntimeError(
+                    "MaskBuffer has been materialized with conflicting data"
+                )
+        self.refcount += 1
+
+    def release_mask(self):
+        if self.refcount == 0 or self.data is None:
+            raise RuntimeError("MaskBuffer has not been materialized")
+        self.refcount -= 1
+        if self.refcount == 0:
+            self.data = None
+
+    def apply_mask(self, tensor):
+        if self.refcount == 0 or self.data is None:
+            raise RuntimeError("MaskBuffer has not been materialized")
+
+        # NOTE: MaskPartial is being used by the embedding op and the gather op.
+        # For gather, the mask has the same dimension as the output tensor, whereas
+        # the output of the embedding op has an additional dimension compare to the input,
+        # hence the output masking logic below having two different cases.
+        if tensor.ndim == self.data.ndim:
+            tensor[self.data] = 0.0
+        else:
+            tensor[self.data, :] = 0.0
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_pointwise_ops.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_pointwise_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..79030c0d4e28904af6b8d400d8cdd82872d315e3
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_pointwise_ops.py
@@ -0,0 +1,809 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from collections.abc import Sequence
+from typing import cast
+
+import torch
+from torch.distributed.tensor._dtensor_spec import DTensorSpec
+from torch.distributed.tensor._op_schema import (
+    OpSchema,
+    OpSpec,
+    OpStrategy,
+    RuntimeSchemaInfo,
+    StrategyType,
+    TupleStrategy,
+)
+from torch.distributed.tensor._ops.registration import register_op_strategy
+from torch.distributed.tensor._ops.utils import (
+    generate_redistribute_costs,
+    infer_broadcast_dims_map,
+    map_placements_after_broadcast,
+    normalize_dim,
+)
+from torch.distributed.tensor.placement_types import (
+    _StridedShard,
+    Partial,
+    Placement,
+    Replicate,
+    Shard,
+)
+from torch.utils._typing_utils import not_none
+
+
+aten = torch.ops.aten
+# leave the remaining pointwise_ops list here for convenience,
+# Below ops are some pointwise ops that are yet to be supported,
+# they might not be a complete list.
+# pointwise_ops = [
+#     "fake_quantize_per_channel_affine",
+#     "fake_quantize_per_tensor_affine",
+#     "floor_divide",  # floor_divide is deprecated
+#     "frexp",  # multiple output pointwise op, need to add support
+#     "gradient",  #  need investigation on this op
+#     "imag",  # complex data type only
+#     "quantized_batch_norm",
+#     "quantized_max_pool1d",
+#     "quantized_max_pool2d",
+#     "real",  # complex data type only
+# ]
+
+
+pointwise_ops = [
+    # please keep the entries below alphabetically sorted
+    aten.__ilshift__.Scalar,
+    aten.__ilshift__.Tensor,
+    aten.__irshift__.Scalar,
+    aten.__irshift__.Tensor,
+    aten.__lshift__.Scalar,
+    aten.__lshift__.Tensor,
+    aten.__rshift__.Scalar,
+    aten.__rshift__.Tensor,
+    aten._conj.default,
+    aten.abs.default,
+    aten.abs.out,
+    aten.abs_.default,
+    aten.acos.default,
+    aten.acos.out,
+    aten.acos_.default,
+    aten.acosh.default,
+    aten.acosh.out,
+    aten.acosh_.default,
+    aten.add.Scalar,
+    aten.add.out,
+    aten.add_.Scalar,
+    aten.addcdiv.default,
+    aten.addcdiv.out,
+    aten.addcdiv_.default,
+    aten.addcmul.default,
+    aten.addcmul.out,
+    aten.addcmul_.default,
+    aten.angle.default,
+    aten.angle.out,
+    aten.asin.default,
+    aten.asin.out,
+    aten.asin_.default,
+    aten.asinh.default,
+    aten.asinh.out,
+    aten.asinh_.default,
+    aten.atan.default,
+    aten.atan.out,
+    aten.atan2.default,
+    aten.atan2.out,
+    aten.atan2_.default,
+    aten.atan_.default,
+    aten.atanh.default,
+    aten.atanh.out,
+    aten.atanh_.default,
+    aten.bitwise_and.Scalar,
+    aten.bitwise_and.Scalar_Tensor,
+    aten.bitwise_and.Scalar_out,
+    aten.bitwise_and.Tensor,
+    aten.bitwise_and.Tensor_out,
+    aten.bitwise_and_.Scalar,
+    aten.bitwise_and_.Tensor,
+    aten.bitwise_left_shift.Scalar_Tensor,
+    aten.bitwise_left_shift.Tensor,
+    aten.bitwise_left_shift.Tensor_Scalar,
+    aten.bitwise_left_shift.Tensor_Scalar_out,
+    aten.bitwise_left_shift.Tensor_out,
+    aten.bitwise_left_shift_.Tensor,
+    aten.bitwise_left_shift_.Tensor_Scalar,
+    aten.bitwise_not.default,
+    aten.bitwise_not.out,
+    aten.bitwise_not_.default,
+    aten.bitwise_or.Scalar,
+    aten.bitwise_or.Scalar_Tensor,
+    aten.bitwise_or.Scalar_out,
+    aten.bitwise_or.Tensor,
+    aten.bitwise_or.Tensor_out,
+    aten.bitwise_or_.Scalar,
+    aten.bitwise_or_.Tensor,
+    aten.bitwise_right_shift.Scalar_Tensor,
+    aten.bitwise_right_shift.Tensor,
+    aten.bitwise_right_shift.Tensor_Scalar,
+    aten.bitwise_right_shift.Tensor_Scalar_out,
+    aten.bitwise_right_shift.Tensor_out,
+    aten.bitwise_right_shift_.Tensor,
+    aten.bitwise_right_shift_.Tensor_Scalar,
+    aten.bitwise_xor.Scalar,
+    aten.bitwise_xor.Scalar_Tensor,
+    aten.bitwise_xor.Scalar_out,
+    aten.bitwise_xor.Tensor,
+    aten.bitwise_xor.Tensor_out,
+    aten.bitwise_xor_.Scalar,
+    aten.bitwise_xor_.Tensor,
+    aten.ceil.default,
+    aten.ceil.out,
+    aten.ceil_.default,
+    aten.clamp.default,
+    aten.clamp.Tensor,
+    aten.clamp.out,
+    aten.clamp_.default,
+    aten.clamp_.Tensor,
+    aten.clamp_min.default,
+    aten.clamp_min.Tensor,
+    aten.clamp_max.default,
+    aten.clamp_max.Tensor,
+    aten.clip.default,
+    aten.clip.out,
+    aten.clip_.default,
+    aten.conj_physical.default,
+    aten.conj_physical.out,
+    aten.conj_physical_.default,
+    aten.copysign.Scalar,
+    aten.copysign.Scalar_out,
+    aten.copysign.Tensor,
+    aten.copysign.out,
+    aten.copysign_.Scalar,
+    aten.copysign_.Tensor,
+    aten.cos.default,
+    aten.cos.out,
+    aten.cos_.default,
+    aten.cosh.default,
+    aten.cosh.out,
+    aten.cosh_.default,
+    aten.deg2rad.default,
+    aten.deg2rad.out,
+    aten.deg2rad_.default,
+    aten.digamma.default,
+    aten.digamma.out,
+    aten.digamma_.default,
+    aten.div.Tensor,
+    aten.div.Tensor_mode,
+    aten.div.out,
+    aten.div.out_mode,
+    aten.div_.Tensor,
+    aten.div_.Tensor_mode,
+    aten.eq.Tensor,
+    aten.eq.Tensor_out,
+    aten.eq.Scalar,
+    aten.eq.Scalar_out,
+    aten.erf.default,
+    aten.erf.out,
+    aten.erf_.default,
+    aten.erfc.default,
+    aten.erfc.out,
+    aten.erfc_.default,
+    aten.erfinv.default,
+    aten.erfinv.out,
+    aten.erfinv_.default,
+    aten.exp.default,
+    aten.exp.out,
+    aten.exp2.default,
+    aten.exp2.out,
+    aten.exp2_.default,
+    aten.exp_.default,
+    aten.expm1.default,
+    aten.expm1.out,
+    aten.expm1_.default,
+    aten.float_power.Scalar,
+    aten.float_power.Scalar_out,
+    aten.float_power.Tensor_Scalar,
+    aten.float_power.Tensor_Scalar_out,
+    aten.float_power.Tensor_Tensor,
+    aten.float_power.Tensor_Tensor_out,
+    aten.float_power_.Scalar,
+    aten.float_power_.Tensor,
+    aten.floor.default,
+    aten.floor.out,
+    aten.floor_.default,
+    aten.fmod.Scalar,
+    aten.fmod.Scalar_out,
+    aten.fmod.Tensor,
+    aten.fmod.Tensor_out,
+    aten.fmod_.Scalar,
+    aten.fmod_.Tensor,
+    aten.frac.default,
+    aten.frac.out,
+    aten.frac_.default,
+    aten.ge.Scalar,
+    aten.ge.Tensor,
+    aten.gelu.default,
+    aten.gt.Tensor,
+    aten.gt.Tensor_out,
+    aten.gt.Scalar,
+    aten.gt.Scalar_out,
+    aten.gt.Scalar,
+    aten.gt.Tensor,
+    aten.hypot.default,
+    aten.hypot.out,
+    aten.hypot_.default,
+    aten.i0.default,
+    aten.i0.out,
+    aten.i0_.default,
+    aten.igamma.default,
+    aten.igamma.out,
+    aten.igamma_.default,
+    aten.igammac.default,
+    aten.igammac.out,
+    aten.igammac_.default,
+    aten.isinf.default,
+    aten.isnan.default,
+    aten.isneginf.default,
+    aten.isneginf.out,
+    aten.isposinf.default,
+    aten.isposinf.out,
+    aten.ldexp.default,
+    aten.ldexp.out,
+    aten.ldexp_.default,
+    aten.lt.Tensor,
+    aten.lt.Tensor_out,
+    aten.lt.Scalar,
+    aten.lt.Scalar_out,
+    aten.le.Scalar,
+    aten.le.Tensor,
+    aten.lerp.Scalar,
+    aten.lerp.Scalar_out,
+    aten.lerp.Tensor,
+    aten.lerp.Tensor_out,
+    aten.lerp_.Scalar,
+    aten.lerp_.Tensor,
+    aten.lgamma.default,
+    aten.lgamma.out,
+    aten.lgamma_.default,
+    aten.log.default,
+    aten.log.out,
+    aten.log10.default,
+    aten.log10.out,
+    aten.log10_.default,
+    aten.log1p.default,
+    aten.log1p.out,
+    aten.log1p_.default,
+    aten.log2.default,
+    aten.log2.out,
+    aten.log2_.default,
+    aten.log_.default,
+    aten.logaddexp.default,
+    aten.logaddexp.out,
+    aten.logaddexp2.default,
+    aten.logaddexp2.out,
+    aten.logical_and.default,
+    aten.logical_and.out,
+    aten.logical_and_.default,
+    aten.logical_not.default,
+    aten.logical_not.out,
+    aten.logical_not_.default,
+    aten.logical_or.default,
+    aten.logical_or.out,
+    aten.logical_or_.default,
+    aten.logical_xor.default,
+    aten.logical_xor.out,
+    aten.logical_xor_.default,
+    aten.logit.default,
+    aten.logit.out,
+    aten.logit_.default,
+    aten.masked_fill.Scalar,
+    aten.masked_fill_.Scalar,
+    aten.maximum.default,
+    aten.maximum.out,
+    aten.minimum.default,
+    aten.minimum.out,
+    aten.mul.out,
+    aten.mvlgamma.default,
+    aten.mvlgamma.out,
+    aten.mvlgamma_.default,
+    aten.native_dropout_backward.default,
+    aten.native_dropout_backward.out,
+    aten.nan_to_num.default,
+    aten.nan_to_num.out,
+    aten.nan_to_num_.default,
+    aten.ne.Scalar,
+    aten.neg.default,
+    aten.neg.out,
+    aten.neg_.default,
+    aten.nextafter.default,
+    aten.nextafter.out,
+    aten.nextafter_.default,
+    aten.polygamma.default,
+    aten.polygamma.out,
+    aten.polygamma_.default,
+    aten.positive.default,
+    aten.pow.Scalar,
+    aten.pow.Scalar_out,
+    aten.pow.Tensor_Scalar,
+    aten.pow.Tensor_Scalar_out,
+    aten.pow.Tensor_Tensor,
+    aten.pow.Tensor_Tensor_out,
+    aten.pow_.Scalar,
+    aten.pow_.Tensor,
+    aten.reciprocal.default,
+    aten.reciprocal.out,
+    aten.reciprocal_.default,
+    aten.rad2deg.default,
+    aten.rad2deg.out,
+    aten.rad2deg_.default,
+    aten.relu.default,
+    aten.relu_.default,
+    aten.remainder.Scalar,
+    aten.remainder.Scalar_Tensor,
+    aten.remainder.Scalar_out,
+    aten.remainder.Tensor,
+    aten.remainder.Tensor_out,
+    aten.remainder_.Scalar,
+    aten.remainder_.Tensor,
+    aten.round.decimals,
+    aten.round.decimals_out,
+    aten.round.default,
+    aten.round.out,
+    aten.round_.decimals,
+    aten.round_.default,
+    aten.rsqrt.default,
+    aten.rsqrt.out,
+    aten.rsqrt_.default,
+    aten.rsub.Scalar,
+    aten.sgn.default,
+    aten.sgn.out,
+    aten.sgn_.default,
+    aten.sigmoid.default,
+    aten.sigmoid.out,
+    aten.sigmoid_.default,
+    aten.sign.default,
+    aten.sign.out,
+    aten.sign_.default,
+    aten.signbit.default,
+    aten.signbit.out,
+    aten.silu.default,
+    aten.silu.out,
+    aten.sin.default,
+    aten.sin.out,
+    aten.sin_.default,
+    aten.sinc.default,
+    aten.sinc.out,
+    aten.sinc_.default,
+    aten.sinh.default,
+    aten.sinh.out,
+    aten.sinh_.default,
+    aten.sqrt.default,
+    aten.sqrt.out,
+    aten.sqrt_.default,
+    aten.square.default,
+    aten.square.out,
+    aten.square_.default,
+    aten.sub.Scalar,
+    aten.sub.Tensor,
+    aten.sub.out,
+    aten.sub_.Scalar,
+    aten.sub_.Tensor,
+    aten.tan.default,
+    aten.tan.out,
+    aten.tan_.default,
+    aten.tanh.default,
+    aten.tanh.out,
+    aten.tanh_.default,
+    aten.true_divide.Tensor,
+    aten.trunc.default,
+    aten.trunc.out,
+    aten.trunc_.default,
+    aten.where.self,
+    aten.where.self_out,
+    aten.xlogy.OutScalar_Self,
+    aten.xlogy.OutScalar_Other,
+    aten.xlogy.OutTensor,
+    aten.xlogy.Scalar_Other,
+    aten.xlogy.Scalar_Self,
+    aten.xlogy.Tensor,
+    aten.xlogy_.Scalar_Other,
+    aten.xlogy_.Tensor,
+    # backward point-wise ops
+    # please keep the entries below alphabetically sorted
+    aten.gelu_backward.default,
+    aten.sigmoid_backward.default,
+    aten.silu_backward.default,
+    aten.tanh_backward.default,
+    aten.threshold_backward.default,
+]
+
+# the linear pointwise ops map, key is op, value is the type of linearity
+linear_pointwise_ops = {
+    aten.to.dtype: 0,
+    aten.add.Tensor: 1,
+    aten.add_.Tensor: 1,
+    aten.div.Scalar: 0,
+    aten.div_.Scalar: 0,
+    aten.mul.Scalar: 0,
+    aten.mul_.Scalar: 0,
+    aten.mul.Tensor: 2,
+    aten.mul_.Tensor: 2,
+    aten.copy_.default: 1,
+}
+
+
+def pointwise_strategy(op_schema: OpSchema, linearity: int = -1) -> OpStrategy:
+    followed_strategy_index = -1
+    max_shards = -1
+    max_ndim = -1
+
+    if op_schema.is_inplace_op():
+        # inplace op should follow the first arg strategy
+        followed_strategy = op_schema.args_schema[0]
+        followed_strategy_index = 0
+    elif op_schema.is_out_variant_op():
+        # out variant op should follow the out kwarg strategy
+        followed_strategy = op_schema.kwargs_schema["out"]
+        # out variant is technically a kwarg for the strategy to follow so it does not
+        # have an "index", we set it to a reasonably large number just to indicate it's
+        # not a valid index
+        followed_strategy_index = 100
+    else:
+        # normal pointwise op, we choose to follow the arg with
+        # the max shards in case operands needs reshard
+        # in case of multiple operands with max shard, we take
+        # the one with the max number of dimensions
+        for idx, arg_strategy in enumerate(op_schema.args_schema):
+            if not isinstance(arg_strategy, OpStrategy):
+                continue
+
+            arg_max_shards = arg_strategy.max_num_shards()
+            arg_max_ndim = arg_strategy.ndim
+            if (arg_max_shards > max_shards) or (
+                arg_max_shards == max_shards and arg_max_ndim > max_ndim
+            ):
+                followed_strategy_index = idx
+                max_shards = arg_max_shards
+                max_ndim = arg_max_ndim
+
+        followed_strategy = op_schema.args_schema[followed_strategy_index]
+
+    assert isinstance(followed_strategy, OpStrategy), (
+        f"no strategy to follow for {op_schema}!"
+    )
+    return common_pointwise_strategy(
+        op_schema.args_schema,
+        followed_strategy,
+        followed_strategy_index,
+        linearity,
+    )
+
+
+def linear_pointwise_strategy(op_schema: OpSchema) -> StrategyType:
+    """
+    Linear pointwise operators can propagate pending reductions.
+    For example, c = add(a, b); if a is pending sum, then c will be
+    pending sum as well without any communication overhead.
+
+    Note that:
+    1. Only unary and binary operations are supported, out variant
+      ops are not supported.
+    2. There're multiple types of linearity, refer to the doc of
+      common_pointwise_strategy for more details.
+    """
+    linearity_type = linear_pointwise_ops.get(op_schema.op, -1)
+    return pointwise_strategy(op_schema, linearity=linearity_type)
+
+
+def common_pointwise_strategy(
+    args_schema: Sequence[object],
+    followed_strategy: OpStrategy,
+    followed_strategy_index: int,
+    linearity: int = -1,
+    scalar_tensor_idx: int | None = None,
+) -> OpStrategy:
+    """
+    Common strategy for pointwise operations.
+
+    Args:
+        args_schema: Input arguments schema
+        followed_strategy: Strategy to follow for output placement
+        followed_strategy_index: Index of the strategy being followed
+        linearity: depending on the operator, we support different types of linearity
+            -1: the operation does not support linearity
+            0: the unary operation that supports linearity, output propagates partial.
+            1: the binary operation supports add linearity, where it requires every operand
+                to be partial, output propagates partial.
+            2: the binary operation supports multiplicative linearity, where it requires
+                the primary operand to be partial, and the other operands to be replicate,
+                output propagates partial.
+        scalar_tensor_idx: Index of the Replicate scalar tensor for which we allow the mesh
+            to be different from the mesh of followed_strategy
+    """
+    # handle broadcasting
+    common_shape = torch.broadcast_shapes(
+        *[arg.shape for arg in args_schema if isinstance(arg, OpStrategy)]
+    )
+    pointwise_strategy = OpStrategy([])
+
+    for op_spec in followed_strategy.strategies:
+        spec_to_follow = op_spec.output_spec
+
+        out_placements: list[Placement] = []
+        for placement in spec_to_follow.placements:
+            if isinstance(placement, Shard | _StridedShard):
+                shard_dim = normalize_dim(placement.dim, len(spec_to_follow.shape))
+                common_ndim = len(common_shape)
+                new_shard_dim = common_ndim - len(spec_to_follow.shape) + shard_dim
+                if isinstance(placement, _StridedShard):
+                    out_placements.append(
+                        _StridedShard(
+                            new_shard_dim, split_factor=placement.split_factor
+                        )
+                    )
+                else:
+                    out_placements.append(Shard(new_shard_dim))
+            elif isinstance(placement, Partial):
+                # note that only partial-sum and partial-avg are supported for linearity
+                partial_supports_linearity = placement.is_partial(
+                    "sum"
+                ) or placement.is_partial("avg")
+                if linearity > 0 and partial_supports_linearity:
+                    # propagate the partial placement
+                    out_placements.append(placement)
+                else:
+                    # clear the partial placement if op does not support linearity
+                    # by default we just replicate the partial, need to see if this
+                    # is optimal for all cases
+                    out_placements.append(Replicate())
+            else:
+                out_placements.append(placement)
+
+        input_specs: list[DTensorSpec] = []
+        redistribute_costs: list[list[float]] = []
+        for input_idx, input_arg in enumerate(args_schema):
+            if isinstance(input_arg, OpStrategy):
+                input_arg_spec = input_arg.strategies[0].output_spec
+
+                # sanity check that all args that follow the same strategy
+                # are on the same DeviceMesh
+                if input_arg.mesh != followed_strategy.mesh:
+                    # For the scalar tensor arg in fused ops, do not follow followed_strategy;
+                    # instead, let the input mesh and the Replicate placements propagate through.
+                    if input_idx == scalar_tensor_idx:
+                        assert all(p == Replicate() for p in input_arg_spec.placements)
+                        input_arg_target_spec = DTensorSpec(
+                            mesh=input_arg.mesh,
+                            placements=input_arg_spec.placements,
+                            tensor_meta=input_arg_spec.tensor_meta,
+                        )
+                        input_specs.append(input_arg_target_spec)
+                        redistribute_costs.append(
+                            generate_redistribute_costs(
+                                input_arg, input_arg_target_spec
+                            )
+                        )
+                        continue
+                    else:
+                        raise ValueError(
+                            f"Could not run pointwise computation across different mesh: "
+                            f"Found {input_arg.mesh} and {followed_strategy.mesh}!"
+                        )
+
+                # every arg follow the out_placements, but need to handle broadcasting
+                input_arg_dims_map = infer_broadcast_dims_map(
+                    common_shape, input_arg_spec.shape
+                )
+
+                # Determine if this input should convert Partial to Replicate base on linearity
+                should_convert_partial = (
+                    linearity == 2
+                    and input_idx
+                    != followed_strategy_index  # Don't convert the "followed" strategy
+                )
+
+                input_target_placements = map_placements_after_broadcast(
+                    tuple(out_placements),
+                    common_shape,
+                    input_arg_dims_map,
+                    partial_to_replicate=should_convert_partial,
+                )
+
+                input_arg_target_spec = DTensorSpec(
+                    mesh=followed_strategy.mesh,
+                    placements=input_target_placements,
+                    tensor_meta=input_arg_spec.tensor_meta,
+                )
+                input_specs.append(input_arg_target_spec)
+                redistribute_costs.append(
+                    generate_redistribute_costs(input_arg, input_arg_target_spec)
+                )
+
+        pointwise_strategy.strategies.append(
+            OpSpec(
+                output_specs=DTensorSpec(
+                    mesh=followed_strategy.mesh,
+                    placements=tuple(out_placements),
+                ),
+                input_specs=input_specs,
+                redistribute_cost=redistribute_costs,
+            )
+        )
+    return pointwise_strategy
+
+
+for op in linear_pointwise_ops:
+    register_op_strategy(op, schema_info=RuntimeSchemaInfo(static_kwargkey=["out"]))(
+        linear_pointwise_strategy
+    )
+
+for op in pointwise_ops:
+    register_op_strategy(op, schema_info=RuntimeSchemaInfo(static_kwargkey=["out"]))(
+        pointwise_strategy
+    )
+
+
+# TODO: add all for_each ops
+for_each_ops = [
+    aten._foreach_abs.default,
+    aten._foreach_abs_.default,
+    aten._foreach_addcdiv_.Scalar,
+    aten._foreach_addcdiv_.ScalarList,
+    aten._foreach_addcdiv_.Tensor,
+    aten._foreach_addcmul.Scalar,
+    aten._foreach_addcmul_.Scalar,
+    aten._foreach_addcmul_.ScalarList,
+    aten._foreach_addcmul_.Tensor,
+    aten._foreach_clamp_max_.Scalar,
+    aten._foreach_clamp_min_.Scalar,
+    aten._foreach_div_.List,
+    aten._foreach_div_.Scalar,
+    aten._foreach_div_.ScalarList,
+    aten._foreach_div_.Tensor,
+    aten._foreach_div.List,
+    aten._foreach_div.Scalar,
+    aten._foreach_div.ScalarList,
+    aten._foreach_div.Tensor,
+    aten._foreach_lerp_.Scalar,
+    aten._foreach_maximum_.List,
+    aten._foreach_mul.Scalar,
+    aten._foreach_mul.ScalarList,
+    aten._foreach_mul.Tensor,
+    aten._foreach_mul.List,
+    aten._foreach_mul_.Scalar,
+    aten._foreach_mul_.ScalarList,
+    aten._foreach_mul_.Tensor,
+    aten._foreach_mul_.List,
+    aten._foreach_pow.List,
+    aten._foreach_pow.ScalarList,
+    aten._foreach_neg.default,
+    aten._foreach_neg_.default,
+    aten._foreach_reciprocal_.default,
+    aten._foreach_sub.Scalar,
+    aten._foreach_sub_.Scalar,
+    aten._foreach_sub.List,
+    aten._foreach_sub_.List,
+    aten._foreach_sub.ScalarList,
+    aten._foreach_sub_.ScalarList,
+    aten._foreach_sqrt.default,
+    aten._foreach_sqrt_.default,
+    aten._foreach_zero_.default,
+    aten._foreach_exp.default,
+    aten._foreach_exp_.default,
+    aten._foreach_cos.default,
+    aten._foreach_cos_.default,
+    aten._foreach_log.default,
+    aten._foreach_log_.default,
+    aten._amp_foreach_non_finite_check_and_unscale_.default,
+]
+
+for_each_linearity_ops = [
+    aten._foreach_add.Scalar,
+    aten._foreach_add_.Scalar,
+    aten._foreach_add_.ScalarList,
+    aten._foreach_add.List,
+    aten._foreach_add_.List,
+]
+
+
+def list_pointwise_strategy(
+    op_schema: OpSchema, linearity: bool = False
+) -> StrategyType:
+    """
+    Apply the pointwise strategy to the zipped arguments. For example, if we
+    run a foreach add of two lists l1 and l2, then we apply the pointwise
+    strategy on each pair (l1[i], l2[i]). If the first argument is a list but
+    the second (or later) one is a tensor, then we broadcast the tensor by
+    replicating it into a list with the length of the first argument.
+
+    Args:
+        mesh (DeviceMesh): device mesh for pointwise ops
+        op_schema (OpSchema): schema of the operator to generate strategy for
+        linearity (bool): specify whether op(a) + op(b) = op(a + b)
+
+    Returns:
+        OpStrategy: generated strategy
+    """
+
+    def args_tuple_strategies(
+        args_schema: tuple[object, ...],
+    ) -> list[TupleStrategy | None]:
+        first_arg = args_schema[0]
+        assert isinstance(first_arg, TupleStrategy)
+        strategy_len = len(first_arg.children)
+        tuple_strategies: list[TupleStrategy | None] = []
+        for arg_idx, arg in enumerate(args_schema):
+            if isinstance(arg, TupleStrategy):
+                # every tuple strategy should have the same length
+                assert len(arg.children) == strategy_len
+                tuple_strategies.append(arg)
+            elif isinstance(arg, OpStrategy):
+                if arg_idx > 0:  # implicitly broadcast
+                    tuple_strategies.append(
+                        TupleStrategy([arg for _ in range(strategy_len)])
+                    )
+                else:
+                    raise RuntimeError(
+                        f"list op only supports tuple strategy! {op_schema}"
+                    )
+            else:
+                # insert None as placeholder so that the idx of arg is kept
+                tuple_strategies.append(None)
+        return tuple_strategies
+
+    args_strategies = args_tuple_strategies(op_schema.args_schema)
+    follow_strategy: TupleStrategy = not_none(args_strategies[0])
+    list_strategy: list[OpStrategy] = []
+
+    for child_idx, child_strtgy in enumerate(follow_strategy.children):
+        assert isinstance(child_strtgy, OpStrategy)
+        args_schema: list[OpStrategy | None] = [
+            cast(OpStrategy, arg_strategy.children[child_idx]) if arg_strategy else None
+            for arg_strategy in args_strategies
+        ]
+        pointwise_strategy: OpStrategy = common_pointwise_strategy(
+            args_schema,
+            child_strtgy,
+            linearity,
+            scalar_tensor_idx=(
+                _FUSED_OP_SCALAR_IDX if op_schema.op in fused_ops else None
+            ),
+        )
+        list_strategy.append(pointwise_strategy)
+    return TupleStrategy(list_strategy)
+
+
+def list_linear_pointwise_strategy(op_schema: OpSchema) -> StrategyType:
+    """
+    for each list op stratgy that supports linearity
+    """
+    return list_pointwise_strategy(op_schema, linearity=True)
+
+
+for op in for_each_ops:
+    register_op_strategy(op, schema_info=RuntimeSchemaInfo(needs_pytree=True))(
+        list_pointwise_strategy
+    )
+
+for op in for_each_linearity_ops:
+    register_op_strategy(op, schema_info=RuntimeSchemaInfo(needs_pytree=True))(
+        list_linear_pointwise_strategy
+    )
+
+fused_ops = [
+    aten._fused_adam_.default,
+    aten._fused_adam.default,
+    aten._fused_adam.tensor_lr,
+    aten._fused_adam_.tensor_lr,
+    aten._fused_adamw_.default,
+    aten._fused_adamw.default,
+    aten._fused_adamw.tensor_lr,
+    aten._fused_adamw_.tensor_lr,
+]
+
+
+# The state_steps arg of fused adam / adamw is a Replicate scalar tensor, which will be put on
+# the compute_mesh of an op across all parameter groups, even when not all parameter groups
+# are on the same device mesh. This idx will help avoid hitting exceptions or unnecessary
+# redistribute during sharding propagation.
+_FUSED_OP_SCALAR_IDX = 5
+
+for op in fused_ops:
+    register_op_strategy(op, schema_info=RuntimeSchemaInfo(needs_pytree=True))(
+        list_pointwise_strategy
+    )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_tensor_ops.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_tensor_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c50085de487b12fae0ea657414eedfa40ce3cb1
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_tensor_ops.py
@@ -0,0 +1,1258 @@
+# mypy: allow-untyped-defs
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from collections.abc import Sequence, Sized
+from typing import cast
+
+import torch
+from torch._prims_common import IntLike
+from torch.distributed.tensor._dtensor_spec import DTensorSpec
+from torch.distributed.tensor._op_schema import (
+    OpSchema,
+    OpSpec,
+    OpStrategy,
+    OutputSharding,
+    PlacementList,
+    RuntimeSchemaInfo,
+    StrategyType,
+    TupleStrategy,
+)
+from torch.distributed.tensor._ops._common_rules import pointwise_rule
+from torch.distributed.tensor._ops._embedding_ops import MaskPartial
+from torch.distributed.tensor._ops.registration import (
+    register_op_strategy,
+    register_prop_rule,
+)
+from torch.distributed.tensor._ops.utils import (
+    expand_to_full_mesh_op_strategy,
+    generate_redistribute_costs,
+    is_tensor_dim_sharded,
+    is_tensor_evenly_shardable,
+    is_tensor_partial,
+    normalize_dim,
+    shift_shard_dims_after_insert,
+    shift_shard_dims_after_remove,
+)
+from torch.distributed.tensor.placement_types import (
+    Partial,
+    Placement,
+    Replicate,
+    Shard,
+)
+from torch.fx.experimental.symbolic_shapes import statically_known_true
+
+
+aten = torch.ops.aten
+
+
+def propagate_single_input_strategy(op_schema: OpSchema) -> StrategyType:
+    # For ops with a single tensor input, we perform a 1:1 mapping such that
+    # for each strategy that the input supports, we create a corresponding strategy.
+    # Note: this may be a complete waste of work, because it should be equivalent to
+    # `return first_input_strategy` (unless creating a deep copy is important for some reason)
+    if len([s for s in op_schema.args_schema if isinstance(s, OpStrategy)]) != 1:
+        raise AssertionError(
+            "propagate_single_input_strategy only works for single-tensor-input ops"
+        )
+    first_input_strategy = op_schema.args_schema[0]
+    if not isinstance(first_input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(first_input_strategy)}")
+    return OpStrategy(
+        [
+            OpSpec(
+                output_specs=DTensorSpec(
+                    mesh=first_input_strategy.mesh,
+                    placements=strategy.output_spec.placements,
+                    tensor_meta=strategy.output_spec.tensor_meta,
+                ),
+                input_specs=[
+                    DTensorSpec(
+                        mesh=first_input_strategy.mesh,
+                        placements=strategy.output_spec.placements,
+                        tensor_meta=strategy.output_spec.tensor_meta,
+                    )
+                ],
+                redistribute_cost=[
+                    generate_redistribute_costs(
+                        first_input_strategy, strategy.output_spec
+                    )
+                ],
+            )
+            for strategy in first_input_strategy.strategies
+        ]
+    )
+
+
+register_op_strategy(
+    [
+        aten.clone.default,
+        aten.contiguous.default,
+        aten.detach.default,
+        aten.alias.default,
+        aten.fill_.Scalar,
+        aten.view.dtype,
+        aten.zero_.default,
+    ]
+)(propagate_single_input_strategy)
+
+
+register_op_strategy(
+    aten._to_copy.default, schema_info=RuntimeSchemaInfo(static_kwargkey=["dtype"])
+)(propagate_single_input_strategy)
+
+
+@register_op_strategy(
+    [
+        aten.equal.default,
+        aten.is_same_size.default,
+    ]
+)
+def equal_strategy(op_schema: OpSchema) -> StrategyType:
+    # equal_strategy deals with ops that comparing two tensor, we need to make sure
+    # sharding layout the same with two operands, we choose to follow the arg with max
+    # num of shards, still keep is_same_size here for completeness as they share the
+    # same strategy in theory.
+    mesh = op_schema.get_mesh_from_args()
+    self_strategy, other_strategy = op_schema.args_schema
+    if not isinstance(self_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(self_strategy)}")
+    if not isinstance(other_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(other_strategy)}")
+
+    select_strategy = (
+        self_strategy
+        if self_strategy.max_num_shards() >= other_strategy.max_num_shards()
+        else other_strategy
+    )
+    equal_strategy = OpStrategy([])
+
+    for arg_strategy in select_strategy.strategies:
+        arg_spec = arg_strategy.output_spec
+        if is_tensor_partial(arg_spec):
+            # if the arg_spec have partial, reshard to replicate
+            # otherwise local shard tensor comparison would be invalid
+            output_spec = DTensorSpec(
+                mesh=mesh,
+                placements=tuple(
+                    Replicate() if isinstance(p, Partial) else p
+                    for p in arg_spec.placements
+                ),
+            )
+            equal_strategy.strategies.append(OpSpec(output_specs=output_spec))
+        else:
+            equal_strategy.strategies.append(OpSpec(arg_spec))
+    return equal_strategy
+
+
+register_op_strategy(
+    aten.empty_like.default, schema_info=RuntimeSchemaInfo(1, ["dtype"])
+)(propagate_single_input_strategy)
+
+
+@register_op_strategy(
+    [
+        aten.ones_like.default,
+        aten.rand_like.default,
+        aten.randn_like.default,
+        aten.zeros_like.default,
+    ],
+    schema_info=RuntimeSchemaInfo(1, ["dtype"]),
+)
+@register_op_strategy(
+    [aten.full_like.default],
+    schema_info=RuntimeSchemaInfo(2, ["dtype"]),
+)
+@register_op_strategy(
+    [
+        aten.randint_like.default,
+        aten.randint_like.low_dtype,
+        aten.randint_like.low_dtype_out,
+    ],
+    schema_info=RuntimeSchemaInfo(3, ["dtype"]),
+)
+def create_like_strategy(op_schema: OpSchema) -> StrategyType:
+    # create_like_strategy deals with ops that creating tensors with same
+    # shape as input, but with specific content that does not depend on
+    # the input, we can propagate sharding, but we have to make sure we
+    # move from partial to replicated.
+    select_strategy = op_schema.args_schema[0]
+    create_like_strategy = OpStrategy([])
+    if not isinstance(select_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(select_strategy)}")
+    for arg_strategy in select_strategy.strategies:
+        arg_spec = arg_strategy.output_spec
+        output_spec = DTensorSpec(
+            mesh=select_strategy.mesh,
+            placements=tuple(
+                Replicate() if isinstance(p, Partial) else p
+                for p in arg_spec.placements
+            ),
+        )
+        create_like_strategy.strategies.append(
+            OpSpec(output_specs=output_spec, input_specs=(arg_spec,))
+        )
+
+    return create_like_strategy
+
+
+@register_op_strategy(
+    [
+        aten.new_empty.default,
+        aten.new_full.default,
+        aten.new_ones.default,
+        aten.new_zeros.default,
+        aten.new_empty_strided.default,
+    ],
+    schema_info=RuntimeSchemaInfo(1, ["dtype"]),
+)
+def new_factory_strategy(op_schema: OpSchema) -> StrategyType:
+    # Currently there are two strategies:
+    # 1. let the output be replicated
+    # 2. let the output follow the input if input and output have the same shape
+    input_strategy = op_schema.args_schema[0]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+
+    mesh = input_strategy.mesh
+    input_shape = input_strategy.shape
+    output_shape = op_schema.args_schema[1]
+    if not isinstance(output_shape, list):
+        raise AssertionError(f"Expected list, got {type(output_shape)}")
+
+    new_factory_strategy = OpStrategy([])
+    for arg_strategy in input_strategy.strategies:
+        input_spec = arg_strategy.output_spec
+        replica_spec = DTensorSpec(mesh, tuple([Replicate()] * mesh.ndim))
+        new_factory_strategy.strategies.append(
+            OpSpec(
+                output_specs=replica_spec,
+                input_specs=(input_spec,),
+                redistribute_cost=[[0.0] * len(input_strategy.strategies)],
+            )
+        )
+
+        if tuple(input_shape) == tuple(output_shape) and input_spec.is_sharded():
+            # NOTE: for new_empty_strided, currently the non-replicate sharding
+            #       is supported only when the shape is evenly shardable
+            if (
+                op_schema.op == aten.new_empty_strided.default
+                and not is_tensor_evenly_shardable(input_shape, input_spec)
+            ):
+                continue
+
+            new_factory_strategy.strategies.append(
+                OpSpec(
+                    output_specs=input_spec,
+                    input_specs=(input_spec,),
+                    # encouraging new tensor placement to be the same as input
+                    redistribute_cost=[[-0.1] * len(input_strategy.strategies)],
+                )
+            )
+
+    return new_factory_strategy
+
+
+@register_op_strategy(aten.bucketize.Tensor)
+def gen_bucketize_strategy(op_schema: OpSchema) -> StrategyType:
+    """Just propagate input sharding, but expect replicated for boundaries input."""
+    mesh = op_schema.get_mesh_from_args()
+    input_strategy, boundaries_strategy = op_schema.args_schema
+    bucketize_strategy = OpStrategy([])
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    if not isinstance(boundaries_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(boundaries_strategy)}")
+    for arg_strategy in input_strategy.strategies:
+        arg_spec = DTensorSpec(
+            mesh,
+            arg_strategy.output_spec.placements,
+            arg_strategy.output_spec.tensor_meta,
+        )
+        replica_spec = DTensorSpec(
+            mesh,
+            tuple([Replicate()] * mesh.ndim),
+            boundaries_strategy.strategies[0].output_spec.tensor_meta,
+        )
+        bucketize_strategy.strategies.append(
+            OpSpec(
+                output_specs=arg_spec,
+                input_specs=(arg_spec, replica_spec),
+                redistribute_cost=[
+                    generate_redistribute_costs(input_strategy, arg_spec),
+                    generate_redistribute_costs(boundaries_strategy, replica_spec),
+                ],
+            )
+        )
+
+    return bucketize_strategy
+
+
+@register_op_strategy(aten.select.int, schema_info=RuntimeSchemaInfo(1))
+def select_int_strategy(op_schema: OpSchema) -> StrategyType:
+    """
+    In this select op, first determine the input specs, then determine the output specs.
+    - Input specs:
+        - If the input is sharded on the selected dim, unshard it and change to replicate.
+        - Otherwise, keep the original input specs.
+    - Output specs:
+        - It checks the input specs with the following cases:
+        - Case 1 shard_dim == selected_dim: not possible as the input is already unsharded.
+        - Case 2 shard_dim < selected_dim: keep the input specs.
+        - Case 3 shard_dim > selected_dim: shard_dim -= 1.
+    """
+    input_strategy = op_schema.args_schema[0]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    if len(op_schema.args_schema) != 3:
+        raise AssertionError(f"Expected 3 args, got {len(op_schema.args_schema)}")
+    selected_dim, index = (
+        cast(int, op_schema.args_schema[1]),
+        cast(int, op_schema.args_schema[2]),
+    )
+    input_shape = input_strategy.shape
+    input_ndim = input_strategy.ndim
+    selected_dim = normalize_dim(selected_dim, input_ndim)
+    index = normalize_dim(index, input_shape[selected_dim])
+
+    select_strategy = OpStrategy([])
+    for arg_strategy in input_strategy.strategies:
+        arg_spec = arg_strategy.output_spec
+
+        # determine input spec
+        input_specs = arg_spec
+        if is_tensor_dim_sharded(arg_spec, dim=selected_dim):
+            # if input is sharded on the selected dim, need to unshard it, change to replicate
+            arg_target_placements = unshard_tensor_dim(
+                arg_spec.placements, dim=selected_dim
+            )
+            input_specs = DTensorSpec(arg_spec.mesh, arg_target_placements)  # R
+
+        # determine output spec
+        output_specs = input_specs
+        if input_specs.is_sharded():
+            # handle cases with sharded_dim != selected_dim
+            output_placements = shift_shard_dims_after_remove(
+                input_specs.placements, selected_dim
+            )
+            output_specs = DTensorSpec(
+                arg_spec.mesh, placements=tuple(output_placements)
+            )
+
+        select_strategy.strategies.append(
+            OpSpec(
+                output_specs=output_specs,
+                input_specs=(input_specs,),
+            )
+        )
+    return select_strategy
+
+
+@register_op_strategy(
+    aten.select_backward.default,
+    schema_info=RuntimeSchemaInfo(1),
+)
+def select_backward_strategy(op_schema: OpSchema) -> OpStrategy:
+    # func: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor
+    args_schema = op_schema.args_schema
+    input_strategy, dim = args_schema[0], args_schema[2]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {input_strategy}")
+    if not isinstance(dim, int):
+        raise AssertionError(f"Expected int, got {type(dim)}")
+    output_strategies: list[OpSpec] = []
+    for placement_strategy in input_strategy.strategies:
+        input_spec = placement_strategy.output_spec
+        # NOTE: shard_dim is guaranteed to exist because
+        # grad_input has one more dim than grad_output
+        output_placements = shift_shard_dims_after_insert(input_spec.placements, dim)
+        output_specs = DTensorSpec(input_spec.mesh, tuple(output_placements))
+        output_strategies.append(
+            OpSpec(output_specs=output_specs, input_specs=(input_spec,))
+        )
+    return OpStrategy(output_strategies)
+
+
+@register_op_strategy(aten.slice.Tensor, schema_info=RuntimeSchemaInfo(1))
+def gen_slice_strategy(op_schema: OpSchema) -> StrategyType:
+    """Forward all shardings except the slice dimension."""
+    defaults = (None, 0, None, None, 1)
+    input_strategy, dim, start, end, step = (
+        op_schema.args_schema + defaults[len(op_schema.args_schema) :]
+    )
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+
+    mesh = input_strategy.mesh
+    input_shape = input_strategy.shape
+    input_ndim = input_strategy.ndim
+    if not isinstance(dim, int):
+        raise AssertionError(f"Expected int, got {type(dim)}")
+    if start is None:
+        start = 0
+    if end is None or statically_known_true(end > input_shape[dim]):
+        end = input_shape[dim]
+    if not isinstance(start, IntLike):
+        raise AssertionError(f"Expected IntLike, got {type(start)}")
+    if not isinstance(end, IntLike):
+        raise AssertionError(f"Expected IntLike, got {type(end)}")
+    if not isinstance(step, IntLike):
+        raise AssertionError(f"Expected IntLike, got {type(step)}")
+
+    # normalize args
+    slice_dim = normalize_dim(dim, input_ndim)  # type: ignore[arg-type]
+    start = normalize_dim(start, input_shape[dim])  # type: ignore[arg-type]
+    end = normalize_dim(end, input_shape[dim])  # type: ignore[arg-type]
+
+    statically_redundant_slice = (
+        statically_known_true(start == 0)
+        and statically_known_true(end == input_shape[dim])
+        and statically_known_true(step == 1)
+    )
+
+    slice_strategy = OpStrategy([])
+
+    for arg_strategy in input_strategy.strategies:
+        arg_spec = arg_strategy.output_spec
+        if (
+            not is_tensor_dim_sharded(arg_spec, dim=slice_dim)
+            or statically_redundant_slice
+        ):
+            # only add the strategy if the slice dim is not sharded
+            out_spec = DTensorSpec(mesh, arg_spec.placements)
+            slice_strategy.strategies.append(
+                OpSpec(
+                    output_specs=out_spec,
+                    input_specs=(arg_spec,),
+                    redistribute_cost=[[0.0] * len(input_strategy.strategies)],
+                )
+            )
+    if not slice_strategy.strategies:
+        # if all strategies are filtered out, unsharding all specs on slice dim
+        # of the input strategy, and use that as the op strategy
+        for arg_strategy in input_strategy.strategies:
+            arg_spec = arg_strategy.output_spec
+            unshard_spec = DTensorSpec(
+                mesh, unshard_tensor_dim(arg_spec.placements, dim=slice_dim)
+            )
+            slice_strategy.strategies.append(
+                OpSpec(
+                    output_specs=unshard_spec,
+                    redistribute_cost=[
+                        generate_redistribute_costs(input_strategy, unshard_spec)
+                    ],
+                )
+            )
+    return slice_strategy
+
+
+@register_op_strategy(
+    aten.slice_backward.default,
+    schema_info=RuntimeSchemaInfo(1),
+)
+def slice_backward_rules(op_schema: OpSchema) -> OpStrategy:
+    # func: slice_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt start, SymInt end, SymInt step) -> Tensor
+    args_schema = op_schema.args_schema
+    input_strategy, dim = args_schema[0], args_schema[2]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {input_strategy}")
+    output_strategies: list[OpSpec] = []
+    for placement_strategy in input_strategy.strategies:
+        output_spec = placement_strategy.output_spec
+        new_placements: list[Placement] = []
+        for placement in output_spec.placements:
+            # Redistribute to replicate only if the dim is sharded and matches the slice dim
+            if isinstance(placement, Shard) and placement.dim == dim:
+                new_placements.append(Replicate())
+            else:
+                new_placements.append(placement)
+        new_spec = DTensorSpec(output_spec.mesh, tuple(new_placements))
+        redistribute_cost = [generate_redistribute_costs(input_strategy, new_spec)]
+        new_strategy = OpSpec(
+            output_specs=new_spec, redistribute_cost=redistribute_cost
+        )
+        output_strategies.append(new_strategy)
+    return OpStrategy(output_strategies)
+
+
+def unshard_tensor_dim(
+    placements: Sequence[Placement], dim: int
+) -> tuple[Placement, ...]:
+    """Disallow the given tensor dimension to be sharded."""
+    return tuple(
+        p if (not isinstance(p, Shard) or p.dim != dim) else Replicate()
+        for p in placements
+    )
+
+
+def replicate_tensor_dim(
+    placements: Sequence[Placement], dim: int
+) -> tuple[Placement, ...]:
+    """Force the given tensor dimension to be replicated."""
+    # Not using p.is_shard() to avoid mypy complain about Placement not having
+    # attribute dim.
+    return tuple(
+        Replicate() if p.is_partial() or isinstance(p, Shard) and p.dim == dim else p
+        for p in placements
+    )
+
+
+@register_op_strategy(aten.slice_scatter.default, schema_info=RuntimeSchemaInfo(2))
+def gen_slice_scatter_strategy(op_schema: OpSchema) -> StrategyType:
+    # 1. number of dimensions in input and src need to match.
+    # 2. number of elements on all non-dim need to match between input and src.
+    # 3. number of elements in src in dim need to match the slice size.
+    # Given the above:
+    # - We suggest for src to follow the sharding of input, except on the scatter dimension,
+    #   where our best bet for now is to make them replicated as a fall-back.
+    #   TODO: Ideally we'd like to make sure the output is re-sharded afterwards to keep input sharding.
+    mesh = op_schema.get_mesh_from_args()
+    input_strategy = op_schema.args_schema[0]
+    src_strategy = op_schema.args_schema[1]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    if not isinstance(src_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(src_strategy)}")
+    input_ndim = input_strategy.ndim
+    slice_dim = (
+        cast(int, op_schema.args_schema[2]) if len(op_schema.args_schema) > 2 else 0
+    )
+    slice_dim = normalize_dim(slice_dim, input_ndim)
+
+    slice_scatter_strategy = OpStrategy([])
+    # by default follow the input strategy for both input and src
+    for arg_strategy in input_strategy.strategies:
+        arg_spec = arg_strategy.output_spec
+        if not (
+            is_tensor_dim_sharded(arg_spec, dim=slice_dim)
+            or is_tensor_partial(arg_spec)
+        ):
+            input_spec = DTensorSpec(mesh, arg_spec.placements, arg_spec.tensor_meta)
+            # TODO: need to relax the constraint to src
+            src_spec = DTensorSpec(mesh, arg_spec.placements)
+            # only add the strategy if the slice_scatter dim is not sharded or partial
+            slice_scatter_strategy.strategies.append(
+                OpSpec(
+                    output_specs=arg_spec,
+                    input_specs=(input_spec, src_spec),
+                    redistribute_cost=[
+                        generate_redistribute_costs(input_strategy, input_spec),
+                        generate_redistribute_costs(src_strategy, src_spec),
+                    ],
+                )
+            )
+
+    if not slice_scatter_strategy.strategies:
+        # if all strategies are filtered out, replicating all specs on slice_scatter dim
+        # of the input strategy, and use that as the op strategy
+        for arg_strategy in input_strategy.strategies:
+            arg_spec = arg_strategy.output_spec
+            new_placement = replicate_tensor_dim(arg_spec.placements, dim=slice_dim)
+            input_spec = DTensorSpec(mesh, new_placement)
+            src_spec = DTensorSpec(mesh, new_placement)
+            slice_scatter_strategy.strategies.append(
+                OpSpec(
+                    output_specs=input_spec,
+                    input_specs=(input_spec, src_spec),
+                    redistribute_cost=[
+                        generate_redistribute_costs(input_strategy, input_spec),
+                        generate_redistribute_costs(src_strategy, src_spec),
+                    ],
+                )
+            )
+    return slice_scatter_strategy
+
+
+@register_op_strategy(aten._local_scalar_dense.default)
+def replica_only_strategy(op_schema: OpSchema) -> StrategyType:
+    """Only allow replication on the input/output."""
+    input_strategy = op_schema.args_schema[0]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    mesh = input_strategy.mesh
+    replicate_spec = DTensorSpec(mesh, tuple([Replicate()] * mesh.ndim))
+    return OpStrategy([OpSpec(replicate_spec)])
+
+
+@register_op_strategy(
+    [
+        aten.scatter_.value,
+        aten.scatter.value,
+        aten.scatter_.src,
+        aten.scatter.src,
+    ],
+    schema_info=RuntimeSchemaInfo(1),
+)
+def scatter_strategy(op_schema: OpSchema) -> StrategyType:
+    mesh = op_schema.get_mesh_from_args()
+    single_mesh_dim_strategies = []
+
+    # placement list stores placements of [output, input, index, src]
+    # first we always have replicate all for inputs and output
+    if len(op_schema.args_strategy) < 3:
+        # scatter_.src/scatter.src with src be float number instead of tensor
+        all_replicate: PlacementList = [Replicate()] * 3
+    else:
+        all_replicate = [Replicate()] * 4
+    single_mesh_dim_strategies.append(all_replicate)
+
+    # TODO: see if we can support input sharding pattern
+    op_strategy = expand_to_full_mesh_op_strategy(
+        mesh,
+        op_schema,
+        single_mesh_dim_strategies,
+        inplace_op=op_schema.is_inplace_op(),
+    )
+    return op_strategy
+
+
+@register_op_strategy(aten.scatter_add.default, schema_info=RuntimeSchemaInfo(1))
+def scatter_add_strategy(op_schema: OpSchema) -> StrategyType:
+    input_strategy = op_schema.args_schema[0]
+    dim = op_schema.args_schema[1]
+    index_strategy = op_schema.args_schema[2]
+
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    if not isinstance(index_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(index_strategy)}")
+    if not isinstance(dim, int):
+        raise AssertionError(f"Expected int, got {type(dim)}")
+    dim = normalize_dim(dim, input_strategy.ndim)
+    mesh = input_strategy.mesh
+    input_shape = input_strategy.shape
+    index_shape = index_strategy.shape
+
+    single_mesh_dim_strategies = []
+
+    # placement list stores placements of [output, input, index, src]
+    # first we always have replicate all for inputs and output
+    all_replicate: PlacementList = [Replicate()] * 4
+    single_mesh_dim_strategies.append(all_replicate)
+
+    if len(input_shape) == len(index_shape):
+        for d in range(len(input_shape)):
+            if d != dim and input_shape[d] == index_shape[d]:
+                sharding: PlacementList = [Shard(d), Shard(d), Shard(d), Shard(d)]
+                single_mesh_dim_strategies.append(sharding)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=1
+    )
+
+
+@register_op_strategy(aten.gather.default, schema_info=RuntimeSchemaInfo(1))
+def gather_strategy(op_schema: OpSchema) -> StrategyType:
+    mesh = op_schema.get_mesh_from_args()
+    input_strategy = cast(OpStrategy, op_schema.args_schema[0])
+    dim = cast(int, op_schema.args_schema[1])
+    dim = normalize_dim(dim, input_strategy.ndim)
+    index_strategy = cast(OpStrategy, op_schema.args_schema[2])
+
+    input_shape = input_strategy.shape
+    index_shape = index_strategy.shape
+
+    single_mesh_dim_strategies = []
+
+    # placement list stores placements of [output, input, index]
+    # first we always have replicate all for inputs and output
+    all_replicate: PlacementList = [Replicate()] * 3
+    single_mesh_dim_strategies.append(all_replicate)
+
+    # input sharding, input sharded, index accepts mask partial, output follows index
+    # this only works when the input is sharded on the gather dimension, and
+    # index has size 1 on the gather dimension
+    if dim < len(index_shape) and index_shape[dim] == 1:
+        index_partial_placement = MaskPartial(offset_shape=input_shape, offset_dim=dim)
+        input_sharding: PlacementList = [
+            index_partial_placement,
+            Shard(dim),
+            index_partial_placement,
+        ]
+        single_mesh_dim_strategies.append(input_sharding)
+
+    # index sharding, input replicated, index sharded, output follows index
+    # this only works when the sharding dimension is the gather dimension
+    index_sharding: PlacementList = [Shard(dim), Replicate(), Shard(dim)]
+    single_mesh_dim_strategies.append(index_sharding)
+
+    if len(input_shape) == len(index_shape):
+        for d in range(len(input_shape)):
+            if d != dim:
+                sharding: PlacementList = [Shard(d), Shard(d), Shard(d)]
+                single_mesh_dim_strategies.append(sharding)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=1
+    )
+
+
+def _derive_follow_placements_from_tuple_strategy(
+    op: torch._ops.OpOverload,
+    tuple_strategy: TupleStrategy,
+) -> Sequence[Placement]:
+    """
+    derive the placements to follow from the tuple strategy, mainly used by
+    aten.stack, aten.cat, where each operand have the same shape, and correspondingly
+    expecting the same sharding
+    """
+
+    def merge_placement(
+        cur_placement: Placement, new_placement: Placement
+    ) -> Placement:
+        # semantic if we already have a follow placement, we
+        # check each placement for the current arg placement
+        # to see if we want to merge/adjust the placement to follow
+        # the priority: Partial -> Shard -> Replicate
+        if cur_placement == new_placement:
+            return cur_placement
+
+        if cur_placement.is_partial():
+            if new_placement.is_shard():
+                # follow new placement
+                return new_placement
+            elif new_placement.is_partial():
+                # different partial types, we can't merge and have to replicate all here
+                return Replicate()
+            else:
+                # follow partial
+                return cur_placement
+        elif cur_placement.is_shard():
+            if new_placement.is_shard():
+                # cur/new placement are different sharding (i.e. different shard dim)
+                # currently fallback to replicate all args
+                return Replicate()
+            else:
+                # for partial/replicate, follow the current shard placement
+                return cur_placement
+        else:
+            # current replicate, just follow new placement
+            return new_placement
+
+    follow_placements: list[Placement] | None = None
+    mesh = tuple_strategy.child_mesh(0)
+    for arg_strategy in tuple_strategy.children:
+        if not isinstance(arg_strategy, OpStrategy):
+            raise AssertionError(f"Expected OpStrategy, got {type(arg_strategy)}")
+        if arg_strategy.mesh != mesh:
+            raise ValueError(
+                f"All operands in {op} must have the same mesh, "
+                f"but got {arg_strategy.mesh} and {mesh}."
+            )
+
+        for placement_strategy in arg_strategy.strategies:
+            arg_placements = placement_strategy.output_spec.placements
+            if follow_placements is None:
+                follow_placements = list(arg_placements)
+                continue
+            if follow_placements is None:
+                raise AssertionError(
+                    "follow_placements should not be None at this point"
+                )
+            for mesh_idx in range(mesh.ndim):
+                # merge placements with the priority
+                follow_placements[mesh_idx] = merge_placement(
+                    follow_placements[mesh_idx], arg_placements[mesh_idx]
+                )
+    if follow_placements is None:
+        raise AssertionError("follow placements should not be None!")
+    return follow_placements
+
+
+@register_op_strategy(aten.stack.default, RuntimeSchemaInfo(1, needs_pytree=True))
+def stack_strategy(op_schema: OpSchema) -> StrategyType:
+    args_schema = op_schema.args_schema
+    input_tuple_strategy = args_schema[0]
+    if not isinstance(input_tuple_strategy, TupleStrategy):
+        raise AssertionError(f"Expected TupleStrategy, got {input_tuple_strategy}")
+    input_strategies: list[OpStrategy] = []
+    for child in input_tuple_strategy.children:
+        assert isinstance(child, OpStrategy), f"Expected OpStrategy, got {child}"
+        input_strategies.append(child)
+    first_input_strategy = input_strategies[0]
+    common_input_ndim = first_input_strategy.ndim
+    dim = cast(int, args_schema[1]) if len(args_schema) > 1 else 0
+    # normalize the dim to be within the common input ndim
+    dim = normalize_dim(dim, common_input_ndim)
+
+    mesh = first_input_strategy.mesh
+
+    follow_placements = _derive_follow_placements_from_tuple_strategy(
+        op_schema.op, input_tuple_strategy
+    )
+
+    # create op strategy base on the follow placements
+    op_strategy = OpStrategy([])
+
+    input_specs = tuple(
+        DTensorSpec(mesh, tuple(follow_placements))
+        for _ in range(len(input_tuple_strategy.children))
+    )
+
+    # stack op would "insert" new dim, so all sharded dim >= the inserted dim need to
+    # be normalized with the new Shard placement
+    follow_placements = shift_shard_dims_after_insert(follow_placements, dim)
+    output_spec = DTensorSpec(mesh, tuple(follow_placements))
+    redistribute_cost = [
+        generate_redistribute_costs(input_strategies[i], input_specs[i])
+        for i in range(len(input_specs))
+    ]
+    op_strategy.strategies.append(
+        OpSpec(
+            output_specs=output_spec,
+            input_specs=input_specs,
+            redistribute_cost=redistribute_cost,
+        )
+    )
+    return op_strategy
+
+
+@register_op_strategy(aten.cat.default, RuntimeSchemaInfo(1, needs_pytree=True))
+def cat_strategy(op_schema: OpSchema) -> StrategyType:
+    args_schema = op_schema.args_schema
+    input_tuple_strategy = args_schema[0]
+    if not isinstance(input_tuple_strategy, TupleStrategy):
+        raise AssertionError(f"Expected TupleStrategy, got {input_tuple_strategy}")
+    num_input_tensor = len(input_tuple_strategy.children)
+    first_input_strategy = input_tuple_strategy.children[0]
+    if not isinstance(first_input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {first_input_strategy}")
+    common_input_ndim = first_input_strategy.ndim
+    dim = cast(int, args_schema[1]) if len(args_schema) > 1 else 0
+    # normalize the dim to be within the common input ndim
+    dim = normalize_dim(dim, common_input_ndim)
+
+    mesh = first_input_strategy.mesh
+
+    op_strategy = OpStrategy([])
+    # use a set to deduplicate strategies with the same placement
+    strategies_placement_pool = set()
+    for this_strategy in input_tuple_strategy.children:
+        # check strategy of each tensor to be concatenated
+        if not isinstance(this_strategy, OpStrategy):
+            raise AssertionError(f"Expected OpStrategy, got {type(this_strategy)}")
+        if this_strategy.mesh != mesh:
+            raise AssertionError("cat op doesn't support cross mesh concatenation")
+        for op_spec in this_strategy.strategies:
+            # Check each OpSpec of the tensor, the placement in this OpSpec
+            # is used as the exemplar strategy that other tensors and output
+            # tensor should follow. We also need to deduplicate the output
+            # strategy with the same placement.
+            if not isinstance(op_spec, OpSpec):
+                raise AssertionError(f"Expected OpSpec, got {type(op_spec)}")
+            # exemplar OpSpec to follow
+            exemplar_spec = op_spec.output_spec
+            # check if the tensor is sharded on the concat dim
+            if is_tensor_dim_sharded(exemplar_spec, dim):
+                # if the tensor is sharded on the concat dim, we need to unshard it
+                # first
+                exemplar_placement = unshard_tensor_dim(exemplar_spec.placements, dim)
+            else:
+                exemplar_placement = exemplar_spec.placements
+            if exemplar_placement not in strategies_placement_pool:
+                strategies_placement_pool.add(exemplar_placement)
+                # assert isinstance(exemplar_placement, Tuple)
+                redistribute_costs = []
+                input_specs = []
+                for idx in range(num_input_tensor):
+                    # extract the strategy for the idx tensors to build the tensor_metadata and redistribute_cost
+                    that_tensor_strategy = input_tuple_strategy.children[idx]
+                    if not isinstance(that_tensor_strategy, OpStrategy):
+                        raise AssertionError(
+                            f"Expected OpStrategy, got {type(that_tensor_strategy)}"
+                        )
+                    input_spec = DTensorSpec(
+                        mesh,
+                        exemplar_placement,
+                        tensor_meta=that_tensor_strategy.strategies[
+                            0
+                        ].output_spec.tensor_meta,
+                    )
+                    input_specs.append(input_spec)
+                    redistribute_costs.append(
+                        generate_redistribute_costs(that_tensor_strategy, input_spec)
+                    )
+                op_strategy.strategies.append(
+                    OpSpec(
+                        output_specs=DTensorSpec(mesh, exemplar_placement),
+                        input_specs=tuple(input_specs),
+                        redistribute_cost=redistribute_costs,
+                    )
+                )
+    return op_strategy
+
+
+@register_prop_rule(aten.index_select.default, schema_info=RuntimeSchemaInfo(1))
+def prop_index_select(op_schema: OpSchema) -> OutputSharding:
+    values_spec, dim, indices_spec = op_schema.args_schema
+
+    if not isinstance(values_spec, DTensorSpec):
+        raise AssertionError(f"Expected DTensorSpec, got {type(values_spec)}")
+    if not isinstance(dim, int):
+        raise AssertionError(f"Expected int, got {type(dim)}")
+    if not isinstance(indices_spec, DTensorSpec):
+        raise AssertionError(f"Expected DTensorSpec, got {type(indices_spec)}")
+
+    all_indices_spec: list[DTensorSpec | None] = [
+        indices_spec if dim == i else None for i in range(values_spec.ndim)
+    ]
+
+    result = prop_index(
+        OpSchema(
+            op=op_schema.op,
+            args_schema=(values_spec, all_indices_spec),
+            kwargs_schema=op_schema.kwargs_schema,
+        )
+    )
+    if result.redistribute_schema:
+        schema_suggestion = result.redistribute_schema
+        result.redistribute_schema = OpSchema(
+            op=op_schema.op,
+            args_schema=(
+                schema_suggestion.args_schema[0],
+                dim,
+                schema_suggestion.args_schema[1][dim],  # type: ignore[index]
+            ),
+            kwargs_schema=op_schema.kwargs_schema,
+        )
+    return result
+
+
+@register_op_strategy(
+    [
+        aten.index_put.default,
+        aten._index_put_impl_.default,
+    ],
+    schema_info=RuntimeSchemaInfo(needs_pytree=True),
+)
+def prop_index_put(op_schema: OpSchema) -> StrategyType:
+    # We have 3 DTensor spec from argument `in`, `indices` and `values`
+    # accordingly.
+    in_spec, indices_spec, values_spec, *_ = op_schema.args_schema
+    if not isinstance(in_spec, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(in_spec)}")
+    # `indices`` is a tuple of scalar LongTensor, so we use TupleStrategy.
+    if not isinstance(indices_spec, TupleStrategy):
+        raise AssertionError(f"Expected TupleStrategy, got {type(indices_spec)}")
+    if not isinstance(values_spec, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(values_spec)}")
+    mesh = values_spec.mesh
+    op_strategy = OpStrategy([])
+    # 1. `indices` should all be replicated first.
+    indices_redistribute_costs = []
+    new_indices_spec: list[DTensorSpec | None] = []
+    for indices_spec_child in indices_spec.children:
+        if not isinstance(indices_spec_child, OpStrategy):
+            raise AssertionError(f"Expected OpStrategy, got {type(indices_spec_child)}")
+
+        replicated_spec = DTensorSpec(
+            mesh=mesh,
+            placements=tuple([Replicate()] * mesh.ndim),
+            tensor_meta=indices_spec_child.strategies[0].output_spec.tensor_meta,
+        )
+        new_indices_spec.append(replicated_spec)
+        child_costs = generate_redistribute_costs(indices_spec_child, replicated_spec)
+        indices_redistribute_costs.append(child_costs)
+
+    # 2. For placement rule of `values` and `in`, assume `values` shape =
+    # [a,b,c,d,e,f], `in` shape = [d,e,f]. Then `values`'s a,b,c (selected dim)
+    # must be replicated and d,e,f (nonselected dim) in both `values` and `in`
+    # should follow the same sharding (replicate or shard, but not partial).
+    size_offset = (
+        in_spec.strategies[0].output_spec.ndim
+        - values_spec.strategies[0].output_spec.ndim
+    )
+    # We can either let `values` follow `in`'s placements or reverse.
+    for exemplar_spec in [in_spec, values_spec]:
+        # use exemplar_spec as the target spec
+        for strategy in exemplar_spec.strategies:
+            in_spec_new_placements: list[Placement] = []
+            values_spec_new_placements: list[Placement] = []
+            placements = strategy.output_spec.placements
+            for placement in placements:
+                if placement.is_shard():
+                    if not isinstance(placement, Shard):
+                        raise AssertionError(f"Expected Shard, got {type(placement)}")
+                    if exemplar_spec is in_spec:
+                        # let `values_spce` follow `in_spec`
+                        if placement.dim < size_offset:
+                            # sharded on selected dim, need to change to replicate
+                            in_spec_new_placements.append(Replicate())
+                            values_spec_new_placements.append(Replicate())
+                        else:
+                            in_spec_new_placements.append(placement)
+                            values_spec_new_placements.append(
+                                Shard(placement.dim - size_offset)
+                            )
+                    else:
+                        # let `in_spec` follow `values_spec`
+                        in_spec_new_placements.append(
+                            Shard(placement.dim + size_offset)
+                        )
+                        values_spec_new_placements.append(placement)
+                else:
+                    in_spec_new_placements.append(Replicate())
+                    values_spec_new_placements.append(Replicate())
+            new_in_spec = DTensorSpec(
+                mesh=mesh,
+                placements=tuple(in_spec_new_placements),
+                tensor_meta=in_spec.strategies[0].output_spec.tensor_meta,
+            )
+            new_values_spec = DTensorSpec(
+                mesh=mesh,
+                placements=tuple(values_spec_new_placements),
+                tensor_meta=values_spec.strategies[0].output_spec.tensor_meta,
+            )
+            output_spec = DTensorSpec(
+                mesh=mesh,
+                placements=tuple(in_spec_new_placements),
+                tensor_meta=in_spec.strategies[0].output_spec.tensor_meta,
+            )
+            cost_in_spec = generate_redistribute_costs(in_spec, new_in_spec)
+            cost_values_spec = generate_redistribute_costs(values_spec, new_values_spec)
+            op_strategy.strategies.append(
+                OpSpec(
+                    input_specs=(
+                        new_in_spec,
+                        *new_indices_spec,  # type: ignore[arg-type]
+                        new_values_spec,
+                    ),
+                    output_specs=output_spec,
+                    redistribute_cost=[
+                        cost_in_spec,
+                        *indices_redistribute_costs,
+                        cost_values_spec,
+                    ],
+                )
+            )
+    return op_strategy
+
+
+@register_prop_rule(aten.index.Tensor, schema_info=RuntimeSchemaInfo(needs_pytree=True))
+def prop_index(op_schema: OpSchema) -> OutputSharding:
+    """
+    Expect replicated on the first input; _mostly_ pointwise on the second input.
+
+    TODO: exception: when the dtype of second input is "bool", then a torch.nonzero needs to be triggered first.
+    """
+    # Current sharding constraints:
+    # For values:
+    #   1. We currently require that the dimension of values_spec be replicated or partial
+    #      if they are being indexed on.
+    #   2. Other dimensions of values_spec can remain sharded if they are so.
+    # For indices:
+    #   Indices can be either sharded or replicated. All index tensors need to be sharded
+    #   in a compatible way, following the pointwise rule (including resolving Partial
+    #   into either sharded or replicated)
+
+    values_spec, multi_indices_spec = op_schema.args_schema
+    if not isinstance(values_spec, DTensorSpec):
+        raise AssertionError(f"Expected DTensorSpec, got {type(values_spec)}")
+    if not isinstance(multi_indices_spec, list):
+        raise AssertionError(f"Expected list, got {type(multi_indices_spec)}")
+    multi_indices_spec = cast(list[DTensorSpec | None], multi_indices_spec)
+    valid_indices_spec: list[tuple[int, DTensorSpec]] = [
+        (i, a) for i, a in enumerate(multi_indices_spec) if a is not None
+    ]
+
+    # 1. All indices have to be sharded equally. Moreover, indices can be broadcast.
+    #    Here, we piggyback on the pointwise sharding rule for indices.
+    indices_out = pointwise_rule(
+        OpSchema(
+            op=op_schema.op,
+            args_schema=tuple(v[1] for v in valid_indices_spec),
+            kwargs_schema={},
+        )
+    )
+    need_reshard_on_indices = indices_out.output_spec is None
+
+    if not need_reshard_on_indices:
+        # this means that our inputs are already sharded properly and we will use that as our indices_spec
+        if not isinstance(indices_out.output_spec, DTensorSpec):
+            raise AssertionError(
+                f"Expected DTensorSpec, got {type(indices_out.output_spec)}"
+            )
+        indices_spec: DTensorSpec = indices_out.output_spec
+    else:
+        if indices_out.redistribute_schema is None:
+            raise AssertionError("redistribute_schema should not be None")
+        valid_indices_suggestion = indices_out.redistribute_schema
+        for i, v in enumerate(valid_indices_suggestion.args_spec):
+            multi_indices_spec[valid_indices_spec[i][0]] = v
+        # we'll need to call pointwise_rule again to see what's our ideal indices_spec and then
+        # use that to compute our ideal values_spec
+        indices_output_spec = pointwise_rule(valid_indices_suggestion).output_spec
+        if not isinstance(indices_output_spec, DTensorSpec):
+            raise AssertionError(
+                f"Expected DTensorSpec, got {type(indices_output_spec)}"
+            )
+        indices_spec = indices_output_spec
+
+    lookup_dims = {v[0] for v in valid_indices_spec}
+
+    need_reshard_on_values = tuple(
+        (isinstance(vp, Shard) and (vp.dim in lookup_dims or isinstance(ip, Shard)))
+        for vp, ip in zip(values_spec.placements, indices_spec.placements)
+    )
+
+    if not need_reshard_on_indices and not any(need_reshard_on_values):
+        value_placements = values_spec.placements
+
+        all_dims_consecutive = all(
+            b[0] - a[0] == 1
+            for b, a in zip(valid_indices_spec[1:], valid_indices_spec[:-1])
+        )
+        if all_dims_consecutive:
+            # if all index vectors are consecutives, insert at the dimension of the first index
+            insert_dim: int = valid_indices_spec[0][0]
+        else:
+            # else, insert on the first dimension
+            insert_dim = 0
+
+        def place(vp: Placement, ip: Placement) -> Placement:
+            if isinstance(vp, Shard):
+                return Shard(
+                    vp.dim
+                    if vp.dim < insert_dim
+                    # accounts for the offset in output dimensions
+                    else vp.dim
+                    + indices_spec.ndim
+                    - sum(1 if vp.dim > v[0] else 0 for v in valid_indices_spec)
+                )
+            if isinstance(ip, Shard):
+                return Shard(ip.dim + insert_dim)
+            # Partial or Replicated
+            return vp
+
+        value_placements = tuple(
+            place(vp, ip)
+            for vp, ip in zip(values_spec.placements, indices_spec.placements)
+        )
+        result = OutputSharding(
+            output_spec=DTensorSpec(
+                mesh=values_spec.mesh,
+                placements=value_placements,
+            )
+        )
+        return result
+    else:
+        result = OutputSharding(
+            output_spec=None,
+            redistribute_schema=OpSchema(
+                op=op_schema.op,
+                args_schema=(
+                    DTensorSpec(
+                        mesh=values_spec.mesh,
+                        placements=tuple(
+                            Replicate() if need_reshard_on_values[i] else v
+                            for i, v in enumerate(values_spec.placements)
+                        ),
+                        tensor_meta=values_spec.tensor_meta,
+                    ),
+                    multi_indices_spec,
+                ),
+                kwargs_schema=op_schema.kwargs_schema,
+            ),
+        )
+        return result
+
+
+@register_op_strategy(
+    [
+        aten.split.Tensor,
+        aten.split_with_sizes.default,
+        aten.split_with_sizes_copy.default,
+    ],
+    RuntimeSchemaInfo(1),
+)
+def split_strategy(op_schema: OpSchema) -> OpStrategy:
+    input_strategy = op_schema.args_schema[0]
+    split_size_or_sections = op_schema.args_schema[1]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    input_ndim = input_strategy.ndim
+    split_dim = (
+        cast(int, op_schema.args_schema[2]) if len(op_schema.args_schema) > 2 else 0
+    )
+    dim = normalize_dim(split_dim, input_ndim)
+
+    def size_split(N, i) -> list:
+        # Last chunk will be smaller if the tensor size N
+        # along the given dimension dim is not divisible by i.
+        if not i > 0:
+            raise AssertionError(f"Split size must be positive, got {i}")
+        return [i] * (N // i) + ([N % i] if N % i != 0 else [])
+
+    output_size_list = (
+        size_split(input_strategy.shape[dim], split_size_or_sections)
+        if isinstance(split_size_or_sections, int)
+        else split_size_or_sections
+    )
+    if not isinstance(output_size_list, Sized):
+        raise AssertionError(f"Expected Sized, got {type(output_size_list)}")
+
+    all_strategies = []
+    for strategy in input_strategy.strategies:
+        spec = strategy.output_spec
+        placements = spec.placements
+        if is_tensor_dim_sharded(spec, dim=dim):
+            # if the input is sharded on the split dim, we need to unshard it
+            placements = unshard_tensor_dim(spec.placements, dim=dim)
+
+        input_spec = DTensorSpec(spec.device_mesh, placements, spec.tensor_meta)
+        output_specs = tuple(
+            DTensorSpec(spec.device_mesh, placements)
+            for _ in range(len(output_size_list))
+        )
+        all_strategies.append(
+            OpSpec(
+                output_specs=output_specs,
+                input_specs=(input_spec,),
+                redistribute_cost=[
+                    generate_redistribute_costs(input_strategy, input_spec)
+                ],
+            )
+        )
+
+    return OpStrategy(all_strategies)
+
+
+# TODO: fix remaining failures in xfail("unbind") in test_dtensor_ops.py
+#       and remove this xfail item
+@register_op_strategy(aten.unbind.int, schema_info=RuntimeSchemaInfo(1))
+def gen_unbind_strategy(op_schema: OpSchema) -> StrategyType:
+    """Forward all shardings except the unbind dimension."""
+    input_strategy = op_schema.args_schema[0]
+    if not isinstance(input_strategy, OpStrategy):
+        raise AssertionError(f"Expected OpStrategy, got {type(input_strategy)}")
+    input_ndim = input_strategy.ndim
+    input_shape = input_strategy.shape
+    unbind_dim = (
+        cast(int, op_schema.args_schema[1]) if len(op_schema.args_schema) > 1 else 0
+    )
+    unbind_dim = normalize_dim(unbind_dim, input_ndim)
+
+    mesh = input_strategy.mesh
+    unbind_strategy = OpStrategy([])
+    for arg_strategy in input_strategy.strategies:
+        arg_spec = arg_strategy.output_spec
+        if is_tensor_dim_sharded(arg_spec, dim=unbind_dim):
+            raise RuntimeError(
+                f"Attempted to unbind along the sharded dimension {unbind_dim}. ",
+                "It cannot be performed without redistribution, which is disallowed "
+                "by the current operator.",
+            )
+        # only add the strategy if the unbind dim is not sharded
+        output_placements = shift_shard_dims_after_remove(
+            arg_spec.placements, unbind_dim
+        )
+        output_specs = tuple(
+            DTensorSpec(mesh, tuple(output_placements))
+            for _ in range(input_shape[unbind_dim])
+        )
+        unbind_strategy.strategies.append(
+            OpSpec(
+                output_specs=output_specs,
+                input_specs=(arg_spec,),
+                redistribute_cost=[[0.0] * len(input_strategy.strategies)],
+            )
+        )
+    return unbind_strategy
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_view_ops.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_view_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee157aa26df4bc3e3a76052ae0b66255b12f7617
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/_view_ops.py
@@ -0,0 +1,798 @@
+# mypy: allow-untyped-defs
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from collections.abc import Callable, Iterable, Sequence
+from dataclasses import dataclass
+from typing import cast, Optional
+
+import torch
+from torch import Tensor
+from torch._prims_common import DimsType
+from torch.distributed.tensor._dtensor_spec import DTensorSpec
+from torch.distributed.tensor._op_schema import (
+    OpSchema,
+    OpSpec,
+    OpStrategy,
+    RuntimeSchemaInfo,
+    StrategyType,
+)
+from torch.distributed.tensor._ops.registration import register_op_strategy
+from torch.distributed.tensor._ops.utils import (
+    generate_redistribute_costs,
+    normalize_dim,
+    normalize_dims,
+    prod,
+)
+from torch.distributed.tensor.placement_types import (
+    _StridedShard,
+    Placement,
+    Replicate,
+    Shard,
+)
+
+
+aten = torch.ops.aten
+
+Shape = tuple[int, ...]
+
+
+@dataclass
+class DimSpec:
+    """Specifies how an output dimension maps to an input dimension."""
+
+    def inputs(self) -> Iterable["DimSpec"]:
+        return ()
+
+
+# Rules that map each dimension of the output to dimensions of the input tensor
+DimMap = tuple[DimSpec, ...]
+
+
+@dataclass
+class Singleton(DimSpec):
+    """Output dimension is a singleton."""
+
+
+@dataclass
+class InputDim(DimSpec):
+    """Output dimension maps directly to an input dimension."""
+
+    input_dim: int
+
+
+@dataclass
+class Broadcast(DimSpec):
+    """Output is the broadcast of a singleton input dimension."""
+
+    dim: DimSpec
+    dim_size: int
+
+    @classmethod
+    def new(cls, dim: DimSpec, dim_size: int) -> DimSpec:
+        return Broadcast(dim, dim_size)
+
+    def inputs(self) -> Iterable[DimSpec]:
+        return (self.dim,)
+
+
+@dataclass
+class NewDim(DimSpec):
+    """This is a new dimension created by the op."""
+
+    size: int
+
+    @classmethod
+    def new(cls, size: int) -> DimSpec:
+        return Singleton() if size == 1 else NewDim(size)
+
+
+@dataclass
+class Repeat(DimSpec):
+    """Output dimension is the input dimension repeated n-times."""
+
+    input_dim: DimSpec
+    times: int
+
+    @classmethod
+    def new(cls, dim: DimSpec, times: int) -> DimSpec:
+        if times == 1:
+            return dim
+        elif isinstance(dim, Singleton):
+            # repeating a singleton is the same as broadcasting it
+            return Broadcast(dim, times)
+        else:
+            return Repeat(dim, times)
+
+    def inputs(self) -> Iterable[DimSpec]:
+        return (self.input_dim,)
+
+
+@dataclass
+class Flatten(DimSpec):
+    """Flatten a set of input dimensions, ensuring right-most adjacent elements remain adjacent in the output."""
+
+    input_dims: Sequence[DimSpec]
+
+    @classmethod
+    def new(cls, dims: Sequence[DimSpec]) -> DimSpec:
+        if len(dims) == 0:
+            # flattening a scalar leads to a singleton
+            return Singleton()
+        elif len(dims) == 1:
+            # flattening a single dimension is no-op
+            return dims[0]
+        else:
+            return Flatten(dims)
+
+    def inputs(self) -> Iterable[DimSpec]:
+        return self.input_dims
+
+
+@dataclass
+class Split(DimSpec):
+    """
+    This dimension is a member of a decomposition of the input dim.
+
+    Note that input_dim itself could be a Flattened set of input dims.
+    """
+
+    input_dim: DimSpec
+    group_shape: Shape
+    split_id: int
+
+    @classmethod
+    def new(cls, dim: DimSpec, group_shape: tuple[int, ...], idx: int) -> DimSpec:
+        if not len(group_shape) > 0:
+            raise AssertionError(
+                f"Expected group_shape length > 0, got {len(group_shape)}"
+            )
+        if len(group_shape) == 1:
+            # not really a group, just return the input dim back
+            if not idx == 0:
+                raise AssertionError(f"Expected idx == 0, got {idx}")
+            return dim
+        elif group_shape[idx] == 1:
+            return Singleton()
+        else:
+            # remove singletons from group
+            # group_mapping = [(new_index, (shape, old_index)) ...]
+            group_mapping = list(
+                enumerate((s, i) for i, s in enumerate(group_shape) if s != 1)
+            )
+            new_group_shape = tuple(m[1][0] for m in group_mapping)
+            new_idx = next(filter(lambda x: x[1][1] == idx, group_mapping))[0]
+            return Split(dim, new_group_shape, new_idx)
+
+    def inputs(self) -> Iterable[DimSpec]:
+        return (self.input_dim,)
+
+
+def dim_pad_left(ndim: int, min_dims: int) -> DimMap:
+    return (Singleton(),) * max(0, min_dims - ndim) + tuple(
+        InputDim(i) for i in range(ndim)
+    )
+
+
+def dim_atleast_3d(ndim: int) -> DimMap:
+    if ndim == 0:
+        return (Singleton(), Singleton(), Singleton())
+    elif ndim == 1:
+        return (Singleton(), InputDim(0), Singleton())
+    elif ndim == 2:
+        return (InputDim(0), InputDim(1), Singleton())
+    else:
+        return tuple(InputDim(i) for i in range(ndim))
+
+
+def expand(input_shape: Shape, shape: Shape) -> DimMap:
+    """Implement broadcast on multiple dimensions."""
+    if not len(shape) >= len(input_shape):
+        raise AssertionError(
+            f"Expected len(shape) >= len(input_shape), got {len(shape)} < {len(input_shape)}"
+        )
+
+    # 1. create padded input dimensions
+    padded_input = dim_pad_left(len(input_shape), len(shape))
+    # 2. check that input shapes are compatible
+    mapping = []
+    for p, desired_s in zip(padded_input, shape):
+        if isinstance(p, Singleton):
+            actual_s = 1
+            if not desired_s >= 0:
+                raise AssertionError(f"Expected desired_s >= 0, got {desired_s}")
+        else:
+            if not isinstance(p, InputDim):
+                raise AssertionError(f"DimSpec not supported in expand: {p}")
+            actual_s = input_shape[p.input_dim]
+            if not (actual_s == 1 or desired_s == -1 or desired_s == actual_s):
+                raise AssertionError(
+                    f"Expected actual_s == 1 or desired_s == -1 or "
+                    f"desired_s == actual_s, got actual_s={actual_s}, desired_s={desired_s}"
+                )
+        mapping.append(
+            p
+            if desired_s in (1, -1) or desired_s == actual_s
+            else Broadcast.new(p, desired_s)
+        )
+    return tuple(mapping)
+
+
+def normalize_sizes(sizes: Shape | tuple[Shape]) -> Shape:
+    if isinstance(sizes[0], int):
+        return cast(Shape, sizes)
+    elif len(sizes) == 1:
+        return sizes[0]
+    else:
+        raise RuntimeError("Size must be int... or tuple")
+
+
+def dim_flatten(ndim: int, start_dim=0, end_dim=-1) -> DimMap:
+    if ndim == 0:
+        return (Singleton(),)
+    elif ndim == 1:
+        return (InputDim(0),)
+    else:
+        # only flattening dims from start_dim to end_dim (inclusive)
+        # other dims are passed through
+        if end_dim < 0:
+            end_dim += ndim
+        results: list[DimSpec] = [InputDim(i) for i in range(start_dim)]
+        results.append(
+            Flatten.new(tuple(InputDim(i) for i in range(start_dim, end_dim + 1)))
+        )
+        results.extend([InputDim(i) for i in range(end_dim + 1, ndim)])
+        return tuple(results)
+
+
+def dim_movedim(
+    ndim: int,
+    input: DimsType,
+    destination: DimsType,
+) -> DimMap:
+    input = normalize_dims(input, ndim)
+    destination = normalize_dims(destination, ndim)
+
+    if not len(input) == len(destination):
+        raise AssertionError(
+            f"Expected len(input) == len(destination), got {len(input)} != {len(destination)}"
+        )
+    input_set = set(input)
+    if not len(input_set) == len(input):
+        raise AssertionError("Found repeated input dims")
+    if not len(set(destination)) == len(destination):
+        raise AssertionError("Found repeated output dims")
+    if not max(input) < ndim:
+        raise AssertionError(f"Expected max(input) < ndim, got {max(input)} >= {ndim}")
+    if not max(destination) < ndim:
+        raise AssertionError(
+            f"Expected max(destination) < ndim, got {max(destination)} >= {ndim}"
+        )
+
+    dest = [-1] * ndim
+    for i, d in zip(input, destination):
+        dest[d] = i
+
+    unused_inputs_iter = iter(i for i in range(ndim) if i not in input_set)
+    for i in range(ndim):
+        if dest[i] == -1:
+            dest[i] = next(unused_inputs_iter)
+
+    return tuple(InputDim(i) for i in dest)
+
+
+def dim_repeat(ndim: int, sizes: Shape) -> DimMap:
+    sizes = normalize_sizes(sizes)
+    if not len(sizes) >= ndim:
+        raise AssertionError(
+            f"Number of dimensions of repeat dims {sizes} can not be smaller than number of dimensions of tensor {ndim}."
+        )
+    pad = len(sizes) - ndim
+    return tuple(Repeat.new(Singleton(), s) for s in sizes[:pad]) + tuple(
+        Repeat.new(InputDim(i), s) for i, s in enumerate(sizes[pad:])
+    )
+
+
+def infer_size(total_size: int, sizes: Shape) -> Shape:
+    """
+    One dimension input to view may be "-1".
+
+    Infer the size of this dimension given the total_size.
+    """
+    infers = [i for i, s in enumerate(sizes) if s == -1]
+    size = prod(sizes)
+    if not len(infers) <= 1:
+        raise AssertionError("can only infer one size")
+    if infers:
+        size = -size
+        missing_size = total_size // size
+        if not total_size % size == 0:
+            raise AssertionError(
+                f"size inferred for -1 is not integral {sizes} should have {total_size} elements."
+            )
+        return tuple(s if s != -1 else missing_size for s in sizes)
+    if not size == total_size:
+        raise AssertionError(f"sizes do not match {total_size} vs {size}")
+    return sizes
+
+
+def view_groups(from_size: Shape, to_size: Shape) -> DimMap:
+    """
+    Decompose a reshape operation into forwarding, flattening, or splitting dimensions for each output dimension.
+
+    A view or reshape operation can be decomposed into a set of 3 types of smaller operations:
+    1) Forward a dimension from input to output
+    2) Flatten a set of dimensions into a single dimension
+    3) Split one dimension into multiple dimensions
+
+    view_groups identifies these operations and returns, for each output dimension, what
+    is operation was performed in the input dimension. For example:
+
+        view_groups([2, 3, 4], [2, 12]) -> (
+            InputDim(0),
+            Flatten((InputDim(1), InputDim(2)))
+        )
+
+    - output dimension 0 maps to input dimension 0
+    - output dimension 1 maps to a flattened input dimensions 1 and 2
+
+
+        view_groups([2, 3], [3, 2]) -> (
+            Split(Flatten((InputDim(0), InputDim(1))), (3, 2), 0),
+            Split(Flatten((InputDim(0), InputDim(1))), (3, 2), 1),
+        )
+
+    - in the above, input is flattened into a single dimension and then split
+      into two separate dimensions with different sizes from the input.
+    """
+    from_nelem = prod(from_size)
+    to_size = infer_size(from_nelem, normalize_sizes(to_size))
+
+    if not from_nelem == prod(to_size):
+        raise AssertionError("Total view shape does not add up")
+
+    from_idx = 0
+    to_idx = 0
+    from_len = len(from_size)
+    to_len = len(to_size)
+
+    result_pp = []
+
+    while from_idx < from_len or to_idx < to_len:
+        from_group_dim, to_group_shape = [], []
+
+        if from_idx >= from_len:
+            f = 1
+        else:
+            f = from_size[from_idx]
+            from_group_dim.append(from_idx)
+            from_idx += 1
+
+        if to_idx >= to_len:
+            t = 1
+        else:
+            t = to_size[to_idx]
+            to_group_shape.append(t)
+            to_idx += 1
+
+        # if any of the groups is singleton, great, we need to backtrack though
+        if f == 1 and t != 1:
+            # produces ([1], [])
+            to_idx -= 1
+            to_group_shape = []
+        elif f != 1 and t == 1:
+            # produces ([], [1])
+            from_idx -= 1
+            from_group_dim = []
+        else:
+            # produces ([1], [1]),  ([2], [2]), ([2,3], [6])
+            while f != t:
+                if f < t:
+                    nf = from_size[from_idx]
+                    from_group_dim.append(from_idx)
+                    from_idx += 1
+                    f *= nf
+                else:
+                    nt = to_size[to_idx]
+                    to_group_shape.append(nt)
+                    to_idx += 1
+                    t *= nt
+
+        if len(to_group_shape) > 0:
+            flattened = Flatten.new(
+                tuple(InputDim(fi) for fi in from_group_dim if from_size[fi] >= 1)
+            )
+            result_pp += [
+                Split.new(flattened, tuple(to_group_shape), i)
+                for i in range(len(to_group_shape))
+            ]
+
+    return tuple(result_pp)
+
+
+def dim_tile(ndim: int, dims: tuple[int, ...]) -> DimMap:
+    if len(dims) < ndim:
+        dims = (1,) * (ndim - len(dims)) + dims
+    return dim_repeat(ndim, dims)
+
+
+def dim_transpose(ndim: int, dim1: int, dim2: int) -> DimMap:
+    dim1 = normalize_dim(dim1, ndim)
+    dim2 = normalize_dim(dim2, ndim)
+    if not dim1 < ndim:
+        raise AssertionError(f"Expected dim1 < ndim, got {dim1} >= {ndim}")
+    if not dim2 < ndim:
+        raise AssertionError(f"Expected dim2 < ndim, got {dim2} >= {ndim}")
+    dimmap = [InputDim(i) for i in range(ndim)]
+    swapdim = dimmap[dim1]
+    dimmap[dim1] = dimmap[dim2]
+    dimmap[dim2] = swapdim
+    return tuple(dimmap)
+
+
+def dim_squeeze(shape: Shape, dim: int | None = None) -> DimMap:
+    # FIXME: this is wrong when dim=None and one of the dimensions
+    # equals size of the mesh. For example squeeze(DTensor(tensor(4), Shard[0])) could
+    # end up as squeeze(tensor(1)) if we have 4 devices; this would lead to
+    # removal of a dimension that is not actually a singleton.
+    return tuple(
+        InputDim(i)
+        for i, s in enumerate(shape)
+        if s > 1 or (dim is not None and i != normalize_dim(dim, len(shape)))
+    )
+
+
+def dim_unsqueeze(ndim: int, dim: int) -> DimMap:
+    dims = tuple(InputDim(i) for i in range(ndim))
+    if dim < 0:
+        dim += ndim + 1
+    return dims[:dim] + (Singleton(),) + dims[dim:]
+
+
+def dim_view_as_real(shape: Shape) -> DimMap:
+    ndim = len(shape)
+    results: list[DimSpec] = [InputDim(i) for i in range(ndim - 1)]
+    # each complex number is split into two real numbers,
+    # resulting in one more dimension of size 2
+    results.append(Split(InputDim(ndim - 1), (shape[-1], 2), 0))
+    results.append(Split(InputDim(ndim - 1), (shape[-1], 2), 1))
+    return tuple(results)
+
+
+def dim_reduction(ndim: int, dim_or_dims: DimsType | None, keepdim: bool) -> DimMap:
+    """
+    General fallback for reduction ops where Partial() does not apply.
+
+    This will cause incoming tensor to be replicated on the reducing dimensions.
+    """
+    if dim_or_dims is None:
+        dim_or_dims = tuple(range(ndim))
+    if isinstance(dim_or_dims, int):
+        dim_or_dims = (dim_or_dims,)
+    dim_or_dims = tuple(d if d >= 0 else d + ndim for d in dim_or_dims)
+    return tuple(
+        InputDim(i) if i not in dim_or_dims else Singleton()
+        for i in range(ndim)
+        if i not in dim_or_dims or keepdim
+    )
+
+
+dim_maps: dict[Callable[..., torch.Tensor], Callable[..., DimMap]] = {
+    torch.atleast_1d: lambda x: dim_pad_left(x.ndim, 1),
+    torch.atleast_2d: lambda x: dim_pad_left(x.ndim, 2),
+    torch.atleast_3d: lambda x: dim_atleast_3d(x.ndim),
+    torch.broadcast_to: lambda input, shape: expand(input.shape, shape),
+    Tensor.expand: lambda self, *sizes: expand(self.shape, normalize_sizes(sizes)),
+    torch.flatten: lambda tensor: dim_flatten(tensor.ndim),
+    torch.movedim: lambda input, source, destination: dim_movedim(
+        input.ndim, source, destination
+    ),
+    torch.permute: lambda input, dims: tuple(
+        InputDim(i) for i in normalize_dims(dims, input.ndim)
+    ),
+    torch.ravel: lambda tensor: dim_flatten(tensor.ndim),
+    Tensor.repeat: lambda self, *sizes: dim_repeat(self.ndim, sizes),
+    torch.reshape: lambda input, shape: view_groups(input.shape, shape),
+    torch.squeeze: lambda input, dim=None: dim_squeeze(input.shape, dim),
+    torch.tile: lambda input, dims: dim_tile(input.ndim, dims),
+    torch.transpose: lambda input, dim0, dim1: dim_transpose(input.ndim, dim0, dim1),
+    torch.unsqueeze: lambda input, dim: dim_unsqueeze(input.ndim, dim),
+    Tensor.view: lambda input, *shape: view_groups(input.shape, shape),
+    torch.view_as_complex: lambda input: dim_flatten(input.ndim, input.ndim - 2),
+    torch.view_as_real: lambda input: dim_view_as_real(input.shape),
+}
+
+
+def propagate_shape_and_sharding(
+    input_src_placements: Sequence[Placement],
+    global_input_shape: Shape,
+    rule: DimMap,
+    mesh_sizes: Shape,
+    strict_view: bool = False,
+) -> tuple[Sequence[Placement], Sequence[Placement]]:
+    """
+    Determine input target sharding and output sharding based on
+    given global tensor shape and input source sharding.
+
+    Sharding propagation follows mapped dimensions:
+    - An output dimension that maps directly to an input dimension is sharded equally
+    - An output dimension that is a flattened set of input dimensions can only be
+      sharded if only the leftmost flattened dimension is sharded.
+    - An output dimension that is a split of the input dimension can only be sharded
+      if the leftmost split size is divisible by the mesh dimension
+    """
+    if not len(input_src_placements) == len(mesh_sizes):
+        raise AssertionError(f"{input_src_placements} != {mesh_sizes}")
+    # for each input dim, for each mesh dim, provides a list of possible shardable dimensions
+    mesh_ndim = len(mesh_sizes)
+    shardable_dims: dict[int, list[bool]] = {}
+
+    # in case an input dimension disappears (e.g. collapsing, reduction)
+    # we cannot shard in that dimension (we need a replication fall-back rule)
+    seen_input_dims: set[int] = set()
+
+    def collect_used_inputs(cmd: DimSpec) -> None:
+        if isinstance(cmd, InputDim):
+            seen_input_dims.add(cmd.input_dim)
+        for inp in cmd.inputs():
+            collect_used_inputs(inp)
+
+    for cmd in rule:
+        collect_used_inputs(cmd)
+    for dim in range(len(global_input_shape)):
+        shardable_dims[dim] = [dim in seen_input_dims] * mesh_ndim
+
+    def maybe_get_shard_mesh_dim_and_placement(
+        input_dim: InputDim,
+    ) -> tuple[Optional[int], Optional[Shard | _StridedShard]]:
+        # if input_dim is sharded, return the mesh_dim and shard placement
+        for i, placement in enumerate(input_src_placements):
+            if (
+                isinstance(placement, Shard | _StridedShard)
+                and placement.dim == input_dim.input_dim
+            ):
+                return i, placement
+        return None, None
+
+    # NOTE: This function has three responsibilities:
+    # 1. determine "theoretically" if an output dimension can be sharded, i.e. fill the shardable_dims map
+    # 2. determine "theoretically" the corresponding input dimension to shard on, via return value
+    # 3. throw an error when strict_view is enabled and we cannot shard an output dimension
+    # 1 and 2 doesn't require the info of whether current input is sharded.
+    # 3 requires that info, to decide whether we can error out. Maybe we can refactor
+    # to make this function purely "theoretical".
+    def get_in_dim_to_shard(cmd: DimSpec) -> InputDim | None:
+        if isinstance(cmd, InputDim):
+            return cmd
+        elif isinstance(cmd, Flatten):
+            for i, dim in enumerate(cmd.input_dims):
+                # so far all Flatten is always composed of InputDims; revisit this if needed
+                if not isinstance(dim, InputDim):
+                    raise AssertionError(f"Expected InputDim, got {type(dim)}")
+                can_shard_dim = True
+                shard_mesh_dim, shard_placement = (
+                    maybe_get_shard_mesh_dim_and_placement(dim)
+                )
+                input_sharded = shard_mesh_dim is not None
+                if i > 0:
+                    can_shard_dim = False
+                    if strict_view and input_sharded:
+                        raise RuntimeError(
+                            f"Attempted to flatten multiple dimensions, with dimension {dim.input_dim} being sharded. ",
+                            "It cannot be performed without redistribution, which is disallowed by the current operator.",
+                        )
+                elif input_sharded:
+                    if not (shard_placement is not None and shard_mesh_dim is not None):
+                        raise AssertionError(
+                            "Expected shard_placement and shard_mesh_dim to be not None"
+                        )
+                    tensor_dim_size = global_input_shape[shard_placement.dim]
+                    mesh_dim_size = mesh_sizes[shard_mesh_dim]
+                    if tensor_dim_size % mesh_dim_size != 0:
+                        can_shard_dim = False
+                        if strict_view:
+                            raise RuntimeError(
+                                f"Attempted to flatten unevenly sharded dimension {i}, "
+                                "which would require resharding the input. "
+                                "Please explicitly redistribute the tensor instead."
+                            )
+                shardable_dims[dim.input_dim] = [can_shard_dim] * mesh_ndim
+
+            if not isinstance(cmd.input_dims[0], InputDim):
+                raise AssertionError(
+                    f"Expected InputDim, got {type(cmd.input_dims[0])}"
+                )
+            return cmd.input_dims[0]
+        elif isinstance(cmd, Split):
+            in_dim = get_in_dim_to_shard(cmd.input_dim)
+            out_size = cmd.group_shape[cmd.split_id]
+            if cmd.split_id == 0 and in_dim is not None:
+                # we need to check that the input dimension is divisible
+                # by the size of the submesh we're sharding it on
+                # NOTE: it would be possible to shard the same input dimension
+                # on more than one mesh dimension. In that case, the dimension
+                # needs to be divisible by the product of mesh sizes.
+                # In order to keep the problem more tractable, we will not consider
+                # double resharding as a suggestion (e.g. [Shard(0), Shard(0) ])
+                # but we will allow it if that's the input and it's compatible
+
+                # 1. is this dimension shardable on each individual mesh dim?
+                shardable_dims[in_dim.input_dim] = [
+                    out_size % mesh_dim_size == 0 for mesh_dim_size in mesh_sizes
+                ]
+
+                shard_mesh_dim, _ = maybe_get_shard_mesh_dim_and_placement(in_dim)
+                if strict_view and shard_mesh_dim is not None:
+                    if not shardable_dims[in_dim.input_dim][shard_mesh_dim]:
+                        raise RuntimeError(
+                            f"Attempted to split the sharded dimension {in_dim.input_dim} into multiple subdimensions. ",
+                            "It cannot be performed without redistribution, which is disallowed by the current operator.",
+                        )
+
+                # 2. here we special case things like [Shard(0), Shard(0)]
+                submesh_size = 1
+                for size, shard in zip(mesh_sizes, input_src_placements):
+                    if isinstance(shard, Shard | _StridedShard) and shard.dim == in_dim:
+                        submesh_size *= size
+                if not out_size % submesh_size == 0:
+                    raise AssertionError(
+                        f"Resulting dimension size {out_size} is not divisible by its mesh dimension {submesh_size}."
+                    )
+
+            # we will only shard our first component of the split
+            return in_dim if cmd.split_id == 0 else None
+        elif isinstance(cmd, Repeat):
+            in_dim = get_in_dim_to_shard(cmd.input_dim)
+            if in_dim is not None:
+                shardable_dims[in_dim.input_dim] = [False] * mesh_ndim
+            return None
+        else:
+            return None
+
+    # for each output dim, find the corresponding input dim in terms of sharding prop
+    shard_dim_map = {}
+    for dim, cmd in enumerate(rule):
+        in_dim = get_in_dim_to_shard(cmd)
+        if in_dim is not None:
+            shard_dim_map[in_dim.input_dim] = dim
+
+    input_tgt_placements = [
+        (
+            Replicate()
+            if isinstance(p, Shard | _StridedShard)
+            and not shardable_dims[p.dim][mesh_dim]
+            else p
+        )
+        for mesh_dim, p in enumerate(input_src_placements)
+    ]
+
+    def _rewrite_shard_dim(p: Shard | _StridedShard):
+        """
+        Rewrite the shard dim to the corresponding tensor dim in output.
+        For ``_StridedShard``, we can safely keep the placement type and
+        ``split_factor`` unchanged and only rewrite the ``dim`` because:
+        1. ``_StridedShard`` has no impact on sharding (i.e. how
+            tensor is partitioned) compared to ``Shard``. It only changes
+            how shards permute across the devices.
+        2. ``view()`` op on DTensor strictly forbids shard redistribution
+            which means if ``view()`` may cause shard permutation across
+            devices, it should be rejected. This is enforced in today's
+            sharding prop for ``view()``.
+        3. Since DTensor ``view()`` won't introduce any redistribution,
+            it's certain that ``placements`` won't change except the
+            inner ``dim`` attribute of ``Shard`` or ``_StridedShard``.
+        """
+        if isinstance(p, _StridedShard):
+            return _StridedShard(shard_dim_map[p.dim], split_factor=p.split_factor)
+        else:
+            return Shard(shard_dim_map[p.dim])
+
+    output_placements = [
+        _rewrite_shard_dim(p) if isinstance(p, Shard | _StridedShard) else p
+        for p in input_tgt_placements
+    ]
+
+    return input_tgt_placements, output_placements
+
+
+def register_op_strategy_map(
+    aten_op_overload: torch._ops.OpOverload,
+    local_op_name: Callable[..., torch.Tensor],
+    schema_info: RuntimeSchemaInfo | None = None,
+    strict_view: bool = False,
+) -> None:
+    """
+    Helper that registers strategies for view-like operators that follow a pattern:
+      (1) define the way input dims are split/combined to form output dims (dim_maps)
+      (2) register a strategy for the op schema that uses the dim_map as a sharding prop rule
+
+    strict_view: if True, we will error out if the view-operation would require resharding the input.
+       Currently, this should be set to 'true' for any "view" ops.
+       We could diverge behavior for "reshape" ops which could perform a redistribute implicitly.
+    """
+    dim_map: Callable[..., DimMap] = dim_maps[local_op_name]
+
+    @register_op_strategy(aten_op_overload, schema_info=schema_info)
+    def reshape_strategy(op_schema: OpSchema) -> StrategyType:
+        rules = dim_map(*op_schema.args_schema, **op_schema.kwargs_schema)
+        input_strategy = cast(OpStrategy, op_schema.args_schema[0])
+        mesh = op_schema.get_mesh_from_args(validate=False)
+
+        global_in_shape = input_strategy.shape
+        if global_in_shape is None:
+            raise AssertionError("Shape required.")
+
+        output_strategy = OpStrategy([])
+        for input_placement_strategy in input_strategy.strategies:
+            input_src_spec = input_placement_strategy.output_spec
+
+            input_tgt_placements, output_placements = propagate_shape_and_sharding(
+                input_src_spec.placements,
+                tuple(global_in_shape),
+                rules,
+                mesh.shape,
+                strict_view,
+            )
+
+            # TODO: optimize this. we shouldn't simply blindly replicate
+            #       unshardable dims ...
+            # FIXME: this can be wrong for situations where we have
+            #        [Shard(0), Shard(0)]
+            input_tgt_spec = DTensorSpec(
+                placements=tuple(input_tgt_placements),
+                mesh=mesh,
+                tensor_meta=input_src_spec.tensor_meta,
+            )
+            redistribute_costs: list[list[float]] = [
+                generate_redistribute_costs(input_strategy, input_tgt_spec)
+            ]
+
+            output_spec = DTensorSpec(mesh=mesh, placements=tuple(output_placements))
+            output_strategy.strategies.append(
+                OpSpec(
+                    output_specs=output_spec,
+                    input_specs=(input_tgt_spec,),
+                    redistribute_cost=redistribute_costs,
+                )
+            )
+
+        return output_strategy
+
+
+register_op_strategy_map(aten.squeeze.default, torch.squeeze)
+register_op_strategy_map(
+    aten.squeeze_.dim, torch.squeeze, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten.squeeze.dim, torch.squeeze, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten.view.default,
+    Tensor.view,
+    schema_info=RuntimeSchemaInfo(1),
+    strict_view=True,
+)
+register_op_strategy_map(
+    aten.reshape.default, torch.reshape, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten._unsafe_view.default,
+    Tensor.view,
+    schema_info=RuntimeSchemaInfo(1),
+    strict_view=True,
+)
+register_op_strategy_map(
+    aten.unsqueeze.default, torch.unsqueeze, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten.expand.default, Tensor.expand, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten.permute.default, torch.permute, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten.repeat.default, Tensor.repeat, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(
+    aten.transpose.int, torch.transpose, schema_info=RuntimeSchemaInfo(1)
+)
+register_op_strategy_map(aten.view_as_complex.default, torch.view_as_complex)
+register_op_strategy_map(aten.view_as_real.default, torch.view_as_real)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2022c214298f26df41503988fa8684ab20ca3bf
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/_ops/utils.py
@@ -0,0 +1,388 @@
+# mypy: allow-untyped-defs
+# Copyright (c) Meta Platforms, Inc. and affiliates
+import functools
+import itertools
+import operator
+from collections.abc import Callable, Iterable, Sequence
+from typing import cast
+
+import torch
+from torch._prims_common import DimsSequenceType, DimsType
+from torch.distributed.tensor._collective_utils import redistribute_cost
+from torch.distributed.tensor._dtensor_spec import DTensorSpec
+from torch.distributed.tensor._op_schema import (
+    OpSchema,
+    OpSpec,
+    OpStrategy,
+    PlacementList,
+    StrategyType,
+)
+from torch.distributed.tensor.device_mesh import DeviceMesh
+from torch.distributed.tensor.placement_types import (
+    _StridedShard,
+    Partial,
+    Placement,
+    Replicate,
+    Shard,
+)
+
+
+def replicate_op_strategy(op_schema: OpSchema) -> StrategyType:
+    """
+    Fallback strategy all use Replication()
+    """
+    args_strategy = op_schema.args_strategy
+    kwargs_strategy = op_schema.kwargs_strategy
+    inputs_strategy = args_strategy + kwargs_strategy
+
+    output_type = [str(ret.type) for ret in op_schema.op._schema.returns]
+    output_len = output_type.count("Tensor")
+    # TODO(zpcore): Confirm if view op can be handle properly or not. Prevent
+    # handling view ops until confirmed.
+    if op_schema.op.is_view:
+        raise RuntimeError(
+            "fallback strategy is unable to handle view ops until confirmed"
+        )
+    if "List[Tensor]" in output_type:
+        raise RuntimeError(
+            "fallback strategy is unable to handle ops with List[Tensor] output "
+            "because size of the list may depend on the op's input value"
+        )
+
+    mesh = inputs_strategy[0].mesh
+
+    dim_sharding: PlacementList = [Replicate()] * (output_len + len(inputs_strategy))
+    single_dim_placement = [dim_sharding]
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_dim_placement, input_index=output_len
+    )
+
+
+def as_list(
+    x: list[object] | object,
+    # pyre-fixme[11]: Annotation `immutable_list` is not defined as a type.
+) -> list[object] | torch.fx.immutable_collections.immutable_list:  # type: ignore[valid-type]
+    # During tracing, `aten.sum.dim_IntList` uses `immutable_list` for its args,
+    # which is an object but treated as a list by the tracer. Therefore, keep
+    # `immutable_list` intact here as well.
+    if type(x) is list or isinstance(x, torch.fx.immutable_collections.immutable_list):
+        return x
+    else:
+        return [x]
+
+
+def normalize_dim(dim: int, ndim: int) -> int:
+    return dim if dim >= 0 else dim + ndim
+
+
+def normalize_dims(dims: DimsType, ndim: int) -> DimsSequenceType:
+    """Normalize a dim or a sequence of dims, so that they are all positive."""
+    if isinstance(dims, int):
+        dims = (normalize_dim(dims, ndim),)
+    elif isinstance(dims, list):
+        dims = [normalize_dim(dim, ndim) for dim in dims]
+    elif isinstance(dims, tuple):
+        dims = tuple(normalize_dim(dim, ndim) for dim in dims)
+    return dims
+
+
+def prod(xs: Iterable[int]) -> int:
+    return functools.reduce(operator.mul, xs, 1)
+
+
+def is_tensor_shardable(
+    shape: Sequence[int],
+    spec: DTensorSpec,
+    allow_unbacked_sharding: bool | None = None,
+) -> bool:
+    """
+    Check if the shape is shardable according to the spec.
+
+    allow_unbacked_sharding: determines the fallback value if unbacked shapes are involved,
+    and the queried shape properties are not statically known.
+
+    e.g. when asking if u0 is shardable on num_shards, and u0 has generic bounds [0, inf],
+    the behavior of allow_unbacked_sharding is:
+
+        None: will data-dependent error
+        True: assumes shardability; we return True, allowing zero-size shards at runtime when u0 < num_shards.
+        False: returns False, and lower-bounding u0, e.g. torch._check(u0 >= num_shards), is needed to enable sharding.
+    """
+    from torch.fx.experimental.symbolic_shapes import guard_or_false, guard_or_true
+
+    assert allow_unbacked_sharding in [None, True, False]
+    guard_fn = {
+        None: bool,
+        True: guard_or_false,
+        False: guard_or_true,
+    }[allow_unbacked_sharding]
+
+    # number of shards in each tensor dimension
+    shards_map = [1] * len(shape)
+    for i, placement in enumerate(spec.placements):
+        if placement.is_shard():
+            shard_dim = cast(Shard, placement).dim
+            if shard_dim >= len(shape):
+                return False
+            shards_map[shard_dim] *= spec.mesh.size(i)
+
+    for i, dim_size in enumerate(shape):
+        # TODO: maybe we should determine is_shardable based on
+        #       whether it's evenly sharded or not
+        if shards_map[i] > 1 and guard_fn(dim_size < shards_map[i]):
+            return False
+
+    return True
+
+
+def is_tensor_evenly_shardable(shape: Sequence[int], spec: DTensorSpec) -> bool:
+    """Check if the shape is evenly shardable according to the spec."""
+    # number of shards in each tensor dimension
+    shards_map = [1] * len(shape)
+    for i, placement in enumerate(spec.placements):
+        if placement.is_shard():
+            shard_dim = cast(Shard, placement).dim
+            shards_map[shard_dim] *= spec.mesh.size(i)
+
+    for i, dim_size in enumerate(shape):
+        if shards_map[i] > 1 and (dim_size % shards_map[i] != 0):
+            return False
+
+    return True
+
+
+def is_tensor_evenly_shardable_on_dim(
+    shape: Sequence[int], spec: DTensorSpec, dim: int
+) -> bool:
+    """Check if the shape is evenly shardable according to the spec on dim."""
+    dim = normalize_dim(dim, len(shape))
+
+    num_shards = 1
+    for i, placement in enumerate(spec.placements):
+        if placement.is_shard():
+            shard_dim = cast(Shard, placement).dim
+            if shard_dim == dim:
+                num_shards *= spec.mesh.size(i)
+
+    return shape[dim] % num_shards == 0
+
+
+def is_tensor_dim_sharded(spec: DTensorSpec, dim: int) -> bool:
+    """Return True if tensor dim is sharded."""
+    return any(p.is_shard(dim) for p in spec.placements)
+
+
+def is_tensor_partial(spec: DTensorSpec) -> bool:
+    """Return True if tensor is partial on the mesh."""
+    return any(p.is_partial() for p in spec.placements)
+
+
+def infer_broadcast_dims_map(
+    common_shape: torch.Size, input_shape: torch.Size
+) -> list[int]:
+    # infer the broadcast dims map, where it maps from the common shape dim to the input shape dim
+    # this is aligned with the broadcast semantics
+    # e.g. if common_shape = [1, 2, 3, 4] and input_shape = [2, 3, 4],
+    # broadcast_dims_map will be [-1, 0, 1, 2]
+    # meaning that dim 0 in the output has no mapping to the input, and dim 1 in the output maps to dim 0 in the input
+    common_ndim = len(common_shape)
+    input_ndim = len(input_shape)
+    broadcast_dims_map = [-1] * common_ndim
+    for idx in range(-1, -1 - input_ndim, -1):
+        if input_shape[idx] == common_shape[idx]:
+            broadcast_dims_map[common_ndim + idx] = input_ndim + idx
+    return broadcast_dims_map
+
+
+def map_placements_after_broadcast(
+    placements: tuple[Placement, ...],
+    shape: torch.Size,
+    broadcast_dims_map: list[int],
+    partial_to_replicate: bool = False,
+) -> tuple[Placement, ...]:
+    """Map each placement based on the output shape after broadcast."""
+    new_placements: list[Placement] = []
+    for placement in placements:
+        if isinstance(placement, Partial):
+            if partial_to_replicate:
+                # map the partial placement to replicate
+                new_placements.append(Replicate())
+            else:
+                new_placements.append(placement)
+        elif isinstance(placement, Replicate):
+            new_placements.append(placement)
+        else:
+            assert isinstance(placement, Shard | _StridedShard)
+            shard_dim = normalize_dim(placement.dim, len(shape))
+            new_shard_dim = broadcast_dims_map[shard_dim]
+            if new_shard_dim != -1:
+                # there's a map from the common shape shard dim to
+                # the input shape shard dim before broadcasting,
+                # use that instead
+                if isinstance(placement, _StridedShard):
+                    new_placements.append(
+                        _StridedShard(
+                            new_shard_dim, split_factor=placement.split_factor
+                        )
+                    )
+                else:
+                    new_placements.append(Shard(new_shard_dim))
+            else:
+                # there's no map between common shape shard dim and
+                # the input shape shard dim before broadcasting,
+                # in this case it means implicit broadcasting happen
+                # in this dim, so we can just mark it as replicate
+                # and implicit broadcast will broadcast automatically
+                # to the sharded shape
+                new_placements.append(Replicate())
+
+    return tuple(new_placements)
+
+
+def generate_redistribute_costs(
+    src_strategy: OpStrategy, dst_spec: DTensorSpec
+) -> list[float]:
+    """Generates one row in the 'redistribute_costs' matrix in an OpSpec
+    The length of the returned list will match the number of strategies in 'src_strategy'.
+
+    Each value in the row is the cost of redistributing from a particular src_strategy to dst_spec.
+    """
+    redistribute_costs: list[float] = [
+        redistribute_cost(strat.output_spec, dst_spec)
+        for strat in src_strategy.strategies
+    ]
+
+    return redistribute_costs
+
+
+def expand_to_full_mesh_op_strategy(
+    mesh: DeviceMesh,
+    op_schema: OpSchema,
+    single_mesh_dim_strategies: list[PlacementList],
+    *,
+    input_index: int = 1,
+    inplace_op: bool = False,
+    is_valid_strategy_cb: Callable[
+        [list[DTensorSpec], tuple[DTensorSpec | None, ...]], bool
+    ]
+    | None = None,
+) -> OpStrategy:
+    """
+    Convenience function to allow writing a sharding strategy considering only a single mesh dimension,
+    and have it expanded combinatorically to all mesh dimensions.
+
+    Args:
+        mesh (DeviceMesh): the device mesh to expand the strategy to
+        op_schema (OpSchema): the op schema
+        single_mesh_dim_strategies (list[PlacementList]): the sharding strategies to expand. The outer list is over
+            different strategies.  The inner PlacementList is over the outputs and inputs of the op. If input_index is 1,
+            a PlacementList looks like [output_placement, input_placement1, input_placement2, ...].
+        input_index: the number of outputs of the op, defaults to 1
+        inplace_op: whether the op is inplace or not, defaults to False
+        is_valid_strategy_cb: a callback function to filter out invalid sharding rules, defaults to None.
+
+    Example: Let's say `my_op(tensor_x, tensor_y) - > output_tensor`  can support sharding or replicating tensor_x,
+    but always requires tensor_y to be replicated.  We can specify these valid combinations ignoring mesh dims.
+    Then, we can rely on `expand_to_full_mesh_op_strategy` to create every possible combination of these shardings
+    over multiple mesh dimensions, filtering out any combinations that are invalid based on the actual mesh dim size.
+
+        single_mesh_dim_strategies = [
+            # first strategy: return output sharded on first dim, shard tensor_x on its first dim, replicate tensor_y
+            [Shard(0), Shard(0), Replicate()]
+            # second strategy: replicate output, and both inputs
+            [Replicate(), Replicate(), Replicate()]
+        ]
+    """
+    # Expand the single_mesh_dim_strategies to full mesh dim strategies.
+    all_mesh_dim_strategies = [single_mesh_dim_strategies] * mesh.ndim
+
+    strategy_combs = itertools.product(*all_mesh_dim_strategies)
+
+    all_strategies = []
+    for strategy_comb in strategy_combs:
+        spec_list: list[DTensorSpec | None] = []
+        for specs in zip(*strategy_comb):
+            if specs[0] is not None:
+                # TODO: we should fill in tensor_meta here.  If nothing else, it helps the filter strategy callback
+                # pyrefly: ignore [bad-argument-type]
+                spec_list.append(DTensorSpec(mesh, specs))
+            else:
+                spec_list.append(None)
+
+        input_specs: list[DTensorSpec] = [
+            s for s in spec_list[input_index:] if isinstance(s, DTensorSpec)
+        ]
+
+        args_strategy = op_schema.args_strategy
+        kwargs_strategy = op_schema.kwargs_strategy
+        input_args_strategy = args_strategy + kwargs_strategy
+
+        if len(input_specs) != len(input_args_strategy):
+            raise AssertionError(
+                f"input_specs({len(input_specs)}) != strategies({len(input_args_strategy)}: "
+                f"{len(args_strategy)} args + {len(kwargs_strategy)} kwargs)"
+            )
+        self_spec = input_args_strategy[0].strategies[0].output_spec
+
+        if inplace_op and self_spec.placements != input_specs[0].placements:
+            # if it's inplace op, we would only allow the OpSpec to be added when the
+            # input_spec matches the first argument's runtime sharding, otherwise we skip
+            continue
+
+        output_specs: tuple[DTensorSpec | None, ...]
+        if input_index > 1:
+            output_specs = tuple(spec_list[:input_index])
+        else:
+            if spec_list[0] is not None:
+                output_specs = spec_list[0]  # type: ignore[assignment]
+            else:
+                raise RuntimeError("output spec is None")
+
+        # check all inputs are shardable
+        if not all(
+            is_tensor_shardable(inp.shape, s)
+            for inp, s in zip(input_args_strategy, input_specs)
+        ):
+            continue
+
+        # perform additional op-specific filtering
+        if is_valid_strategy_cb is not None:
+            if not is_valid_strategy_cb(input_specs, output_specs):
+                continue
+
+        redistribute_cost = [
+            generate_redistribute_costs(input_strategy, input_spec)
+            for input_strategy, input_spec in zip(input_args_strategy, input_specs)
+        ]
+
+        strategy = OpSpec(
+            output_specs=output_specs,
+            input_specs=input_specs,
+            redistribute_cost=redistribute_cost,
+        )
+        all_strategies.append(strategy)
+    return OpStrategy(all_strategies)
+
+
+def shift_shard_dims_after_insert(
+    placements: Sequence[Placement], insert_dim: int = 0
+) -> Sequence[Placement]:
+    normalized_placements: list[Placement] = []
+    for placement in placements:
+        if isinstance(placement, Shard) and placement.dim >= insert_dim:
+            normalized_placements.append(Shard(placement.dim + 1))
+        else:
+            normalized_placements.append(placement)
+    return normalized_placements
+
+
+def shift_shard_dims_after_remove(
+    placements: Sequence[Placement], remove_dim: int = 0
+) -> Sequence[Placement]:
+    normalized_placements: list[Placement] = []
+    for placement in placements:
+        if isinstance(placement, Shard) and placement.dim > remove_dim:
+            normalized_placements.append(Shard(placement.dim - 1))
+        else:
+            normalized_placements.append(placement)
+    return normalized_placements
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6aeca3b93a12deba923e8dd3094d36583cecd26
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__init__.py
@@ -0,0 +1,52 @@
+# mypy: allow-untyped-defs
+import torch._C
+from torch.distributed.tensor.debug._comm_mode import CommDebugMode
+from torch.distributed.tensor.debug._visualize_sharding import visualize_sharding
+
+
+__all__ = ["CommDebugMode", "visualize_sharding"]
+
+
+def _get_python_sharding_prop_cache_info():
+    """
+    Get the cache info for the Python sharding propagation cache, used for debugging purpose only.
+    This would return a named tuple showing hits, misses, maxsize and cursize of the sharding
+    propagator cache. Note that directly calling into the sharding propagator does not share cache
+    state with the DTensor dispatch fast path!
+    """
+    from torch.distributed.tensor._api import DTensor
+
+    return (
+        DTensor._op_dispatcher.sharding_propagator.propagate_op_sharding.cache_info()  # type:ignore[attr-defined]
+    )
+
+
+def _get_fast_path_sharding_prop_cache_stats():
+    """
+    Get a tuple (hits, misses) for the fast path sharding propagation cache, used for debugging
+    only.
+    """
+    return torch._C._get_DTensor_sharding_propagator_cache_stats()
+
+
+def _clear_python_sharding_prop_cache():
+    """
+    Clears the cache for the Python sharding propagation cache, used for debugging purpose only.
+    """
+    from torch.distributed.tensor._api import DTensor
+
+    return (
+        DTensor._op_dispatcher.sharding_propagator.propagate_op_sharding.cache_clear()  # type:ignore[attr-defined]
+    )
+
+
+def _clear_fast_path_sharding_prop_cache():
+    """
+    Clears the cache for the fast path sharding propagation cache, used for debugging purpose only.
+    """
+    torch._C._clear_DTensor_sharding_propagator_cache()
+
+
+# Set namespace for exposed private names
+CommDebugMode.__module__ = "torch.distributed.tensor.debug"
+visualize_sharding.__module__ = "torch.distributed.tensor.debug"
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e09badc1ce21f724983f02034b644dc339e7e60b
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_comm_mode.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_comm_mode.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..227f44811fa5ada0af8e761ebffa27c4833e9151
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_comm_mode.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_op_coverage.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_op_coverage.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c22a390c86b561140379befff49623097dd3643f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_op_coverage.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_visualize_sharding.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_visualize_sharding.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8c5f5d03cec28a8f8ad8455b0293b2964d006a25
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/__pycache__/_visualize_sharding.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_comm_mode.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_comm_mode.py
new file mode 100644
index 0000000000000000000000000000000000000000..66ec0bfff5f9c74dfc4760729eead2bbf5c09e70
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_comm_mode.py
@@ -0,0 +1,740 @@
+# mypy: allow-untyped-defs
+import copy
+import json
+import re
+import weakref
+from collections import defaultdict
+from typing import Any
+
+import torch
+import torch.nn
+from torch._guards import detect_fake_mode
+from torch.autograd.graph import register_multi_grad_hook
+from torch.distributed._tools.mod_tracker import ModTracker
+from torch.distributed.tensor._api import DTensor
+from torch.nn.modules.module import (
+    register_module_forward_hook,
+    register_module_forward_pre_hook,
+    register_module_full_backward_pre_hook,
+)
+from torch.utils._python_dispatch import TorchDispatchMode
+from torch.utils._pytree import tree_flatten
+
+
+__all__ = ["CommDebugMode"]
+
+funcol_native = torch.ops._c10d_functional
+funcol_py = torch.ops.c10d_functional
+funcol_autograd = torch.ops._c10d_functional_autograd
+c10d_ops = torch.ops.c10d
+
+NATIVE_TO_PY_MAPPING = {
+    funcol_native.all_gather_into_tensor: funcol_py.all_gather_into_tensor,
+    funcol_native.all_gather_into_tensor_coalesced: funcol_py.all_gather_into_tensor_coalesced,
+    funcol_native.all_reduce: funcol_py.all_reduce,
+    funcol_native.all_reduce_coalesced: funcol_py.all_reduce_coalesced,
+    funcol_native.all_to_all_single: funcol_py.all_to_all_single,
+    funcol_native.broadcast: funcol_py.broadcast,
+    funcol_native.reduce_scatter_tensor: funcol_py.reduce_scatter_tensor,
+    funcol_native.reduce_scatter_tensor_coalesced: funcol_py.reduce_scatter_tensor_coalesced,
+    # functional ops
+    funcol_autograd.all_to_all_single: funcol_py.all_to_all_single,
+}
+
+c10d_collective_ops = {
+    c10d_ops._allgather_base_,
+    c10d_ops._reduce_scatter_base_,
+    c10d_ops.allgather_,
+    c10d_ops.allgather_coalesced_,
+    c10d_ops.allgather_into_tensor_coalesced_,
+    c10d_ops.allreduce_,
+    c10d_ops.allreduce_coalesced_,
+    c10d_ops.alltoall_,
+    c10d_ops.alltoall_base_,
+    c10d_ops.broadcast_,
+    c10d_ops.gather_,
+    c10d_ops.scatter_,
+    c10d_ops.reduce_,
+    c10d_ops.reduce_scatter_,
+    c10d_ops.reduce_scatter_tensor_coalesced_,
+}
+
+trivial_ops = {
+    "aten.detach.default",
+    "aten.t.default",
+    "aten.view.default",
+    "aten._to_copy.default",
+    "aten.as_strided.default",
+    "aten.transpose.int",
+}
+
+
+class _CommModeModuleTracker(ModTracker):
+    """
+    Inherits ModuleTracker and expands on its functionality to track the
+    parameters and sharding information of a model at a module-level
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.module_helper_dict = {}
+        self.module_parameters_dict = {}
+        self.module_parents_dict = {}
+        self.register_forward_hook_handles = {}
+        self.parent_dict = {}
+        self.parent_list = []
+        self.sharding_dict = {}
+        self.activation_checkpointing = False
+        self.name = ""
+
+    def _fw_set_module_hook(self, mod, input, output):
+        """
+        Updates the current module after module finishes running and
+        all other hooks are resolved
+        """
+
+        if self.is_bw:
+            self.activation_checkpointing = True
+        else:
+            self.activation_checkpointing = False
+
+        if not self.activation_checkpointing:
+            # module is no longer parent of next modules
+            self.parent_list.pop()
+
+            # set current module to previous parent module
+            self.name = self.parent_list[-1]
+
+    def _fw_pre_hook(self, mod, input):
+        """
+        This function is called before the forward pass of a module. It
+        collects the parameters and sharding information of a module and
+        stores it in a dictionary.
+        """
+        if self.is_bw:
+            self.activation_checkpointing = True
+        else:
+            self.activation_checkpointing = False
+
+        self.name = super()._get_mod_name(mod)
+        w_mod = weakref.ref(mod)
+
+        # adds current sub-module to module tracker parent class
+        super()._get_append_fn(w_mod, self.name, False)()
+
+        args, _ = tree_flatten(input)
+        tensors = [a for a in args if isinstance(a, torch.Tensor) and a.requires_grad]
+        if not self.is_bw and tensors:
+            register_multi_grad_hook(
+                tensors, super()._get_pop_fn(w_mod, self.name, True)
+            )
+
+        if not self.activation_checkpointing:
+            # contains information about module ordering and depth in the module tree
+            if self.name not in self.module_helper_dict:
+                self.module_helper_dict[self.name] = {}
+
+            self.module_helper_dict[self.name]["module_type"] = (
+                str(type(mod)).replace("<", "").replace(">", "")
+            )
+            self.module_helper_dict[self.name]["depth"] = len(self.parents) - 1
+
+            for param_name, param in mod.named_parameters(recurse=False):
+                if self.name not in self.module_parameters_dict:
+                    self.module_parameters_dict[self.name] = {}
+
+                self.module_parameters_dict[self.name][param_name] = param.data
+
+                if isinstance(param.data, DTensor):
+                    key_name = self.name + "." + param_name
+                    self.sharding_dict[key_name] = param.data.placements
+
+                    if "parameters" not in self.module_helper_dict[self.name]:
+                        self.module_helper_dict[self.name]["parameters"] = {}
+
+                    self.module_helper_dict[self.name]["parameters"][param_name] = str(
+                        param.data.placements
+                    )
+
+            # used to store module's parents to ensure correctness in backward pass/checkpointing
+            if self.name not in self.module_parents_dict:
+                self.module_parents_dict[self.name] = copy.deepcopy(self.parents)
+
+            # used to create parent-child module associations for json dumps
+            parent = self.parent_list[-1]
+            if parent not in self.parent_dict:
+                self.parent_dict[parent] = []
+
+            self.parent_dict[parent].append(self.name)
+            self.parent_list.append(self.name)
+
+            self.register_forward_hook_handles[self.name] = mod.register_forward_hook(
+                self._fw_set_module_hook
+            )
+
+    def _fw_post_hook(self, mod, input, output):  # pylint: disable=useless-parent-delegation
+        """
+        This function is called when the forward pass of a module is called.
+        It updates the module tracker and removes the module from parent data
+        """
+
+        super()._fw_post_hook(mod, input, output)
+
+    def _bw_hook(self, mod, output):
+        """
+        This function is called when the backward pass of a module is called. It
+        updates the current module for backward passes
+        """
+        self.activation_checkpointing = False
+        self.name = super()._get_mod_name(mod)
+
+    def __enter__(self):
+        self.activation_checkpointing = False
+        self.module_parameters_dict.clear()
+        self.sharding_dict.clear()
+        self.parent_dict.clear()
+        self.parent_list = ["Global"]
+        self.module_helper_dict.clear()
+        self.module_helper_dict["Global"] = {"depth": 0}
+        self.module_parents_dict.clear()
+        self.module_parents_dict["Global"] = set()
+        self._fw_pre_handle = register_module_forward_pre_hook(self._fw_pre_hook)
+        self._fw_post_handle = register_module_forward_hook(self._fw_post_hook)
+        self.register_forward_hook_handles.clear()
+        self._bw_handle = register_module_full_backward_pre_hook(self._bw_hook)
+        self.name = "Global"
+
+    def __exit__(self, *args):
+        super().__exit__(*args)
+        self._bw_handle.remove()
+
+        # removes all forward_hook handles added in the pre-hook
+        for handle in self.register_forward_hook_handles.values():
+            handle.remove()
+
+    def print_paramater_info(self):
+        print(self.module_parameters_dict)
+
+    def print_sharding_info(self):
+        for key, value in self.sharding_dict.items():
+            print(key + ": " + str(value))
+
+
+class CommDebugMode(TorchDispatchMode):
+    """
+    :class:`CommDebugMode` is a context manager that counts the number of
+    functional collectives within its context. It does this using a
+    ``TorchDispatchMode``.
+
+    .. note:: Not all collectives are supported yet.
+
+    Example usage
+
+    .. code-block:: python
+
+        mod = ...
+        comm_mode = CommDebugMode()
+        with comm_mode:
+            mod.sum().backward()
+        print(comm_mode.get_comm_counts())
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.comm_counts: dict[Any, int] = defaultdict(int)
+        self.comm_module_counts = {}
+        self.comm_module_operation_counts = {}
+        self.comm_registry = set()
+        for native_op, py_op in NATIVE_TO_PY_MAPPING.items():
+            self.comm_registry.add(native_op)
+            self.comm_registry.add(py_op)
+
+        self.comm_registry.add(torch.ops._dtensor.shard_dim_alltoall)
+        self.advanced_module_tracker = _CommModeModuleTracker()
+
+    def generate_json_dump(self, file_name="comm_mode_log.json", noise_level=3):
+        """
+        Creates json file used to build browser visual
+        0. prints module-level collective counts
+        1. prints dTensor operations not included in trivial operations
+        2. prints operations not included in trivial operations
+        3. prints all operations
+        """
+
+        (
+            include_DTensor_ops,
+            include_module_data,
+            include_ops,
+            include_trivial_ops,
+        ) = self._set_noise_parameters(noise_level)
+
+        # recursively builds json data
+        def add_json_information(json_dict, fqn):
+            json_dict["fqn"] = fqn
+            json_dict["module_type"] = ""
+            json_dict["parameters"] = []
+            json_dict["children"] = []
+            json_dict["collectives_forward"] = []
+            json_dict["collectives_backward"] = []
+            json_dict["operations_forward"] = []
+            json_dict["operations_backward"] = []
+
+            # adds module layer type and parameters, and their sharding
+            if (
+                "module_type" in self.advanced_module_tracker.module_helper_dict[fqn]
+                and include_module_data
+            ):
+                json_dict["module_type"] = (
+                    self.advanced_module_tracker.module_helper_dict[fqn]["module_type"]
+                )
+
+                if "parameters" in self.advanced_module_tracker.module_helper_dict[fqn]:
+                    for (
+                        param_name,
+                        placement,
+                    ) in self.advanced_module_tracker.module_helper_dict[fqn][
+                        "parameters"
+                    ].items():
+                        json_dict["parameters"].append((param_name, placement))
+
+            # adds module collective information
+            if fqn in self.comm_module_counts:
+                for collective, count in self.comm_module_counts[fqn][
+                    "forward"
+                ].items():
+                    json_dict["collectives_forward"].append((str(collective), count))
+
+                for collective, count in self.comm_module_counts[fqn][
+                    "backward"
+                ].items():
+                    json_dict["collectives_backward"].append((str(collective), count))
+
+            # adds module operation information
+            forward_operations = []
+            backward_operations = []
+            checkpointing_operations = []
+
+            # only get operations if the minimum operation noise level is set to true
+            if include_DTensor_ops:
+                if fqn in self.comm_module_operation_counts:
+                    (
+                        forward_operations,
+                        backward_operations,
+                        checkpointing_operations,
+                    ) = self._get_operations_list(
+                        self.comm_module_operation_counts[fqn]
+                    )
+
+            # remove all operations who don't have DTensor inputs
+            if not include_ops:
+                forward_operations = [
+                    op for op in forward_operations if len(op["input_sharding"])
+                ]
+                backward_operations = [
+                    op for op in backward_operations if len(op["input_sharding"])
+                ]
+                checkpointing_operations = [
+                    op for op in checkpointing_operations if len(op["input_sharding"])
+                ]
+
+            # remove all operations in trivial operations set
+            if not include_trivial_ops:
+                forward_operations = [
+                    op
+                    for op in forward_operations
+                    if str(op["name"]) not in trivial_ops
+                ]
+                backward_operations = [
+                    op
+                    for op in backward_operations
+                    if str(op["name"]) not in trivial_ops
+                ]
+                checkpointing_operations = [
+                    op
+                    for op in checkpointing_operations
+                    if str(op["name"]) not in trivial_ops
+                ]
+
+            # converts operation information into string format for json.dumps()
+            forward_operations = copy.deepcopy(forward_operations)
+            for op in forward_operations:
+                op["name"] = str(op["name"])
+
+                for i in range(len(op["input_sharding"])):
+                    op["input_sharding"][i] = str(op["input_sharding"][i])
+                    op["input_shape"][i] = str(op["input_shape"][i])
+
+            backward_operations = copy.deepcopy(backward_operations)
+            for op in backward_operations:
+                op["name"] = str(op["name"])
+
+                for i in range(len(op["input_sharding"])):
+                    op["input_sharding"][i] = str(op["input_sharding"][i])
+                    op["input_shape"][i] = str(op["input_shape"][i])
+
+            checkpointing_operations = copy.deepcopy(checkpointing_operations)
+            for op in checkpointing_operations:
+                op["name"] = str(op["name"])
+
+                for i in range(len(op["input_sharding"])):
+                    op["input_sharding"][i] = str(op["input_sharding"][i])
+                    op["input_shape"][i] = str(op["input_shape"][i])
+
+            json_dict["operations_forward"] = forward_operations
+            json_dict["operations_backward"] = backward_operations
+            json_dict["operations_checkpointing"] = checkpointing_operations
+
+            if fqn not in self.advanced_module_tracker.parent_dict:
+                return json_dict
+
+            # recursively adds module's children
+            for ele in self.advanced_module_tracker.parent_dict[fqn]:
+                json_dict["children"].append(add_json_information({}, ele))
+
+            return json_dict
+
+        json_dict: dict[str, Any] = {}
+        add_json_information(json_dict, "Global")
+
+        # converts dictionary into json file
+        with open(file_name, "w") as json_file:
+            json.dump(json_dict, json_file, indent=4)
+
+    def generate_comm_debug_tracing_table(self, noise_level=3):
+        """
+        Generates detailed table displaying operations and collective tracing information
+        on a module level. Amount of information is dependent on noise_level
+
+        0. prints module-level collective counts
+        1. prints dTensor operations not included in trivial operations, module information
+        2. prints operations not included in trivial operations
+        3. prints all operations
+        """
+
+        (
+            include_DTensor_ops,
+            include_module_data,
+            include_ops,
+            include_trivial_ops,
+        ) = self._set_noise_parameters(noise_level)
+
+        table = ""
+        for fqn in self.advanced_module_tracker.module_helper_dict:
+            # setting up indentations for table formatting
+            indent = "  " * (
+                2 * self.advanced_module_tracker.module_helper_dict[fqn]["depth"]
+            )
+            table += f"{indent}{fqn}\n"
+
+            if include_module_data:
+                if (
+                    "module_type"
+                    in self.advanced_module_tracker.module_helper_dict[fqn]
+                ):
+                    module_type = self.advanced_module_tracker.module_helper_dict[fqn][
+                        "module_type"
+                    ]
+                    table += f"{indent}*module type: {module_type}\n"
+
+                if "parameters" in self.advanced_module_tracker.module_helper_dict[fqn]:
+                    table += f"{indent}*Parameter List\n"
+                    for (
+                        param_name,
+                        placement,
+                    ) in self.advanced_module_tracker.module_helper_dict[fqn][
+                        "parameters"
+                    ].items():
+                        table += f"{indent} *{param_name}: {placement}\n"
+
+            indent += "  "
+            collective_indent = "  " * (
+                2 * self.advanced_module_tracker.module_helper_dict[fqn]["depth"] + 2
+            )
+            operation_indent = "  " * (
+                2 * self.advanced_module_tracker.module_helper_dict[fqn]["depth"] + 3
+            )
+
+            # separate the module's collective and operations by forward and backward
+            forward_collectives = {}
+            backward_collectives = {}
+            if fqn in self.comm_module_counts:
+                forward_collectives = self.comm_module_counts[fqn]["forward"]
+                backward_collectives = self.comm_module_counts[fqn]["backward"]
+
+            forward_operations = []
+            backward_operations = []
+            checkpointing_operations = []
+
+            if include_DTensor_ops:
+                if fqn in self.comm_module_operation_counts:
+                    (
+                        forward_operations,
+                        backward_operations,
+                        checkpointing_operations,
+                    ) = self._get_operations_list(
+                        self.comm_module_operation_counts[fqn]
+                    )
+
+            def add_tracing_information(table, collectives_dict, operation_list):
+                """
+                adds tracing information for module's forward or backward
+                """
+                for collective, count in collectives_dict.items():
+                    table += (
+                        f"\033[1;33m{collective_indent}*{collective}: {count}\033[0m\n"
+                    )
+
+                def add_operations(
+                    table, operation, collective_indent, operation_indent
+                ):
+                    """
+                    adds operation information to the table
+                    """
+                    table += f"\033[1;33m{collective_indent}**{operation_name}\033[0m\n"
+
+                    if len(operation["input_shape"]):
+                        operation_shape = operation["input_shape"]
+                        operation_sharding = operation["input_sharding"]
+                        operation_device_mesh = operation["device_mesh"]
+
+                        table += f"\033[1;31m{operation_indent}shape: {operation_shape}\033[0m\n"
+                        table += f"\033[1;31m{operation_indent}sharding: {operation_sharding}\033[0m\n"
+                        table += f"\033[1;31m{operation_indent}device mesh: {operation_device_mesh}\033[0m\n"
+
+                    return table
+
+                for operation in operation_list:
+                    operation_name = str(operation["name"])
+
+                    # include all operations
+                    if include_trivial_ops:
+                        table = add_operations(
+                            table, operation, collective_indent, operation_indent
+                        )
+
+                    # include all operations not in trivial operations
+                    elif include_ops and operation_name not in trivial_ops:
+                        table = add_operations(
+                            table, operation, collective_indent, operation_indent
+                        )
+
+                    # only include dTensor operations not in trivial set
+                    elif (
+                        include_DTensor_ops
+                        and (operation_name not in trivial_ops)
+                        and len(operation["input_shape"])
+                    ):
+                        table = add_operations(
+                            table, operation, collective_indent, operation_indent
+                        )
+
+                return table
+
+            if len(forward_collectives) or len(forward_operations):
+                table += f"{indent}FORWARD PASS\n"
+                table = add_tracing_information(
+                    table, forward_collectives, forward_operations
+                )
+
+            if len(backward_collectives) or len(backward_operations):
+                table += f"{indent}BACKWARD PASS\n"
+                table = add_tracing_information(
+                    table, backward_collectives, backward_operations
+                )
+
+            if len(checkpointing_operations):
+                table += f"{indent}ACTIVATION CHECKPOINTING\n"
+                table = add_tracing_information(table, {}, checkpointing_operations)
+
+        return table
+
+    def _get_operations_list(self, module_operation_counts):
+        forward_operations = [
+            op for op in module_operation_counts["operations_list"] if not op["is_bw"]
+        ]
+        backward_operations = [
+            op
+            for op in module_operation_counts["operations_list"]
+            if op["is_bw"] and not op["is_activation_checkpointing"]
+        ]
+        checkpointing_operations = [
+            op
+            for op in module_operation_counts["operations_list"]
+            if op["is_activation_checkpointing"]
+        ]
+
+        return forward_operations, backward_operations, checkpointing_operations
+
+    def get_total_counts(self) -> int:
+        return sum(self.comm_counts.values())
+
+    def get_comm_counts(self) -> dict[Any, int]:
+        """Returns the communication counts as a dictionary.
+
+        Returns:
+            Dict[Any, int]: The communication counts as a dictionary.
+        """
+        return self.comm_counts
+
+    def get_parameter_info(self) -> dict[str, dict[str, Any]]:
+        return self.advanced_module_tracker.module_parameters_dict
+
+    def get_sharding_info(self) -> dict[str, dict[str, Any]]:
+        return self.advanced_module_tracker.sharding_dict
+
+    def __enter__(self):
+        self.comm_counts.clear()
+        self.comm_module_counts.clear()
+        self.comm_module_counts["Global"] = {}
+        self.comm_module_counts["Global"]["forward"] = defaultdict(int)
+        self.comm_module_counts["Global"]["backward"] = defaultdict(int)
+
+        self.comm_module_operation_counts.clear()
+
+        super().__enter__()
+        self.advanced_module_tracker.__enter__()
+        return self
+
+    # pyrefly: ignore [bad-override]
+    def __exit__(self, *args):
+        self.advanced_module_tracker.__exit__()
+        super().__exit__(*args)
+
+    def log_comm_debug_tracing_table_to_file(
+        self, file_name="comm_mode_log.txt", noise_level=3
+    ):
+        """
+        Alternative to console CommDebugMode output, writes to file specified by the user
+        """
+        ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
+        table = ansi_escape.sub("", self.generate_comm_debug_tracing_table(noise_level))
+
+        with open(file_name, "w") as log_file:
+            log_file.write(table)
+
+    def _set_noise_parameters(self, noise_level):
+        """
+        sets variables controlling what information displays based on noise level
+        """
+        include_DTensor_ops = False
+        include_module_data = False
+        include_ops = False
+        include_trivial_ops = False
+
+        if noise_level > 0:
+            include_DTensor_ops = True
+            include_module_data = True
+
+        if noise_level > 1:
+            include_ops = True
+
+        if noise_level > 2:
+            include_trivial_ops = True
+
+        return (
+            include_DTensor_ops,
+            include_module_data,
+            include_ops,
+            include_trivial_ops,
+        )
+
+    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
+        # When running this mode with DTensor, ordinarily all modes will
+        # run **before** subclasses get a chance to run.
+        # Returning NotImplemented here gives us a chance to let DTensor
+        # run and desugar into comms ops, before CommDebugMode sees them.
+
+        # sets up operation-level collective count
+        if self.advanced_module_tracker.name not in self.comm_module_operation_counts:
+            # dictionary should hold module input and output shape, operations list and collective counter
+            self.comm_module_operation_counts[self.advanced_module_tracker.name] = {
+                "operations_list": []
+            }
+        operation_dict = {}
+        operation_dict["name"] = func
+
+        operation_dict["input_shape"] = []
+        operation_dict["input_sharding"] = []
+        operation_dict["device_mesh"] = ""
+
+        # tracks if the operation is part of the backward pass
+        operation_dict["is_bw"] = self.advanced_module_tracker.is_bw
+
+        # tracks if the operation is part of activation checkpointing
+        operation_dict["is_activation_checkpointing"] = (
+            self.advanced_module_tracker.activation_checkpointing
+        )
+
+        if any(t == DTensor for t in types):
+            for ele in args:
+                if isinstance(ele, DTensor):
+                    # saves shapes and placements of all DTensor args
+                    operation_dict["input_shape"].append(ele.shape)
+                    operation_dict["input_sharding"].append(ele.placements)
+                    operation_dict["device_mesh"] = str(ele.device_mesh)
+
+            self.comm_module_operation_counts[self.advanced_module_tracker.name][
+                "operations_list"
+            ].append(operation_dict)
+
+            return NotImplemented
+
+        kwargs = kwargs if kwargs else {}
+        out = func(*args, **kwargs)
+        func_packet = func._overloadpacket
+
+        # We have many tests that use CommDebugMode to verify the occurrence of
+        # collectives. These tests do so by querying comm_counts with legacy
+        # funcol ops as key. For the purpose of native funcol migration, we
+        # need these tests to work for both legacy and native funcol. To avoid
+        # the need to modify all tests to accommodate the two implementations,
+        # we make CommDebugMode translate native funcol ops into legacy funcol
+        # ops until the migration finishes.
+
+        if func_packet in self.comm_registry or func_packet in c10d_collective_ops:
+            if func_packet in NATIVE_TO_PY_MAPPING:
+                func_packet = NATIVE_TO_PY_MAPPING[func_packet]
+            self.comm_counts[func_packet] += 1
+
+            key = "forward"
+            if self.advanced_module_tracker.is_bw:
+                key = "backward"
+
+            # adds collective count to current module
+            if self.advanced_module_tracker.name not in self.comm_module_counts:
+                self.comm_module_counts[self.advanced_module_tracker.name] = {}
+                self.comm_module_counts[self.advanced_module_tracker.name][
+                    "forward"
+                ] = defaultdict(int)
+                self.comm_module_counts[self.advanced_module_tracker.name][
+                    "backward"
+                ] = defaultdict(int)
+            self.comm_module_counts[self.advanced_module_tracker.name][key][
+                func_packet
+            ] += 1
+
+            # adds collective count to parent modules
+            for par in self.advanced_module_tracker.module_parents_dict[
+                self.advanced_module_tracker.name
+            ]:
+                # makes sure we aren't double counting when current sub-module hasn't been removed from parents
+                if par != self.advanced_module_tracker.name:
+                    if par not in self.comm_module_counts:
+                        self.comm_module_counts[par] = {}
+                        self.comm_module_counts[par]["forward"] = defaultdict(int)
+                        self.comm_module_counts[par]["backward"] = defaultdict(int)
+                    self.comm_module_counts[par][key][func_packet] += 1
+
+        # if tensor op uses fake tensors, return
+        if detect_fake_mode(args):
+            return out
+
+        # add tensor operation to module operation list
+        self.comm_module_operation_counts[self.advanced_module_tracker.name][
+            "operations_list"
+        ].append(operation_dict)
+
+        return out
+
+    def __repr__(self):
+        return f"CommDebugMode(get_total_counts()={self.get_total_counts()})"
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_op_coverage.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_op_coverage.py
new file mode 100644
index 0000000000000000000000000000000000000000..7315d64d697a88f012e0bd67aa2e3e6e1141d7e1
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_op_coverage.py
@@ -0,0 +1,106 @@
+# mypy: allow-untyped-defs
+from operator import itemgetter
+
+import torch
+import torch.fx
+import torch.nn as nn
+from functorch.compile import make_boxed_func
+from torch._functorch.compilers import aot_module
+from torch._inductor.decomposition import select_decomp_table
+from torch.distributed.tensor import DTensor
+
+
+inductor_decomps = select_decomp_table()
+
+graphs: list[torch.fx.GraphModule] = []
+
+
+def fwd_bwd_compiler(fx_g, _):
+    graphs.append(fx_g)
+    return make_boxed_func(fx_g)
+
+
+def get_inductor_decomp_graphs(model: nn.Module, args, kwargs):
+    """
+    Obtain forward and backward graphs of a model with inductor decompositions using tracing and aot_module.
+
+    Convenient util to get the fwd and bwd graphs of an arbitrary model
+    with inductor decompositions. Note that this would simply do tracing
+    with aot_module and don't ensure correctness. This is useful to track
+    the ops needed in DTensor.
+    """
+    compiled_mod = aot_module(
+        model, fw_compiler=fwd_bwd_compiler, decompositions=inductor_decomps
+    )
+    output = compiled_mod(*args, **kwargs)
+
+    if output.ndim != 0:
+        # if output is not a scalar tensor, by default sum it in order to
+        # run backward
+        output = output.sum()
+
+    output.backward()
+
+    # one fwd, one bwd graph
+    assert len(graphs) == 2
+    return graphs
+
+
+def print_op_coverage_summary(model: nn.Module, args, kwargs, *, output_csv=False):
+    """
+    Util to print the operator coverage summary of a certain model with tabulute.
+
+    Must have tabulate module installed.
+    """
+    # python module required for summary
+    import csv
+
+    from tabulate import tabulate
+
+    fwd_graph, bwd_graph = get_inductor_decomp_graphs(model, args, kwargs)
+
+    op_counts = {}
+
+    for node in fwd_graph.graph.nodes:
+        if node.op == "call_function" and isinstance(
+            node.target, torch._ops.OpOverload
+        ):
+            if node.target not in op_counts:
+                op_counts[node.target] = 0
+
+            op_counts[node.target] += 1
+
+    for node in bwd_graph.graph.nodes:
+        if node.op == "call_function" and isinstance(
+            node.target, torch._ops.OpOverload
+        ):
+            if node.target not in op_counts:
+                op_counts[node.target] = 0
+
+            op_counts[node.target] += 1
+
+    op_infos = []
+
+    for op, count in op_counts.items():
+        supported = op in DTensor._op_dispatcher.sharding_propagator.op_to_rules
+        op_infos.append([op, str(op._schema), count, supported])
+
+    # sort the op info base on the total count index
+    count_idx = 2
+    op_infos.sort(key=itemgetter(count_idx), reverse=True)
+
+    headers = ["Operator", "Schema", "Total Count", "Supported"]
+    # pyrefly: ignore [bad-argument-type]
+    print(tabulate(op_infos, headers=headers))
+
+    if output_csv:
+        # Open a CSV file for writing
+        with open("op_summary.csv", "w", newline="") as csv_file:
+            # Create a CSV writer object
+            csv_writer = csv.writer(csv_file)
+
+            csv_writer.writerow(headers)
+            # Write each table row to the CSV file
+            for row in op_infos:
+                # pyrefly: ignore [bad-argument-type]
+                csv_writer.writerow(row)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_visualize_sharding.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_visualize_sharding.py
new file mode 100644
index 0000000000000000000000000000000000000000..20dd0c3e9f4b47f5e8427855221b9e0c10535377
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/debug/_visualize_sharding.py
@@ -0,0 +1,227 @@
+# mypy: allow-untyped-defs
+import importlib.util
+
+import numpy as np
+
+from torch._prims_common import ShapeType
+from torch.distributed.tensor._utils import _compute_local_shape_and_global_offset
+
+
+__all__ = ["visualize_sharding"]
+
+Color = tuple[float, float, float]
+
+
+def _create_table(
+    shards: list[tuple[tuple[int, int], tuple[int, int], int]], device_kind: str = ""
+):
+    """
+    Creates a tabulate table given row and column ranges with device name
+    """
+    from tabulate import tabulate
+
+    # Extract unique row and column ranges
+    row_ranges = sorted({block[0] for block in shards})
+    col_ranges = sorted({block[1] for block in shards})
+
+    # Create a matrix initialized with empty strings
+    matrix = [["" for _ in col_ranges] for _ in row_ranges]
+
+    # Fill the matrix with values
+    for block in shards:
+        row_index = row_ranges.index(block[0])
+        col_index = col_ranges.index(block[1])
+        if matrix[row_index][col_index] == "":
+            matrix[row_index][col_index] = device_kind + ":" + str(block[2])
+        else:
+            matrix[row_index][col_index] += "," + str(block[2])
+
+    # Prepare headers
+    row_headers = [f"Row {r[0]}-{r[1]}" for r in row_ranges]
+    col_headers = [f"Col {c[0]}-{c[1]}" for c in col_ranges]
+
+    return tabulate(matrix, headers=col_headers, showindex=row_headers)
+
+
+def make_color_iter(color_map, num_rows, num_cols):
+    num_colors = num_rows * num_cols
+    for idx in range(num_colors):
+        yield color_map(idx)
+
+
+def _canonicalize_color(color: Color) -> str:
+    if isinstance(color, str):
+        return color
+    r, g, b = (int(a * 255) for a in color)
+    return f"#{r:02X}{g:02X}{b:02X}"
+
+
+def _get_text_color(color: str) -> str:
+    r, g, b = map(lambda x: int(x, 16), (color[1:3], color[3:5], color[5:7]))  # noqa: C417
+    if (r * 0.299 + g * 0.587 + b * 0.114) > 186:
+        return "#000000"
+    return "#ffffff"
+
+
+def _create_rich_table(
+    shape: ShapeType,
+    shards: list[tuple[tuple[int, int], tuple[int, int], int]],
+    device_kind: str = "",
+    scale: float = 1.0,
+    min_width: int = 9,
+    max_width: int = 80,
+):
+    import matplotlib
+    import rich.align
+    import rich.box
+    import rich.console
+    import rich.padding
+    import rich.style
+    import rich.table
+
+    dtensor_height = shape[0]
+    dtensor_width = shape[1] if len(shape) == 2 else 1
+
+    row_ranges = sorted({s[0] for s in shards})
+    col_ranges = sorted({s[1] for s in shards})
+    num_rows, num_cols = len(row_ranges), len(col_ranges)
+
+    console = rich.console.Console(width=max_width)
+    use_color = console.color_system
+    color_iter = make_color_iter(matplotlib.colormaps["tab20b"], num_rows, num_cols)
+
+    base_height = int(10 * scale)
+    aspect_ratio = (shape[1] if len(shape) == 2 else 1) / shape[0]
+    base_width = int(base_height * aspect_ratio)
+    height_to_width_ratio = 2.5
+
+    table = rich.table.Table(
+        show_header=False,
+        show_lines=not use_color,
+        padding=0,
+        highlight=not use_color,
+        pad_edge=False,
+        box=rich.box.SQUARE if not use_color else None,
+    )
+    for row in range(num_rows):
+        table_row = []
+        for col in range(num_cols):
+            entry = (
+                device_kind
+                + ":"
+                + ",".join(
+                    [
+                        str(device_id)
+                        for row_range, col_range, device_id in shards
+                        if row_range == row_ranges[row] and col_range == col_ranges[col]
+                    ]
+                )
+            )
+            width = (col_ranges[col][1] - col_ranges[col][0]) / dtensor_width
+            width = int(width * base_width * height_to_width_ratio)
+            height = (row_ranges[row][1] - row_ranges[row][0]) / dtensor_height
+            height = int(height * base_height)
+            left_padding, remainder = divmod(width - len(entry) - 2, 2)
+            right_padding = left_padding + remainder
+            top_padding, remainder = divmod(height - 2, 2)
+            bottom_padding = top_padding + remainder
+            if use_color:
+                color = _canonicalize_color(next(color_iter)[:3])
+                text_color = _get_text_color(color)
+                top_padding += 1
+                bottom_padding += 1
+                left_padding += 1
+                right_padding += 1
+            else:
+                color = None
+                text_color = None
+            padding = (
+                max(top_padding, 0),
+                max(right_padding, 0),
+                max(bottom_padding, 0),
+                max(left_padding, 0),
+            )
+            table_row.append(
+                rich.padding.Padding(
+                    rich.align.Align(entry, "center", vertical="middle"),
+                    padding,
+                    style=rich.style.Style(bgcolor=color, color=text_color),
+                )
+            )
+        table.add_row(*table_row)
+    console.print(table, end="\n\n")
+
+
+def visualize_sharding(dtensor, header="", use_rich: bool = False):
+    """
+    Visualizes sharding in the terminal for :class:`DTensor` that are 1D or 2D.
+
+    .. note:: This requires the ``tabulate`` package, or ``rich`` and ``matplotlib``.
+              No sharding info will be printed for empty tensors
+    """
+    if dtensor.numel() == 0:  # Do not print empty dtensors.
+        return
+
+    if len(dtensor.shape) >= 3:
+        raise RuntimeError("visualize sharding supports only 1D or 2D DTensor")
+
+    if dtensor.device_mesh.get_coordinate() is None:  # current rank is not in the mesh
+        return
+
+    # Only display the visualization once for each DTensor, on the rank whose
+    # coordinate is 0 on all dimensions. For example, if the mesh is a full mesh,
+    # we will only print on rank 0.
+    local_rank_zero_on_all_dim = all(
+        dtensor.device_mesh.get_local_rank(mesh_dim=dim) == 0
+        for dim in range(dtensor.device_mesh.ndim)
+    )
+    if not local_rank_zero_on_all_dim:
+        return
+
+    device_coords = {
+        int(device_index.item()): list(coord)
+        for coord, device_index in np.ndenumerate(
+            np.array(dtensor.device_mesh.mesh.tolist())
+        )
+    }
+
+    device_shard_shape_and_offsets = {
+        device_index: _compute_local_shape_and_global_offset(
+            dtensor.shape,
+            dtensor.device_mesh.shape,
+            device_coords[device_index],
+            dtensor.placements,
+        )
+        for device_index in device_coords
+    }
+
+    # Extend shards in a 1D tensor to 2D
+    device_shard_shape_and_offsets = {
+        device_index: (
+            shape if len(shape) == 2 else (shape[0], 1),
+            offset if len(offset) == 2 else (offset[0], 0),
+        )
+        for device_index, (shape, offset) in device_shard_shape_and_offsets.items()
+    }
+
+    shards = [
+        (
+            (offset[0], offset[0] + shape[0] - 1),
+            (offset[1], offset[1] + shape[1] - 1),
+            device_index,
+        )
+        for device_index, (shape, offset) in device_shard_shape_and_offsets.items()
+    ]
+
+    if (
+        importlib.util.find_spec("rich")
+        and importlib.util.find_spec("matplotlib")
+        and use_rich
+    ):
+        _create_rich_table(
+            dtensor.shape, shards, device_kind=dtensor.device_mesh.device_type
+        )
+    elif importlib.util.find_spec("tabulate"):
+        print(_create_table(shards, device_kind=dtensor.device_mesh.device_type))
+    else:
+        raise ValueError("`visualize_sharding` requires either `rich` or `tabulate`.")
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..495ab31d5c7b1dc5b20fff5f593f05790f049ad0
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_attention.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_attention.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..82e1ef3353f66fedba14724b28c8f559b62eddce
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_attention.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_func_map.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_func_map.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a8aa5a1cb955203f534d10dfbf3757b22c5b4d1e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_func_map.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_register_sharding.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_register_sharding.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..881f4bd589829173de9e0df78cf39751416a84c2
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_register_sharding.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_tp_transform.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_tp_transform.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b67f3fb4a2ec401e56f4ef08bc6109d028cfdc05
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/__pycache__/_tp_transform.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..009255631796fc56b6607ae46b6ae4f91589e83b
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__init__.py
@@ -0,0 +1,46 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+# Context Parallel components
+
+from ._attention import (
+    _CausalBehavior,
+    _context_parallel_shard,
+    _ContextParallel,
+    _cp_options,
+    _disable_context_parallel_dispatcher,
+    _enable_context_parallel_dispatcher,
+    _is_causal_behavior,
+    _RotateMethod,
+    context_parallel,
+    context_parallel_unshard,
+    set_rotate_method,
+)
+from ._cp_custom_ops import flex_cp_allgather
+from ._load_balancer import (
+    _HeadTailLoadBalancer,
+    _LoadBalancer,
+    _PerDocumentHeadTailLoadBalancer,
+    _PTRRLoadBalancer,
+)
+
+
+__all__ = [
+    # From _attention
+    "_CausalBehavior",
+    "_context_parallel_shard",
+    "_ContextParallel",
+    "_cp_options",
+    "_disable_context_parallel_dispatcher",
+    "_enable_context_parallel_dispatcher",
+    "_is_causal_behavior",
+    "_RotateMethod",
+    "context_parallel",
+    "context_parallel_unshard",
+    "set_rotate_method",
+    # From _cp_custom_ops
+    "flex_cp_allgather",
+    # From _load_balancer
+    "_HeadTailLoadBalancer",
+    "_LoadBalancer",
+    "_PerDocumentHeadTailLoadBalancer",
+    "_PTRRLoadBalancer",
+]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..62117b9b60a6a640d61620a5911f8da0cb7f4a75
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_attention.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_attention.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2be2e377c957e4ea77ea4c94c78a4b48220ad679
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_attention.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_cp_custom_ops.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_cp_custom_ops.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d12658c29bd985d2f059b5a567b6e032c969d4d4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_cp_custom_ops.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_load_balancer.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_load_balancer.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef8bbb9401e59416684f75d84c9be2b6dcea6038
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_load_balancer.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_sharding_rules.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_sharding_rules.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f961b1c1d03a98bd4dbd137a10d73301d93a2248
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/__pycache__/_sharding_rules.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_attention.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a1c6299dfca4912736feddd818f33e1e618e8d9
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_attention.py
@@ -0,0 +1,1675 @@
+import contextlib
+import itertools
+import logging
+import types
+from abc import ABC, abstractmethod
+from collections.abc import Callable, Generator, Mapping, Sequence
+from dataclasses import dataclass
+from enum import auto, Enum
+from functools import partial
+from typing import Any, cast, Protocol, TypeAlias
+
+import torch
+import torch.distributed as dist
+import torch.distributed._functional_collectives as ft_c
+import torch.distributed.distributed_c10d as c10d
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.distributed.device_mesh import DeviceMesh
+from torch.distributed.tensor import distribute_tensor, DTensor, Shard
+from torch.distributed.tensor.parallel import ParallelStyle
+from torch.nn.attention.flex_attention import (
+    _mask_mod_signature,
+    BlockMask,
+    create_block_mask,
+)
+from torch.utils._pytree import tree_flatten, tree_unflatten
+
+from ._cp_custom_ops import flex_cp_allgather
+from ._load_balancer import _create_default_load_balancer, _LoadBalancer
+
+
+__all__ = [
+    "_CausalBehavior",
+    "_context_parallel_shard",
+    "_ContextParallel",
+    "_cp_options",
+    "_disable_context_parallel_dispatcher",
+    "_enable_context_parallel_dispatcher",
+    "_is_causal_behavior",
+    "_RotateMethod",
+    "context_parallel",
+    "context_parallel_unshard",
+    "set_rotate_method",
+]
+
+
+class _CausalBehavior(Enum):
+    SKIP = None
+    NOT_IS_CAUSAL = False
+    IS_CAUSAL = True
+
+
+class _RotateMethod(Enum):
+    ALL_TO_ALL = auto()
+    ALL_GATHER = auto()
+
+
+aten = torch.ops.aten
+logger = logging.getLogger(__name__)
+
+
+class _DispatchMode(Enum):
+    MONKEY_PATCH = auto()
+    MODULE_WRAPPER = auto()
+
+
+_dispatch_mode: _DispatchMode = _DispatchMode.MONKEY_PATCH
+
+
+@dataclass
+class _ContextParallelOptions:
+    # Whether to upcast parameters and gradients to float32 to avoid accumulation
+    # errors. It is likely this is always True, but we currently keep this variable
+    # for experimental purposes.
+    convert_to_f32: bool = True
+    enable_load_balance: bool = True
+    rotate_method: _RotateMethod = _RotateMethod.ALL_GATHER
+
+
+_cp_options = _ContextParallelOptions()
+
+
+def _is_causal_behavior(
+    rank: int, world_size: int, i: int, is_causal: bool
+) -> _CausalBehavior:
+    """
+    Calculate is_causal behavior for each KV block. The attention can either be
+    calculated in full, not at all or with the causal mask applied.
+    """
+    if not is_causal:
+        return _CausalBehavior.NOT_IS_CAUSAL
+
+    if i == 0:
+        return _CausalBehavior.IS_CAUSAL
+
+    source_rank = (rank - i) % world_size
+    if source_rank < rank or _cp_options.enable_load_balance:
+        return _CausalBehavior.NOT_IS_CAUSAL
+    else:
+        return _CausalBehavior.SKIP
+
+
+def _maybe_wait(tensor: torch.Tensor) -> torch.Tensor:
+    """
+    When tracing the code, the result tensor is not an AsyncCollectiveTensor,
+    so we cannot call ``wait()``.
+    """
+    if isinstance(tensor, ft_c.AsyncCollectiveTensor):
+        return tensor.wait()
+    return tensor
+
+
+def _partial_update(
+    original: torch.Tensor,
+    new: torch.Tensor,
+    dim: int,
+    n_chunks: int,
+    idx: int,
+    add: bool,
+) -> torch.Tensor:
+    """
+    This API partially updates a chunk of ``original`` tensor. The ``original``
+    tensor will be first chunked along ``dim`` dimension, then the ``idx`` chunk
+    will be updated with ``new``. If ``add`` is True, the chunk will be added
+    with ``new``, otherwise the chunk will be replaced by ``new``.
+
+    The result is a tensor that is the same size as ``original``.
+    """
+    chunks = list(original.chunk(n_chunks, dim=dim))
+    assert chunks[idx].shape == new.shape, (original.shape, new.shape, idx)
+    if add:
+        chunks[idx] += new
+    else:
+        chunks[idx] = new
+    return torch.cat(chunks, dim=dim)
+
+
+class _SDPAMerger:
+    """A class to help merge the local SDPA result."""
+
+    def __init__(self, convert_to_f32: bool, seq_dim: int):
+        self._seq_dim = seq_dim
+        self._out: torch.Tensor | None = None
+        self._lse: torch.Tensor | None = None
+        self._should_lse_squeeze = False
+        self._convert_to_f32 = convert_to_f32
+        self._out_dtype = torch.float32
+        self._lse_dtype = torch.float32
+
+    def _merge_one(
+        self, block_out: torch.Tensor, block_lse: torch.Tensor, partial: bool
+    ) -> None:
+        # The cuDNN backend preserves the last dimension for LSE.
+        # Apply unsqueeze only if the input does not already have
+        # the required dimensionality.
+        if len(block_lse.shape) < len(block_out.shape):
+            block_lse = block_lse.unsqueeze(dim=-1)
+            self._should_lse_squeeze = True
+        assert len(block_lse.shape) == len(block_out.shape)
+
+        if self._lse is None:
+            self._lse = block_lse
+            self._out = block_out
+        else:
+            ROUND_ROBIN_CYCLE = 2
+            assert self._lse is not None
+            assert self._out is not None
+            lse = (
+                self._lse.chunk(ROUND_ROBIN_CYCLE, dim=self._seq_dim)[1]
+                if partial
+                else self._lse
+            )
+            out = (
+                self._out.chunk(ROUND_ROBIN_CYCLE, dim=self._seq_dim)[1]
+                if partial
+                else self._out
+            )
+
+            # The algorithm from
+            # github.com/zhuzilin/ring-flash-attention/pull/34#issuecomment-2076126795
+            # gives a relatively stable result.
+            out = out - F.sigmoid(block_lse - lse) * (out - block_out)
+            lse = lse - F.logsigmoid(lse - block_lse)
+            if partial:
+                self._lse = _partial_update(
+                    self._lse,
+                    lse,
+                    dim=self._seq_dim,
+                    n_chunks=ROUND_ROBIN_CYCLE,
+                    idx=1,
+                    add=False,
+                )
+                self._out = _partial_update(
+                    self._out,
+                    out,
+                    dim=self._seq_dim,
+                    n_chunks=ROUND_ROBIN_CYCLE,
+                    idx=1,
+                    add=False,
+                )
+            else:
+                self._lse = lse
+                self._out = out
+
+    def step(self, out: torch.Tensor, lse: torch.Tensor, partial: bool) -> None:
+        self._out_dtype = out.dtype
+        self._lse_dtype = lse.dtype
+
+        if self._convert_to_f32:
+            out = out.to(torch.float32)
+            lse = lse.to(torch.float32)
+
+        self._merge_one(out, lse, partial)
+
+    def results(self) -> tuple[torch.Tensor, torch.Tensor]:
+        assert self._out is not None
+        assert self._lse is not None
+        out = self._out.to(self._out_dtype)
+        if self._should_lse_squeeze:
+            lse = self._lse.squeeze(-1).to(self._lse_dtype)
+        else:
+            lse = self._lse.to(self._lse_dtype)
+        return out, lse
+
+
+class _AttentionOp(Protocol):
+    def __call__(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        **kwargs: object,
+    ) -> tuple[torch.Tensor, ...]: ...
+
+
+class _RingRotater(ABC):
+    @abstractmethod
+    def __init__(self, pg: dist.ProcessGroup, seq_dim: int) -> None: ...
+
+    @abstractmethod
+    def exchange_buffers(self, curr_buffer: torch.Tensor) -> None: ...
+
+    @abstractmethod
+    def next_buffer(self) -> torch.Tensor: ...
+
+
+class _AllToAllRotater(_RingRotater):
+    """Use all_to_all to send the kv to the next rank."""
+
+    def __init__(self, pg: dist.ProcessGroup, seq_dim: int) -> None:
+        self._pg = pg
+        self._seq_dim = seq_dim
+        self._buffer: torch.Tensor | None = None
+
+    def exchange_buffers(self, curr_buffer: torch.Tensor) -> None:
+        curr_buffer = curr_buffer.contiguous()
+        size = dist.get_world_size(self._pg)
+        dsts = list(range(1, size)) + [0]
+        self._buffer = ft_c.permute_tensor(curr_buffer, dsts, self._pg)
+
+    def next_buffer(self) -> torch.Tensor:
+        assert self._buffer is not None
+        return _maybe_wait(self._buffer)
+
+
+class _AllGatherRotater(_RingRotater):
+    """
+    Allgather the kv and return only the required kv.
+    Only one communication will be done.
+    """
+
+    def __init__(self, pg: dist.ProcessGroup, seq_dim: int) -> None:
+        self._pg = pg
+        self._seq_dim = seq_dim
+        self._aggregated_buffer: torch.Tensor | None = None
+        self._idx = 0
+
+    def exchange_buffers(self, curr_buffer: torch.Tensor) -> None:
+        # We only need to perform allgather once.
+        self._idx += 1
+        if self._aggregated_buffer is None:
+            self._aggregated_buffer = ft_c.all_gather_tensor(
+                curr_buffer.contiguous(), gather_dim=0, group=self._pg
+            )
+
+    def next_buffer(self) -> torch.Tensor:
+        rank = dist.get_rank(self._pg)
+        idx = rank - self._idx
+
+        assert self._aggregated_buffer is not None
+        self._aggregated_buffer = _maybe_wait(self._aggregated_buffer)
+        return self._aggregated_buffer.chunk(dist.get_world_size(self._pg))[idx]
+
+
+def _create_rotater(
+    pg: dist.ProcessGroup, seq_dim: int, method: _RotateMethod | None = None
+) -> _RingRotater:
+    if method is None:
+        method = _cp_options.rotate_method
+
+    if method == _RotateMethod.ALL_TO_ALL:
+        return _AllToAllRotater(pg, seq_dim)
+    elif method == _RotateMethod.ALL_GATHER:
+        return _AllGatherRotater(pg, seq_dim)
+    else:
+        raise NotImplementedError(f"Unknown method {method}")
+
+
+def _templated_ring_attention(
+    group: dist.ProcessGroup,
+    seq_dim: int,
+    op: _AttentionOp,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    is_causal: bool = False,
+    **kwargs: object,
+) -> tuple[torch.Tensor, ...]:
+    """
+    A generalized ring attention implementation that can support multiple attention ops.
+
+    Note [Context parallelism load balance algorithm for causal masking]
+    =====================
+    This explanation uses an example to illustrate the CP algorithm with causal
+    masking.
+
+    Consider a scenario where the sequence length of q, k, and v is 4 (e.g.,
+    q = (q0, q1, q2, q3)), and there are two ranks. For simplicity, we will discuss
+    only q and k, as v follows the same pattern as k.
+
+    The diagram below represents a complete QK^T operation without parallelism.
+    The `****` entries indicate that the result is not required due to causal
+    masking (e.g., q0k1 is marked as `****`).
+
+    +----+------------------------+
+    |    |  k0    k1   k2     k3  |
+    +----+------------------------+
+    | q0 | q0k0, ****, ****, **** |
+    | q1 | q1k0, q1k1, ****, **** |
+    | q2 | q2k0, q2k1, q2k2, **** |
+    | q3 | q3k0, q3k1, q3k2, q3k3 |
+    +----+------------------------+
+
+    ### No Load Balance:
+
+    In this scenario, each rank owns a local chunk of q, k, and v, with each chunk
+    containing two elements. Rank0 is responsible for managing (q0, q1) and (k0, k1),
+    while rank1 manages (q2, q3) and (k2, k3).
+
+    First Iteration: Both rank0 and rank1 perform SDPA with their local qkv pairs.
+    Causal masking is enabled as some results are not required (e.g., q0k1).
+
+    Second Iteration: Local queries remain the same, but local kv pairs are exchanged.
+    Rank0 now has (q0, q1) and (k2, k3); rank1 has (q2, q3) and (k0, k1). Rank0 performs
+    no computation, while rank1 computes locally without causal masking since all results
+    (q2k0, q2k1, q3k0, q3k1) are needed.
+
+    ### Round-robin Load Balance:
+
+    In this setup, each rank owns two local chunks of q, k, and v, with each chunk
+    containing one element. Rank0 manages (q0, q3) and (k0, k3); Rank1 manages (q1, q2)
+    and (k1, k2). Although the local chunks are not consecutive, they are concatenated to
+    enable SDPA to be performed in a single call for each step. Consequently, the chunk()
+    function may be required to prepare the correct q, k, and v configurations.
+
+    First Iteration: Both ranks perform SDPA with their local qkv pairs, similar to the
+    no-load-balance case. This iteration corresponds to the `if` of the
+    (`if, `elif`, `else`) in the implementation.
+
+    Second Iteration: Rank0 now has (q0, q3) and (k1, k2); rank1 has (q1, q2) and
+    (k0, k3). For rank0, no computation is needed for q0. However, computations for
+    q3k1 and q3k2 are required, so only q3 is used for SDPA. This corresponds to the
+    `else` of the (`if`, `elif`, `else`) in the implementation.
+    For rank1, k3 is not needed for q1 and q2, so only k0 is used for SDPA. This
+    corresponds to the `elif` of (`if`, `elif`, `else`) in the implementation.
+
+    Parameters
+    ----------
+    op:
+        The attention op to use
+    *args:
+        additional args are passed to the op
+    **kwargs:
+        additional kwargs are passed to the op
+
+    Returns
+    -------
+    out:
+        The merged attention output
+    softmax_lse:
+        The logsumexp of the merged attention output
+    """
+    if is_causal and (query.size(2) != key.size(2)):
+        raise NotImplementedError(
+            "is_causal requires the same query and context sequence lengths"
+        )
+    if not is_causal and _cp_options.enable_load_balance:
+        raise RuntimeError("Load balancing requires `is_causal=True`.")
+
+    assert isinstance(group, dist.ProcessGroup), (
+        "process group must be single dimension"
+    )
+    rank = dist.get_rank(group)
+    size = dist.get_world_size(group)
+
+    next_kv = None
+
+    # Without making key and value contiguous(), the loss curve is bad.
+    # TODO(fegin): figure out why this is a requirement since SDPA does not have
+    # this requirement.
+    key = key.contiguous()
+    value = value.contiguous()
+
+    sdpa_merger = _SDPAMerger(_cp_options.convert_to_f32, seq_dim=seq_dim)
+
+    rest: list[Any]
+    out: torch.Tensor
+    logsumexp: torch.Tensor
+
+    rotater = _create_rotater(group, 2)
+
+    for i in range(size):
+        if i > 0:
+            # Wait for the kv from the (cp_rank - 1) rank.
+            next_kv = rotater.next_buffer()
+            key = next_kv[: key.numel()].reshape(key.shape)
+            value = next_kv[key.numel() :].reshape(value.shape)
+
+        if i < (size - 1):
+            # Send the k, v to the next rank
+            next_kv = torch.cat([key.flatten(), value.flatten()])
+            next_kv = rotater.exchange_buffers(next_kv)
+
+        is_causal_behavior = _is_causal_behavior(
+            rank=rank, world_size=size, i=i, is_causal=is_causal
+        )
+
+        # For a detailed understanding of the load balancing algorithm, see
+        # Note [Context parallelism load balance algorithm for causal masking]
+        if is_causal_behavior == _CausalBehavior.SKIP:
+            # If i > rank and load balancing is not turned on.
+            continue
+
+        if i == 0 or (not _cp_options.enable_load_balance or not is_causal):
+            # When local balance is enabled, we still need to do SDPA with
+            # the both local chunks of q, k, v for the first iteration.
+            q, k, v, partial = (query, key, value, False)
+        elif i <= rank:
+            # Round-robin load balancing case, and i <= rank.
+            # We need to do SDPA with only the first local chunk of k, v.
+            # Note that q, k, v each contains two local chunks.
+            ROUND_ROBIN_CYCLE = 2
+            q, k, v, partial = (
+                query,
+                key.chunk(ROUND_ROBIN_CYCLE, dim=2)[0],
+                value.chunk(ROUND_ROBIN_CYCLE, dim=2)[0],
+                False,
+            )
+        else:
+            # Round-robin load balancing case, and i > rank.
+            # We need to do SDPA with only the second half of q, and update
+            # only the second part of logsumexp. So partial is True.
+            # Note that q, k, v each contains two chunks.
+            q, k, v, partial = query.chunk(2, dim=2)[1], key, value, True
+
+        # See https://github.com/pytorch/pytorch/blob/release/2.4/aten/src/ATen/native/native_functions.yaml#L14695
+        # for the SDPA kernel definitions.
+        out, logsumexp, *rest = op(
+            q,
+            k,
+            v,
+            is_causal=is_causal_behavior.value,
+            **kwargs,
+        )
+        sdpa_merger.step(out, logsumexp, partial)
+
+    # pyrefly: ignore [unbound-name]
+    return *sdpa_merger.results(), *rest
+
+
+def _templated_ring_attention_backward(
+    group: dist.ProcessGroup,
+    seq_dim: int,
+    op: _AttentionOp,
+    grad_out: torch.Tensor,
+    grad_out_name: str,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    out: torch.Tensor,
+    logsumexp: torch.Tensor,
+    is_causal: bool,
+    **kwargs: Any,
+) -> tuple[torch.Tensor, ...]:
+    """This API implements the backward pass of the ring attention."""
+    if not is_causal and _cp_options.enable_load_balance:
+        raise RuntimeError("Load balancing requires `is_causal=True`.")
+    rank = dist.get_rank(group)
+    size = dist.get_world_size(group)
+    next_kv = None
+    next_grad_kv = None
+    rest: list[Any]
+    grad_query_, grad_key_, grad_value_ = None, None, None
+
+    accum_dtype = torch.float32 if _cp_options.convert_to_f32 else query.dtype
+    grad_query = torch.zeros_like(query, dtype=accum_dtype)
+    grad_key = torch.zeros_like(key, dtype=accum_dtype)
+    grad_value = torch.zeros_like(value, dtype=accum_dtype)
+
+    key = key.contiguous()
+    value = value.contiguous()
+    kv_rotater = _create_rotater(group, 2)
+    dkv_rotater = _create_rotater(group, 2, method=_RotateMethod.ALL_TO_ALL)
+    for i in range(size):
+        if i > 0:
+            # Wait for the kv from the (cp_rank - 1) rank.
+            buffer = kv_rotater.next_buffer()
+            pointer = 0
+            key = buffer[pointer : pointer + key.numel()].reshape(key.shape)
+            pointer += key.numel()
+            value = buffer[pointer : pointer + value.numel()].reshape(value.shape)
+            pointer += value.numel()
+
+        if i != size - 1:
+            # Send the kv to the next rank.
+            next_kv = torch.cat([key.flatten(), value.flatten()])
+            kv_rotater.exchange_buffers(next_kv)
+
+        is_causal_behavior = _is_causal_behavior(
+            rank=rank, world_size=size, i=i, is_causal=is_causal
+        )
+
+        if is_causal_behavior != _CausalBehavior.SKIP:
+            if i == 0 or (not _cp_options.enable_load_balance or not is_causal):
+                # We need to do SDPA with the full local q, k, v.
+                q, k, v, out_, dout, lse = (query, key, value, out, grad_out, logsumexp)
+            elif i <= rank:
+                # Round-robin load balancing case, and i <= rank.
+                # We need to do SDPA with only the first half of k, v.
+                # Note that q, k, v each contains two chunks.
+                q, k, v, out_, dout, lse = (
+                    query,
+                    key.chunk(2, dim=seq_dim)[0],
+                    value.chunk(2, dim=seq_dim)[0],
+                    out,
+                    grad_out,
+                    logsumexp,
+                )
+            else:
+                # Round-robin load balancing case, and i > rank.
+                # We need to do SDPA with only the second half of q.
+                # Note that q, k, v each contains two chunks.
+                q, k, v, out_, dout, lse = (
+                    query.chunk(2, dim=seq_dim)[1],
+                    key,
+                    value,
+                    out.chunk(2, dim=seq_dim)[1],
+                    grad_out.chunk(2, dim=seq_dim)[1],
+                    # Need to make logsumexp contiguous, otherwise there will
+                    # be numerical error.
+                    logsumexp.chunk(2, dim=seq_dim)[1].contiguous(),
+                )
+
+            kwargs[grad_out_name] = dout
+            # See https://github.com/pytorch/pytorch/blob/release/2.4/aten/src/ATen/native/native_functions.yaml#L14695
+            # for the SDPA kernel definitions.
+            grad_query_, grad_key_, grad_value_, *rest = op(
+                query=q,
+                key=k,
+                value=v,
+                out=out_,
+                logsumexp=lse,
+                is_causal=is_causal_behavior.value,
+                **kwargs,
+            )
+        else:
+            grad_query_ = torch.zeros_like(query, dtype=accum_dtype)
+            grad_key_ = torch.zeros_like(key, dtype=accum_dtype)
+            grad_value_ = torch.zeros_like(value, dtype=accum_dtype)
+
+        ROUND_ROBIN_CYCLE = 2
+        if i == 0:
+            grad_key += grad_key_
+            grad_value += grad_value_
+        else:
+            pointer = 0
+            # Wait for the kv gradient from (cp_rank - 1) rank.
+            next_grad_kv = dkv_rotater.next_buffer()
+            grad_key = next_grad_kv[pointer : pointer + grad_key.numel()].reshape(
+                grad_key.shape
+            )
+            pointer += grad_key.numel()
+            grad_value = next_grad_kv[pointer : pointer + grad_value.numel()].reshape(
+                grad_value.shape
+            )
+
+            if i <= rank and _cp_options.enable_load_balance:
+                grad_key = _partial_update(
+                    grad_key,
+                    grad_key_,
+                    dim=seq_dim,
+                    n_chunks=ROUND_ROBIN_CYCLE,
+                    idx=0,
+                    add=True,
+                )
+                grad_value = _partial_update(
+                    grad_value,
+                    grad_value_,
+                    dim=seq_dim,
+                    n_chunks=ROUND_ROBIN_CYCLE,
+                    idx=0,
+                    add=True,
+                )
+            else:
+                grad_key += grad_key_
+                grad_value += grad_value_
+
+        next_grad_kv = torch.cat([grad_key.flatten(), grad_value.flatten()])
+        # Send the grad key and grad value to the next rank.
+        dkv_rotater.exchange_buffers(next_grad_kv)
+
+        if i <= rank or not _cp_options.enable_load_balance:
+            grad_query += grad_query_
+        else:
+            grad_query = _partial_update(
+                grad_query,
+                grad_query_,
+                dim=seq_dim,
+                n_chunks=ROUND_ROBIN_CYCLE,
+                idx=1,
+                add=True,
+            )
+
+    assert grad_key_ is not None
+    assert grad_value_ is not None
+    grad_query = grad_query.to(query.dtype)
+    next_grad_kv = dkv_rotater.next_buffer().to(key.dtype)
+    grad_key = next_grad_kv[: grad_key.numel()].reshape(grad_key.shape)
+    grad_value = next_grad_kv[grad_key.numel() :].reshape(grad_value.shape)
+    return (
+        grad_query,
+        grad_key,
+        grad_value,
+        # pyrefly: ignore [unbound-name]
+        *rest,
+    )
+
+
+def _scaled_dot_product_ring_flash_attention(
+    mesh: DeviceMesh,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    dropout_p: float = 0.0,
+    is_causal: bool = False,
+    return_debug_mask: bool = False,
+    *,
+    scale: float | None = None,
+) -> tuple[torch.Tensor, ...]:
+    if return_debug_mask:
+        raise NotImplementedError("return_debug_mask is not supported yet")
+
+    # TODO: remove this hardcoding
+    seq_dim = 2
+    group = mesh.get_group()
+    return _templated_ring_attention(
+        group,
+        seq_dim,
+        aten._scaled_dot_product_flash_attention,
+        query=query,
+        key=key,
+        value=value,
+        is_causal=is_causal,
+        dropout_p=dropout_p,
+        scale=scale,
+    )
+
+
+def _scaled_dot_product_ring_efficient_attention(
+    mesh: DeviceMesh,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attn_bias: torch.Tensor | None = None,
+    compute_log_sumexp: bool = True,
+    dropout_p: float = 0.0,
+    is_causal: bool = False,
+    *,
+    scale: float | None = None,
+) -> tuple[torch.Tensor, ...]:
+    if attn_bias is not None:
+        raise NotImplementedError("attn_bias is not supported yet")
+
+    if not compute_log_sumexp:
+        # CP requires compute_log_sumexp to be True because it always merges LSE
+        compute_log_sumexp = True
+
+    # TODO: remove this hardcoding
+    seq_dim = 2
+    group = mesh.get_group()
+    return _templated_ring_attention(
+        group,
+        seq_dim,
+        aten._scaled_dot_product_efficient_attention,
+        query=query,
+        key=key,
+        value=value,
+        is_causal=is_causal,
+        attn_bias=attn_bias,
+        dropout_p=dropout_p,
+        scale=scale,
+        compute_log_sumexp=compute_log_sumexp,
+    )
+
+
+def _scaled_dot_product_ring_cudnn_attention(
+    mesh: DeviceMesh,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attn_bias: torch.Tensor | None = None,
+    compute_log_sumexp: bool = True,
+    dropout_p: float = 0.0,
+    is_causal: bool = False,
+    return_debug_mask: bool = False,
+    *,
+    scale: float | None = None,
+) -> tuple[torch.Tensor, ...]:
+    if attn_bias is not None:
+        raise NotImplementedError("attn_bias is not supported yet")
+
+    if not compute_log_sumexp:
+        # CP requires compute_log_sumexp to be True because it always merges LSE
+        compute_log_sumexp = True
+
+    # TODO: remove this hardcoding
+    seq_dim = 2
+    group = mesh.get_group()
+    return _templated_ring_attention(
+        group,
+        seq_dim,
+        aten._scaled_dot_product_cudnn_attention,
+        query=query,
+        key=key,
+        value=value,
+        attn_bias=attn_bias,
+        compute_log_sumexp=compute_log_sumexp,
+        dropout_p=dropout_p,
+        is_causal=is_causal,
+        return_debug_mask=return_debug_mask,
+        scale=scale,
+    )
+
+
+def _scaled_dot_product_ring_flash_attention_backward(
+    mesh: DeviceMesh,
+    grad_out: torch.Tensor,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    out: torch.Tensor,
+    logsumexp: torch.Tensor,
+    cum_seq_q: torch.Tensor,
+    cum_seq_k: torch.Tensor,
+    max_q: int,
+    max_k: int,
+    dropout_p: float,
+    is_causal: bool,
+    philox_seed: torch.Tensor,
+    philox_offset: torch.Tensor,
+    *,
+    scale: float | None = None,
+) -> tuple[torch.Tensor, ...]:
+    # TODO: remove this hardcoding
+    seq_dim = 2
+    group = mesh.get_group()
+    return _templated_ring_attention_backward(
+        group,
+        seq_dim,
+        aten._scaled_dot_product_flash_attention_backward.default,
+        grad_out=grad_out,
+        grad_out_name="grad_out",
+        query=query,
+        key=key,
+        value=value,
+        out=out,
+        logsumexp=logsumexp,
+        is_causal=is_causal,
+        cum_seq_q=cum_seq_q,
+        cum_seq_k=cum_seq_k,
+        max_q=max_q,
+        max_k=max_k,
+        dropout_p=dropout_p,
+        philox_seed=philox_seed,
+        philox_offset=philox_offset,
+        scale=scale,
+    )
+
+
+def _scaled_dot_product_ring_efficient_attention_backward(
+    mesh: DeviceMesh,
+    grad_out: torch.Tensor,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    bias: torch.Tensor,
+    out: torch.Tensor,
+    logsumexp: torch.Tensor,
+    philox_seed: torch.Tensor,
+    philox_offset: torch.Tensor,
+    dropout_p: float,
+    grad_input_mask: tuple[bool, ...],
+    is_causal: bool = False,
+    *,
+    scale: float | None = None,
+) -> tuple[torch.Tensor, ...]:
+    # TODO: remove this hardcoding
+    seq_dim = 2
+    group = mesh.get_group()
+    return _templated_ring_attention_backward(
+        group,
+        seq_dim,
+        aten._scaled_dot_product_efficient_attention_backward.default,
+        grad_out=grad_out,
+        grad_out_name="grad_out_",
+        query=query,
+        key=key,
+        value=value,
+        attn_bias=bias,
+        out=out,
+        logsumexp=logsumexp,
+        philox_seed=philox_seed,
+        philox_offset=philox_offset,
+        dropout_p=dropout_p,
+        grad_input_mask=grad_input_mask,
+        is_causal=is_causal,
+        scale=scale,
+    )
+
+
+def _scaled_dot_product_ring_cudnn_attention_backward(
+    mesh: DeviceMesh,
+    grad_out: torch.Tensor,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    out: torch.Tensor,
+    logsumexp: torch.Tensor,
+    philox_seed: torch.Tensor,
+    philox_offset: torch.Tensor,
+    attn_bias: torch.Tensor,
+    cum_seq_q: torch.Tensor,
+    cum_seq_k: torch.Tensor,
+    max_q: int,
+    max_k: int,
+    dropout_p: float,
+    is_causal: bool,
+    *,
+    scale: float | None = None,
+) -> tuple[torch.Tensor, ...]:
+    # TODO: remove this hardcoding
+    seq_dim = 2
+    group = mesh.get_group()
+    return _templated_ring_attention_backward(
+        group,
+        seq_dim,
+        aten._scaled_dot_product_cudnn_attention_backward.default,
+        grad_out=grad_out,
+        grad_out_name="grad_out",
+        query=query,
+        key=key,
+        value=value,
+        out=out,
+        logsumexp=logsumexp,
+        philox_seed=philox_seed,
+        philox_offset=philox_offset,
+        attn_bias=attn_bias,
+        cum_seq_q=cum_seq_q,
+        cum_seq_k=cum_seq_k,
+        max_q=max_q,
+        max_k=max_k,
+        dropout_p=dropout_p,
+        is_causal=is_causal,
+        scale=scale,
+    )
+
+
+def _sdpa_handler(
+    op_call: torch._ops.OpOverload,
+    args: tuple[object, ...],
+    kwargs: dict[str, object],
+) -> object:
+    # extract local tensor and sharding infos to a OpInfo
+    op_info = DTensor._op_dispatcher.unwrap_to_op_info(op_call, args, kwargs)
+    logger.debug("Dispatching op_call: %s", op_info.schema)
+
+    # sharding propagation
+    # TODO: remove the context parallel strategy from the default propagation
+    # rule. Either figure out how to dynamically enable it or just don't call
+    # propagate.
+    DTensor._op_dispatcher.sharding_propagator.propagate(op_info)
+    output_sharding = op_info.output_sharding
+    assert output_sharding is not None, "output sharding should not be None"
+    assert not output_sharding.needs_redistribute, "inputs need to be redistributed"
+
+    call_maps: dict[torch._ops.OpOverload, Callable] = {
+        aten._scaled_dot_product_flash_attention.default: _scaled_dot_product_ring_flash_attention,
+        aten._scaled_dot_product_efficient_attention.default: _scaled_dot_product_ring_efficient_attention,
+        aten._scaled_dot_product_cudnn_attention.default: _scaled_dot_product_ring_cudnn_attention,
+        aten._scaled_dot_product_flash_attention_backward.default: _scaled_dot_product_ring_flash_attention_backward,
+        aten._scaled_dot_product_efficient_attention_backward.default: _scaled_dot_product_ring_efficient_attention_backward,
+        aten._scaled_dot_product_cudnn_attention_backward.default: _scaled_dot_product_ring_cudnn_attention_backward,
+    }
+    if op_call in call_maps:
+        local_results = call_maps[op_call](
+            op_info.compute_mesh,
+            *op_info.local_args,  # type: ignore[arg-type]
+            **op_info.local_kwargs,  # type: ignore[arg-type]
+        )
+    else:
+        raise NotImplementedError(
+            "CP only supports flash attention and memory efficient attention now."
+        )
+
+    return DTensor._op_dispatcher.wrap(local_results, output_sharding.output_spec)
+
+
+custom_ops = {
+    aten._scaled_dot_product_flash_attention.default: _sdpa_handler,
+    aten._scaled_dot_product_flash_attention_backward.default: _sdpa_handler,
+    aten._scaled_dot_product_efficient_attention.default: _sdpa_handler,
+    aten._scaled_dot_product_efficient_attention_backward.default: _sdpa_handler,
+    aten._scaled_dot_product_cudnn_attention.default: _sdpa_handler,
+    aten._scaled_dot_product_cudnn_attention_backward.default: _sdpa_handler,
+}
+exitsing_custom_ops = DTensor._op_dispatcher._custom_op_handlers
+
+
+ArgsType = tuple[Any, ...]
+KwargsType = dict[str, Any]
+InputFnType = Callable[[nn.Module | None, ArgsType, KwargsType, DeviceMesh], Any]
+OutputFnType = Callable[[nn.Module | None, Any, Any, DeviceMesh], Any]
+
+_replaced_functions: dict[Callable, tuple[str, Callable]] = {}
+
+
+def _distribute_function(
+    fn: Callable,
+    fn_module: types.ModuleType,
+    device_mesh: DeviceMesh,
+    input_fn: InputFnType,
+    output_fn: OutputFnType,
+) -> None:
+    """
+    A helper function to replace a function with a distributed version by
+    using the monkey patching approach.
+
+    This function is for the CP internal usage only.
+    """
+
+    def wrapper(
+        target_fn: Callable, input_fn: InputFnType, output_fn: OutputFnType
+    ) -> Callable:
+        def inner_fn(*args: ArgsType, **kwargs: KwargsType) -> Any:
+            args, kwargs = input_fn(None, args, kwargs, device_mesh)
+            outputs = target_fn(*args, **kwargs)
+            return output_fn(None, (args, kwargs), outputs, device_mesh)
+
+        return inner_fn
+
+    global _replaced_functions
+
+    if fn in _replaced_functions:
+        return
+
+    wrapper_fn = wrapper(fn, input_fn, output_fn)
+    setattr(fn_module, fn.__name__, wrapper_fn)
+    _replaced_functions[wrapper_fn] = (fn.__name__, fn)
+
+
+def _restore_function(fn: Callable, fn_module: types.ModuleType) -> None:
+    """Restore the function that is replaced by _distribute_function."""
+    if fn not in _replaced_functions:
+        return
+
+    original_name, original_fn = _replaced_functions[fn]
+    setattr(fn_module, original_name, original_fn)
+
+
+def _enable_cp_dtensor_dispatcher() -> None:
+    """Enables DTensor dispatcher to dispatch SDPA to CP."""
+    # Enable custom op handlers for CP
+    DTensor._op_dispatcher._custom_op_handlers = {
+        **exitsing_custom_ops,
+        **custom_ops,
+    }
+    # Register CP-specific sharding rules
+    from ._sharding_rules import register_cp_sharding_rules
+
+    register_cp_sharding_rules()
+
+
+def _disable_cp_dtensor_dispatcher() -> None:
+    """Disables DTensor dispatcher to dispatch SDPA to CP."""
+    # Restore original custom op handlers
+    DTensor._op_dispatcher._custom_op_handlers = exitsing_custom_ops
+
+    # TODO: unregister_cp_sharding_rules(clear_the_cache=True) will cause
+    # all DTensor sharding propagation cache being invalidated. It is not
+    # easy to achieve selectively invalidating lru cache without rewriting
+    # the sharding propagation wrapper.
+
+    from ._sharding_rules import unregister_cp_sharding_rules
+
+    unregister_cp_sharding_rules(clear_the_cache=False)
+
+
+def _enable_context_parallel_dispatcher_impl(seq_dim: int, mesh: DeviceMesh) -> None:
+    sdpa_cp = _ContextParallel(
+        seq_dim=seq_dim,
+        attention_type=_ContextParallel.AttentionType.SDPA,
+    )
+
+    if _dispatch_mode == _DispatchMode.MONKEY_PATCH:
+        _distribute_function(
+            F.scaled_dot_product_attention,
+            F,
+            mesh,
+            sdpa_cp.sdpa_input_fn,
+            sdpa_cp.sdpa_output_fn,
+        )
+        _enable_cp_dtensor_dispatcher()
+    elif _dispatch_mode == _DispatchMode.MODULE_WRAPPER:
+        _enable_cp_dtensor_dispatcher()
+    else:
+        raise ValueError(f"Unknown dispatch mode: {_dispatch_mode}")
+
+
+def _disable_context_parallel_dispatcher_impl() -> None:
+    if _dispatch_mode == _DispatchMode.MONKEY_PATCH:
+        _restore_function(F.scaled_dot_product_attention, F)
+    elif _dispatch_mode == _DispatchMode.MODULE_WRAPPER:
+        pass
+    else:
+        raise NotImplementedError(f"Unknown dispatch mode: {_dispatch_mode}")
+
+    _disable_cp_dtensor_dispatcher()
+
+
+_compiled_create_block_mask = None
+
+
+def _context_parallel_buffers(
+    mesh: DeviceMesh,
+    buffers: list[torch.Tensor | BlockMask],
+    buffer_seq_dims: list[int],
+    load_balancer: _LoadBalancer | None = None,
+) -> list[torch.Tensor | BlockMask]:
+    """
+    Shard the buffers along the sequence dimensions according to CP rules.
+    Args:
+        mesh (:class:`DeviceMesh`): the device mesh for the context parallelism.
+        buffers (List[torch.Tensor]): the buffers to be sharded.
+        seq_dims (List[int]): the sequence dimensions of ``buffers``. This list
+            must have the same length as ``buffers``.
+        load_balancer (Optional[:class:`_LoadBalancer`]): an optional `_LoadBalancer`
+            object. If this argument is `None`, it means the `buffers` need no
+            rearrangement before being sharded. If this argument is a `_LoadBalancer`
+            object, call its `_generate_indices(restore=False)` to generate the
+            rearrangement indices such that each shard of `buffer[rearrange_idx]` is
+            well-balanced (i.e., having close sparsities).
+
+    Returns:
+        List[torch.Tensor]: the sharded buffers.
+
+    Note:
+        For `_context_parallel_shard` we require a non-None `load_balancer` object to be
+        explicitly passed if load-balancing is needed.
+    """
+    # generate the index tensor for rearranging the buffer if a load-balance
+    # is available
+    load_balance_indices = load_balancer._generate_indices() if load_balancer else None
+    assert load_balance_indices is None or load_balance_indices.ndim == 2, (
+        "load balance index expects shape (1, seq_len) or (B, seq_len) "
+        f"but got {load_balance_indices.shape}."
+    )
+
+    new_buffers = []
+    sharded_buffer: torch.Tensor | BlockMask
+    for buffer, seq_dim in zip(buffers, buffer_seq_dims):
+        if isinstance(buffer, torch.Tensor):
+            # TODO: the load balance doesn't perform error handling.
+
+            # NOTE: assuming batch dim is 0
+
+            if load_balance_indices is not None:
+                # TODO: we should expclitly ask users to unsqueeze the batch dim.
+                # But this is a BC breaking ask.
+                # However, what we have done today is also not very safe.
+                idx_batch_size = load_balance_indices.size(0)
+                data_batch_size = buffer.size(0) if seq_dim > 0 else 1
+
+                if idx_batch_size != 1 and idx_batch_size != data_batch_size:
+                    raise ValueError(
+                        "Cannot rearrange buffer: "
+                        f"load_balance_indices has shape {load_balance_indices.shape}, "
+                        f"but buffer has shape {buffer.shape}."
+                    )
+
+                if seq_dim == 0:
+                    buffer = torch.index_select(
+                        buffer, dim=0, index=load_balance_indices[0]
+                    )
+                else:
+                    indices = load_balance_indices
+                    if idx_batch_size == 1:
+                        size = [data_batch_size] + list(indices.size())[1:]
+                        indices = indices.expand(*size)
+
+                    for i in range(data_batch_size):
+                        buffer[i] = torch.index_select(
+                            buffer[i], dim=seq_dim - 1, index=indices[i]
+                        )
+
+            # use DTensor to shard the buffer on sequence dimension, retain the local tensor
+            sharded_buffer = distribute_tensor(
+                buffer, mesh, [Shard(seq_dim)], src_data_rank=None
+            ).to_local()
+        elif isinstance(buffer, BlockMask):
+            sharded_buffer = _create_cp_block_mask(
+                mask_mod=buffer.mask_mod,
+                B=buffer.kv_num_blocks.shape[0],
+                H=buffer.kv_num_blocks.shape[1],
+                Q_LEN=buffer.seq_lengths[0],
+                KV_LEN=buffer.seq_lengths[1],
+                device_mesh=mesh,
+                load_balancer=load_balancer,
+            )
+        else:
+            raise ValueError(f"Unknown buffer type: {type(buffer)}")
+
+        new_buffers.append(sharded_buffer)
+
+    return new_buffers
+
+
+def _create_cp_block_mask(
+    mask_mod: _mask_mod_signature,
+    B: int,
+    H: int,
+    Q_LEN: int,
+    KV_LEN: int,
+    device_mesh: DeviceMesh,
+    load_balancer: _LoadBalancer | None = None,
+) -> BlockMask:
+    """
+    Creates a specialized BlockMask for Context Parallel FlexAttention.
+
+    This function creates a BlockMask that enables computation of attention results
+    for sharded Q attending to global KV. The mask appropriately handles the query
+    index offset required when each rank operates on a shard of the query sequence
+    while accessing the full key-value sequence.
+
+    The function internally rewrites the provided mask_mod function to translate local
+    query indices to global query indices, ensuring that the masking logic is applied
+    correctly across the distributed computation.
+
+    Args:
+        mask_mod (Callable): Mask function that operates on global attention indices.
+        B (int): Batch size.
+        H (int): Number of query heads.
+        Q_LEN (int): Global sequence length of the query.
+        KV_LEN (int): Global sequence length of the key/value.
+        device_mesh (DeviceMesh): Device mesh used for context parallelism.
+        load_balancer (Optional[:class:`_LoadBalancer`]): The load-balancer used to rearrange
+            QKV before sharding. This will be used to modify the block_mask generated.
+
+    Returns:
+        BlockMask: A block mask configured for the local query shard that can be used
+            with flex_attention() for the given cp_mesh.
+
+    Raises:
+        NotImplementedError: If Q_LEN is not divisible by (CP world size * BLOCK_SIZE).
+
+    Warning:
+        Currently requires Q_LEN to be divisible by CP mesh world size * BLOCK_SIZE
+        (BLOCK_SIZE defaults to 128). This constraint exists because the BlockMask
+        must handle both padding and offsets correctly. For example, if Q_LEN is 384,
+        CP world size is 2, and BLOCK_SIZE is 128, the local Q_LEN would be 192. In
+        such cases, both rank0 and rank1 would have paddings in their local BlockMasks.
+        Support for padding in this scenario is planned for future work.
+
+    """
+
+    from torch.nn.attention.flex_attention import _DEFAULT_SPARSE_BLOCK_SIZE
+
+    if Q_LEN % (device_mesh.size() * _DEFAULT_SPARSE_BLOCK_SIZE) != 0:
+        raise NotImplementedError(
+            f"Q_LEN {Q_LEN} is not divisible by CP mesh world size {device_mesh.size()} * "
+            f"BLOCK_SIZE {_DEFAULT_SPARSE_BLOCK_SIZE}. This is not supported yet. "
+        )
+
+    global _compiled_create_block_mask
+    if _compiled_create_block_mask is None:
+        _compiled_create_block_mask = torch.compile(
+            create_block_mask, dynamic=False, fullgraph=True
+        )
+    compiled_create_block_mask = _compiled_create_block_mask
+
+    def _rewrite_mask_mod(
+        mask_mod: _mask_mod_signature,
+        rank: int,
+        block_size: int,
+        local_q_size: int,
+        qkv_rearrange_indices: torch.Tensor | None = None,
+    ) -> _mask_mod_signature:
+        assert qkv_rearrange_indices is None or qkv_rearrange_indices.ndim == 2, (
+            "load balance index expects shape (1, seq_len) or (B, seq_len) "
+            f"but got {qkv_rearrange_indices.shape}."
+        )
+
+        def qkv_idx_restore(
+            b: torch.Tensor, idx_post_rearrange: torch.Tensor
+        ) -> torch.Tensor:
+            if qkv_rearrange_indices is not None:
+                if (
+                    qkv_rearrange_indices.size(0) == 1
+                ):  # identical load-balance in batch
+                    idx_pre_rearrange = qkv_rearrange_indices[0][idx_post_rearrange]
+                else:
+                    idx_pre_rearrange = qkv_rearrange_indices[b][idx_post_rearrange]
+            else:
+                idx_pre_rearrange = idx_post_rearrange
+
+            return idx_pre_rearrange
+
+        def local_q_idx_to_q_idx(local_q_idx: torch.Tensor) -> torch.Tensor:
+            # calculate local block_idx and block_offset
+            local_blk_idx, local_blk_offset = (
+                local_q_idx // block_size,
+                local_q_idx % block_size,
+            )
+            # NOTE: load balancing is not used
+            local_num_blocks = local_q_size // block_size
+            blk_idx = local_num_blocks * rank + local_blk_idx
+            return blk_idx * block_size + local_blk_offset
+
+        return lambda b, h, q_idx, kv_idx: mask_mod(
+            b,
+            h,
+            qkv_idx_restore(b, local_q_idx_to_q_idx(q_idx)),
+            qkv_idx_restore(b, kv_idx),
+        )
+
+    cp_rank = device_mesh.get_local_rank()
+    cp_group_size = device_mesh.size()
+    load_balancer = load_balancer or _create_default_load_balancer(
+        Q_LEN, cp_group_size, device_mesh.device_type
+    )
+    Q_SHARD_LEN = Q_LEN // cp_group_size
+    block_size = _DEFAULT_SPARSE_BLOCK_SIZE
+
+    rearrange_indices = (
+        load_balancer._generate_indices(restore=False) if load_balancer else None
+    )
+    block_mask = compiled_create_block_mask(
+        _rewrite_mask_mod(
+            mask_mod,
+            cp_rank,
+            block_size,
+            Q_SHARD_LEN,
+            qkv_rearrange_indices=rearrange_indices,
+        ),
+        B,
+        H,
+        Q_SHARD_LEN,
+        KV_LEN,
+        device=device_mesh.device_type,
+        BLOCK_SIZE=(block_size, block_size),
+    )
+    return block_mask
+
+
+#####################
+# Experimental APIs
+#####################
+
+
+class _ContextParallel(ParallelStyle):
+    class AttentionType(Enum):
+        FLEX = "flex_attention"
+        SDPA = "scaled_dot_product_attention"
+
+    def __init__(
+        self,
+        seq_dim: int,
+        attention_type: AttentionType,
+    ) -> None:
+        super().__init__()
+        self.seq_dim = seq_dim
+        self.attention_type = attention_type
+
+    def _apply(self, module: nn.Module, mesh: DeviceMesh) -> nn.Module:
+        if self.attention_type == self.AttentionType.FLEX:
+            module.register_forward_pre_hook(
+                partial(self.flex_input_fn, mesh=mesh), with_kwargs=True
+            )
+            return module
+        elif self.attention_type == self.AttentionType.SDPA:
+            module.register_forward_pre_hook(
+                partial(self.sdpa_input_fn, mesh=mesh), with_kwargs=True
+            )
+            module.register_forward_hook(partial(self.sdpa_output_fn, mesh=mesh))
+            return module
+        else:
+            raise ValueError(f"Unknown attention type: {self.attention_type}")
+
+    def flex_input_fn(
+        self, module: nn.Module | None, args: Any, kwargs: Any, mesh: DeviceMesh
+    ) -> Any:
+        args_list = list(args)
+        for idx, name in enumerate(
+            ("query", "key", "value", "score_mod", "block_mask")
+        ):
+            if idx >= len(args):
+                args_list.append(kwargs.pop(name, None))
+
+        query, key, value, score_mod, block_mask = args_list[:5]
+        assert isinstance(query, torch.Tensor)
+        assert isinstance(key, torch.Tensor)
+        assert isinstance(value, torch.Tensor)
+        assert isinstance(block_mask, BlockMask | tuple)
+
+        key = key.contiguous()
+        value = value.contiguous()
+
+        global_key, global_value = flex_cp_allgather(
+            key, value, self.seq_dim, c10d._get_process_group_name(mesh.get_group())
+        )
+        args_list[1] = global_key
+        args_list[2] = global_value
+
+        return tuple(args_list), kwargs
+
+    def sdpa_input_fn(
+        self,
+        module: nn.Module | None,
+        args: tuple[Any, ...],
+        kwargs: dict[str, Any],
+        mesh: DeviceMesh,
+    ) -> tuple[tuple[Any, ...], dict[str, Any]]:
+        placement = [Shard(self.seq_dim)]
+        all_args = []
+
+        for arg in itertools.chain(args, kwargs.values()):
+            if isinstance(arg, torch.Tensor):
+                if isinstance(arg, DTensor):
+                    assert arg._spec.placements == placement
+                else:
+                    arg = DTensor.from_local(arg, mesh, placement, run_check=False)
+
+            all_args.append(arg)
+
+        new_args = tuple(all_args[0 : len(args)])
+        new_kwargs = dict(zip(kwargs.keys(), all_args[len(args) :]))
+        return new_args, new_kwargs
+
+    def sdpa_output_fn(
+        self, module: nn.Module | None, inputs: Any, outputs: Any, mesh: DeviceMesh
+    ) -> Any:
+        new_outputs = []
+        for output in [outputs] if isinstance(outputs, torch.Tensor) else outputs:
+            output = output.to_local() if isinstance(output, DTensor) else output
+            new_outputs.append(output)
+
+        if isinstance(outputs, torch.Tensor):
+            return new_outputs[0]
+
+        return tuple(new_outputs)
+
+
+CPBuffer: TypeAlias = torch.Tensor | BlockMask
+CPBufferContainer: TypeAlias = Sequence[CPBuffer] | Mapping[str, CPBuffer]
+CPBufferSeqDims: TypeAlias = Sequence[int] | Mapping[str, int]
+
+
+def _context_parallel_shard(
+    mesh: DeviceMesh,
+    buffers: CPBufferContainer,
+    seq_dims: CPBufferSeqDims,
+    load_balancer: _LoadBalancer | None = None,
+) -> list[torch.Tensor | BlockMask]:
+    """
+    Shard the buffers along the specified sequence dimensions (`seq_dims`), so that each
+    rank retains only its corresponding shard according to the provided `mesh`. If a
+    `load_balancer` is provided, the buffers will be rearranged by the load balancer
+    before sharding to improve load balance. Buffers can be either tensors or `BlockMask`
+    objects. If a buffer is a `BlockMask`, its sharding dimension is determined by the
+    `BlockMask` implementation, and the corresponding `seq_dim` is ignored.
+
+    Note:
+        For `_context_parallel_shard`, a non-None `load_balancer` must be explicitly passed
+        if load balancing is required.
+
+    Args:
+        mesh (DeviceMesh): The device mesh used for context parallelism.
+        buffers (List[torch.Tensor | BlockMask]): Buffers whose usage depends on the sequence
+            dimension. Examples include input batches, labels, and positional embedding buffers.
+            These buffers must be sharded along the sequence dimension to ensure correctness.
+        seq_dims (List[int]): The sequence dimensions for each buffer in `buffers`. Must have
+            the same length as `buffers`.
+        load_balancer (Optional[_LoadBalancer]): An optional load balancer object. If provided,
+            it rearranges the buffers before sharding to achieve better load balance. If not
+            provided, no rearrangement is performed.
+
+    Returns:
+        List[torch.Tensor | BlockMask]: The sharded buffers, each corresponding to the local
+            shard for the current rank.
+    """
+    # TODO: these global variables are going to bite us someday.
+    # We will have to remove them soon.
+    # For the new API, we only support the module wrapper mode.
+    global _dispatch_mode
+    _dispatch_mode = _DispatchMode.MODULE_WRAPPER
+    global _cp_options
+    if load_balancer is not None:
+        _cp_options.enable_load_balance = True
+    else:
+        _cp_options.enable_load_balance = False
+
+    if len(buffers) != len(seq_dims):
+        raise ValueError(
+            "`seq_dims` must have the same number of elements as `buffers`."
+        )
+
+    flat_buffers, spec = tree_flatten(buffers)
+    flat_seq_dims, _ = tree_flatten(seq_dims)
+    if len(flat_buffers) != len(flat_seq_dims):
+        raise ValueError("`seq_dims` must have the pytree structure as `buffers`.")
+
+    if isinstance(flat_buffers[0], torch.Tensor):
+        device = flat_buffers[0].device
+    else:
+        device = flat_buffers[0].kv_num_blocks.device
+    for buffer in flat_buffers:
+        if isinstance(buffer, torch.Tensor):
+            assert device == buffer.device, "All buffers must be on the same device"
+        else:
+            assert device == buffer.kv_num_blocks.device, (
+                "All buffers must be on the same device"
+            )
+
+    flat_sharded_buffers = _context_parallel_buffers(
+        mesh, flat_buffers, flat_seq_dims, load_balancer
+    )
+
+    return tree_unflatten(flat_sharded_buffers, spec)
+
+
+def _enable_context_parallel_dispatcher() -> None:
+    """
+    Enable the context parallel dispatcher. This API is experimental and subject to change.
+    """
+    _enable_cp_dtensor_dispatcher()
+
+
+def _disable_context_parallel_dispatcher() -> None:
+    """
+    Disable the context parallel dispatcher. This API is experimental and subject to change.
+    """
+    _disable_cp_dtensor_dispatcher()
+
+
+#####################################################
+# Current public APIs, but are also subject to change
+#####################################################
+@contextlib.contextmanager
+@torch.no_grad()
+def context_parallel(
+    mesh: DeviceMesh,
+    *,
+    buffers: list[torch.Tensor] | None = None,
+    buffer_seq_dims: list[int] | None = None,
+    no_restore_buffers: set[torch.Tensor] | None = None,
+) -> Generator[None, None, None]:
+    """
+
+    ``context_parallel`` is an experimental API to enable context
+    parallelism (CP). This API performs two actions: 1) patch the SDPA
+    (``torch.nn.functional.scaled_dot_product_attention``) with the CP-enabled
+    one, 2) shard ``buffers`` along the sequence dimension and each rank will
+    preserve the corresponding shard according ``mesh``.
+
+    Args:
+        mesh (:class:`DeviceMesh`): the device mesh for the context parallelism.
+        buffers (Optional[List[torch.Tensor]]): buffers that the usage depend
+            on the sequence dimension. Examples are input batch, labels and
+            positional embedding buffers. These buffers must be sharded along
+            the sequence dimension to ensure the accuracy. The sharding will
+            happen in-place, the buffer's shape will change within the context.
+            The buffers will be restored after the context finishes.
+            ``no_restore_buffers`` can be used to specify which buffers don't
+            need to be restored. Note that ``buffers`` should not contain any
+            nn.Parameter.
+        buffer_seq_dims (Optional[List[int]]): the sequence dimensions of ``buffers``.
+        no_restore_buffers (Optional[Set[torch.Tensor]]): buffers in these set
+            won't be restored after the context exits. This set must be a subset
+            of ``buffers``. If the buffers won't be used after the context exits,
+            these buffers can be put in this list to avoid extra restore time.
+
+    .. warning::
+        `torch.distributed.tensor.experimental.context_parallel` is a
+        prototype feature in PyTorch. The API is subject to change.
+    """
+    # For the legacy API, we only support the monkey-patch mode.
+    # We will deprecate this API once the new API is widely used.
+    global _dispatch_mode
+    _dispatch_mode = _DispatchMode.MONKEY_PATCH
+
+    buffers = [] if buffers is None else buffers
+    buffer_seq_dims = [] if buffer_seq_dims is None else buffer_seq_dims
+    no_restore_buffers = set() if no_restore_buffers is None else no_restore_buffers
+
+    if len(buffers) != len(buffer_seq_dims):
+        raise ValueError(
+            "`seq_dims` must have the same number of elements as `buffers`."
+        )
+
+    for buffer in no_restore_buffers:
+        # Cannot use `if not buffer in buffers` which will incur tensor comparison.
+        if not any(b is buffer for b in buffers):
+            raise ValueError("`no_restore_buffers` must be a subset of `buffers`.")
+
+    original_buffers = [None if b in no_restore_buffers else b.clone() for b in buffers]
+
+    device = buffers[0].device
+    seq_length = buffers[0].shape[buffer_seq_dims[0]]
+    cp_world_size = mesh.size()
+
+    # If `enable_load_balance` is True, the default Head-tail load balancer
+    # (:class:`_HeadTailLoadBalancer`) is used to rearrange the buffers before
+    # sharding. Otherwise, we don't do any load-balance rearrange by passing
+    # `None` to `_context_parallel_shard()`.
+    load_balancer = _create_default_load_balancer(seq_length, cp_world_size, device)
+    shards = _context_parallel_buffers(
+        mesh,
+        cast(list[torch.Tensor | BlockMask], buffers),
+        buffer_seq_dims,
+        load_balancer,
+    )
+    for buffer, shard in zip(buffers, shards):
+        assert isinstance(shard, torch.Tensor), "ContextParallel only supports Tensor"
+        shard = shard.clone()
+        buffer.resize_(shard.shape)
+        buffer.copy_(shard)
+
+    _enable_context_parallel_dispatcher_impl(seq_dim=2, mesh=mesh)
+    yield
+    _disable_context_parallel_dispatcher_impl()
+
+    for buffer, original_buffer in zip(buffers, original_buffers):
+        if original_buffer is not None:
+            buffer.resize_(original_buffer.shape)
+            buffer.copy_(original_buffer)
+
+
+@torch.no_grad()
+def context_parallel_unshard(
+    mesh: DeviceMesh,
+    buffers: list[torch.Tensor],
+    seq_dims: list[int],
+    load_balancer: _LoadBalancer | None = None,
+) -> list[torch.Tensor]:
+    """
+    Unshard the tensors (e.g., output) that are sharded due to context parallelism.
+
+    Args:
+        mesh (:class:`DeviceMesh`): the device mesh for the context parallelism.
+        buffers (List[torch.Tensor]): the buffers to be unsharded.
+        seq_dims (List[int]): the sequence dimensions of ``buffers``. This list
+            must have the same length as ``buffers``.
+        load_balancer (Optional[:class:`_Loadbalancer`]): an optional `_LoadBalancer`
+            object. If this argument is `None`, it means the `buffers` were not
+            rearranged when being sharded and there's no need to put it back to order
+            after unsharding. If this argument is a `_LoadBalancer` object, call
+            its `_generate_indices(restore=True)` to generate the restore indices such
+            that `unsharded[restore_idx]` is the original buffer.
+
+    Returns:
+        List[torch.Tensor]: the unsharded buffers.
+
+    Note:
+        For `context_parallel_unshard` we require not-None `load_balancer` object be
+        explicitly passed if flex_attention() is to be used and load-balancing is needed.
+        This is different from the case of SDPA though we strongly suggest users follow
+        the same convention.
+    """
+    device = buffers[0].device
+    cp_world_size = mesh.size()
+    seq_length = buffers[0].shape[seq_dims[0]] * cp_world_size
+
+    # If users don't pass in a `load_balancer`:
+    # - if `enable_load_balance` is True, we use the default round-robin
+    #   load balancer.
+    # - if `enable_load_balance` is False, we don't do any load balancing
+    #   by passing in `None` as `restore_indices`.
+    load_balancer = load_balancer or _create_default_load_balancer(
+        seq_length, cp_world_size, device
+    )
+    restore_indices = (
+        load_balancer._generate_indices(restore=True) if load_balancer else None
+    )
+
+    assert restore_indices is None or restore_indices.ndim == 2, (
+        "load balance restore index expects shape (1, seq_len) or (B, seq_len) "
+        f"but got {restore_indices.shape}."
+    )
+    unsharded_buffers = []
+    for b, dim in zip(buffers, seq_dims):
+        b = b.contiguous()
+        unsharded_b = _maybe_wait(ft_c.all_gather_tensor(b, dim, mesh))
+
+        if restore_indices is not None:
+            # NOTE: assuming batch dim is 0
+            idx_batch_size = restore_indices.size(0)
+            data_batch_size = unsharded_b.size(0)
+            if idx_batch_size != 1 and idx_batch_size != data_batch_size:
+                raise ValueError(
+                    "Cannot restore buffer: "
+                    f"restore_indices has shape {restore_indices.shape}, "
+                    f"but unsharded_b has shape {unsharded_b.shape}."
+                )
+
+            for i in range(data_batch_size):
+                index = (
+                    restore_indices[0]  # identical load-balance in batch
+                    if idx_batch_size == 1
+                    else restore_indices[i]
+                )
+                unsharded_b_batch_i = torch.index_select(
+                    unsharded_b[i], dim=dim - 1, index=index
+                )
+                unsharded_b[i] = unsharded_b_batch_i
+
+        unsharded_buffers.append(unsharded_b)
+
+    return unsharded_buffers
+
+
+def set_rotate_method(rotate_method: str) -> None:
+    """
+    Context Parallel SDPA requires the rotation of kv shards. Users can call this
+    API to specify which rotation method to use. "alltoall" shuffles the kv shards
+    using all-to-all collective. While "allgather" gathers the kv shards using
+    all-gather collective after the first sub-SDPA computation. If this API has not
+    been called, the default rotate method is "allgather".
+
+    Args:
+        rotate_method (str): the rotate method to use. Currently only supports
+        "allgather" and "alltoall". If a different string other than these two
+        is passed in, the function will raise an error.
+
+    Returns:
+        None
+    """
+    logger.info("Note that FlexAttention CP doesn't support alltoall yet.")
+    if rotate_method == "allgather":
+        _cp_options.rotate_method = _RotateMethod.ALL_GATHER
+    elif rotate_method == "alltoall":
+        _cp_options.rotate_method = _RotateMethod.ALL_TO_ALL
+    else:
+        raise NotImplementedError(
+            "Context Parallel does not support "
+            f"using {rotate_method} for kv shards rotation"
+        )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_cp_custom_ops.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_cp_custom_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..760ca79cfcbe54971afd23d467437e790c4b5fe2
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_cp_custom_ops.py
@@ -0,0 +1,88 @@
+from typing import Any
+
+import torch
+import torch.distributed._functional_collectives as funcol
+import torch.distributed.distributed_c10d as c10d
+
+
+@torch.library.custom_op("cplib::flex_cp_allgather", mutates_args=())
+def flex_cp_allgather(
+    k: torch.Tensor, v: torch.Tensor, seq_dim: int, pg_name: c10d.GroupName
+) -> tuple[torch.Tensor, torch.Tensor]:
+    k = k.contiguous()
+    v = v.contiguous()
+    k = funcol.all_gather_tensor(k, seq_dim, pg_name)
+    v = funcol.all_gather_tensor(v, seq_dim, pg_name)
+    if isinstance(k, funcol.AsyncCollectiveTensor):
+        k = k.wait()
+    if isinstance(v, funcol.AsyncCollectiveTensor):
+        v = v.wait()
+    return k, v
+
+
+@flex_cp_allgather.register_fake
+def _(
+    k: torch.Tensor, v: torch.Tensor, seq_dim: int, pg_name: c10d.GroupName
+) -> tuple[torch.Tensor, torch.Tensor]:
+    shape_k = list(k.shape)
+    shape_v = list(v.shape)
+    shape_k[seq_dim] *= c10d._get_group_size_by_name(pg_name)
+    shape_v[seq_dim] *= c10d._get_group_size_by_name(pg_name)
+    new_k = torch.empty(shape_k, dtype=k.dtype, device=k.device)
+    new_v = torch.empty(shape_v, dtype=v.dtype, device=v.device)
+    return new_k, new_v
+
+
+@torch.library.custom_op("cplib::flex_cp_allgather_backward", mutates_args=())
+def flex_cp_allgather_backward(
+    grad_full_k: torch.Tensor,
+    grad_full_v: torch.Tensor,
+    seq_dim: int,
+    pg_name: c10d.GroupName,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    grad_k = funcol.reduce_scatter_tensor(grad_full_k, "sum", seq_dim, pg_name)
+    if isinstance(grad_k, funcol.AsyncCollectiveTensor):
+        grad_k = grad_k.wait()
+    grad_v = funcol.reduce_scatter_tensor(grad_full_v, "sum", seq_dim, pg_name)
+    if isinstance(grad_v, funcol.AsyncCollectiveTensor):
+        grad_v = grad_v.wait()
+
+    return grad_k, grad_v
+
+
+@flex_cp_allgather_backward.register_fake
+def _(
+    grad_full_k: torch.Tensor,
+    grad_full_v: torch.Tensor,
+    seq_dim: int,
+    pg_name: c10d.GroupName,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    shape_k = list(grad_full_k.shape)
+    shape_v = list(grad_full_v.shape)
+    shape_k[seq_dim] //= c10d._get_group_size_by_name(pg_name)
+    shape_v[seq_dim] //= c10d._get_group_size_by_name(pg_name)
+    new_grad_k = torch.empty(
+        shape_k, dtype=grad_full_k.dtype, device=grad_full_k.device
+    )
+    new_grad_v = torch.empty(
+        shape_v, dtype=grad_full_v.dtype, device=grad_full_v.device
+    )
+    return new_grad_k, new_grad_v
+
+
+def _flex_cp_allgather_backward(
+    ctx: Any, grad_full_k: torch.Tensor, grad_full_v: torch.Tensor
+) -> tuple[torch.Tensor, torch.Tensor, None, None]:
+    grad_k, grad_v = flex_cp_allgather_backward(
+        grad_full_k, grad_full_v, ctx.seq_dim, ctx.pg_name
+    )
+    return grad_k, grad_v, None, None
+
+
+def _flex_cp_setup_context(ctx: Any, inputs: Any, output: Any) -> None:
+    _, _, ctx.seq_dim, ctx.pg_name = inputs
+
+
+flex_cp_allgather.register_autograd(
+    _flex_cp_allgather_backward, setup_context=_flex_cp_setup_context
+)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_load_balancer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_load_balancer.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b293b0e260efcc9e8fd308579ecaa0e3d48c0e6
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_load_balancer.py
@@ -0,0 +1,486 @@
+# this file contains the `_LoadBalancer` class and its family of implementation
+# for different load-balancing strategies in tensor sharding.
+import functools
+from abc import ABC, abstractmethod
+
+import torch
+from torch import Tensor
+from torch.nn.attention.flex_attention import BlockMask
+
+
+# make it private since it's still a prototype
+class _LoadBalancer(ABC):
+    @abstractmethod
+    def _generate_indices(self, restore: bool = False) -> Tensor | None:
+        """
+        Generate indices for load balancing.
+        Args:
+            restore (bool):
+
+        Returns:
+            The generated indices of shape `(1, seq_len)` if the load-balancing is
+            identical within the batch, or `(batch_size, seq_len)` if the load-balancing
+            should vary within the batch.
+
+        Warning:
+            For Multi-Head Attention, we require the masks over the head dimension are identical
+            (i.e. the return value of `_generate_indices()` does not have `heads` dimension).
+
+        Example:
+            Here is the causal mask for attention where q_len == kv_len == 8:
+                            KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 0, 0, 0, 0, 0]
+            Q_index [1, 1, 1, 1, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 1, 0]
+                    [1, 1, 1, 1, 1, 1, 1, 1]
+
+            This mask matrix also represents the computation required to compute
+            the masked Q @ K^T by:
+            - mask[i, j] == 1: the computation of Q[i, :] dot K[j, :] is required
+            - mask[i, j] == 0: the computation should be skipped
+
+            Therefore the number of 1s in matrix represents the amount of computation
+            required.
+
+            Assume we want to distribute this Q @ K^T computation to 2 devices, then
+            the matrix is also distributed as:
+                            KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 0, 0, 0, 0, 0]    rank 0
+                    [1, 1, 1, 1, 0, 0, 0, 0]
+            Q_index ------------------------
+                    [1, 1, 1, 1, 1, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 0, 0]    rank 1
+                    [1, 1, 1, 1, 1, 1, 1, 0]
+                    [1, 1, 1, 1, 1, 1, 1, 1]
+
+            An imbalance of computation is observed on these 2 ranks and this could make
+            rank 1 the straggler when performing Context Parallel. In order to balance
+            the computation, we need to rearrange the QKV tensors before sharding in such a
+            way that the result mask matrix is evenly distributed over devices and each
+            rank has the number of 1s as close as possible.
+
+            This method defines the strategy of how to rearrange the QKV tensor for better
+            load-balance:
+            - when `restore == False`, this method returns an indices tensor `rearrange_idx`
+            such that Q[rearrange_idx] is the desired Q tensor after rearranging.
+            - when `restore == True`, this method returns an indices tensor `restore_idx`
+            such that Q[rearrange_idx][restore_idx] == Q, i.e. restoring the rearranged tensor
+            back to the original status before rearranging.
+        """
+
+
+class _HeadTailLoadBalancer(_LoadBalancer):
+    def __init__(self, seq_length: int, world_size: int, device: str | torch.device):
+        self.seq_length = seq_length
+        self.world_size = world_size
+        self.device = device
+
+    def _generate_indices(self, restore: bool = False) -> Tensor:
+        """
+        Generate head-and-tail load balancing indices or restore indices.
+        Args:
+            restore:
+                If True, generate restore indices that map head-and-tail rearranged
+                positions back to original positions. If False, generate load
+                balance indices that rearrange original positions to head-and-tail pattern.
+
+        Returns:
+            The generated indices of shape `(1, seq_len)` because the load-balancing is
+            identical within the batch.
+
+        Warning:
+            For Multi-Head Attention, we require the masks over the head dimension are identical
+            (i.e. the return value of `_generate_indices()` does not have `heads` dimension).
+
+        Example:
+            Here is the causal mask for attention where q_len == kv_len == 8:
+                            KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 0, 0, 0, 0, 0]
+            Q_index [1, 1, 1, 1, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 1, 0]
+                    [1, 1, 1, 1, 1, 1, 1, 1]
+
+            Head-tail load-balance strategy rearranges the Q tensor by combining
+            Q[0:k] (on seq dim) and Q[-k:] for rank 0, Q[k:2k] and Q[-2k:-k] for
+            rank 1, and so on. In python code it looks like:
+
+                k = Q.size(0) // (2 * cp_world_size)
+                for rank in range(cp_world_size):
+                    reordered_Q[rank * 2 * k : (rank + 1) * 2 * k] = torch.cat(
+                        (Q[rank * k : (rank + 1) * k], Q[-(rank + 1) * k : -rank * k])
+                    )
+
+            This can also be done by tensor slicing. For the above example, the indices
+            tensor for slicing is:
+                slice_indices = Tensor([0, 7, 1, 6, 2, 5, 3, 4])
+
+            After reordering QKV using the `slice_indices`, the corresponding mask matrix
+            distributing over 2 devices becomes well-balanced:
+                            KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 1, 1]
+                    [1, 1, 0, 0, 0, 0, 0, 0]    rank 0
+                    [1, 1, 1, 1, 1, 1, 1, 0]
+            Q_index ------------------------
+                    [1, 1, 1, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 1, 0, 0]    rank 1
+                    [1, 1, 1, 1, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 1, 0, 0, 0]
+
+            To restore the reordering and putting the tensor back, slicing op can do the
+            trick with a `restore_indices` such that:
+                slice_indices[restore_indices] == Tensor([0, 1, 2, ...])
+
+            In this way, `reordered_Q[restore_indices]` will just be the original Q.
+        """
+        seq_length = self.seq_length
+        world_size = self.world_size
+        assert seq_length % (world_size * 2) == 0
+        chunk_size = seq_length // (world_size * 2)
+        all_indices = []
+
+        for rank in range(world_size):
+            # Generate indices for first chunk of the cp rank
+            first_chunk_start = rank * chunk_size
+            first_chunk_indices = list(
+                range(first_chunk_start, first_chunk_start + chunk_size)
+            )
+
+            # Second chunk: positions from the complementary chunk
+            second_chunk_idx = world_size * 2 - rank - 1
+            second_chunk_start = second_chunk_idx * chunk_size
+            second_chunk_indices = list(
+                range(second_chunk_start, second_chunk_start + chunk_size)
+            )
+            # combine the indices for this rank
+            all_indices.extend(first_chunk_indices + second_chunk_indices)
+
+        all_indices_tensor = torch.tensor(
+            all_indices, dtype=torch.int, device=self.device
+        )
+        if restore:
+            all_indices_tensor = torch.argsort(all_indices_tensor)
+
+        return all_indices_tensor.unsqueeze(0)  # add batch dim
+
+
+class _PerDocumentHeadTailLoadBalancer(_LoadBalancer):
+    def __init__(
+        self,
+        seq_length_per_doc: list[list[int]],
+        world_size: int,
+        device: str | torch.device,
+    ):
+        """
+        `seq_length_per_doc` has size (B, seq_len) if the load-balancing should vary
+        within the batch. Otherwise `seq_length_per_doc` should have size (1, seq_len).
+        """
+        self.seq_length_per_doc = seq_length_per_doc
+        self.world_size = world_size
+        self.device = device
+
+    def _generate_indices(self, restore: bool = False) -> Tensor:
+        """
+        Generate the per-document head-and-tail rearrange indices so that after rearranging
+        the input is load-balanced in per-document head-and-tail style.
+
+        Args:
+            restore:
+                If True, generate restore indices that map per-document head-and-tail
+                rearranged positions back to original positions. If False, generate load
+                balance indices that rearrange original positions to per-document
+                head-and-tail pattern.
+
+        Returns:
+            The generated indices of shape `(batch_size, seq_len)` if the load-balancing
+            should vary within the batch. Otherwise, it should have shape `(1, seq_len)`.
+
+        Warning:
+            For Multi-Head Attention, we require the masks over the head dimension are identical
+            (i.e. `seq_length_per_doc` must have size (B, seq_len) or (1, seq_len)).
+
+        Example:
+            Here is the document causal mask for attention where q_len == kv_len == 16:
+                                        KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+            Q_index [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
+
+            The per-document head-and-tail load-balancer will apply head-and-tail
+            reordering within each document. After load-balancing for context-parallel
+            on 2 devices, the above mask matrix will look like this:
+                                        KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
+            Q_index [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]
+                    ------------------------------------------------
+                    [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]
+        """
+        return torch.stack(
+            [
+                self._generate_indices_for_batch(seq_lengths, restore)
+                for seq_lengths in self.seq_length_per_doc
+            ]
+        )
+
+    def _generate_indices_for_batch(self, seq_length_per_doc, restore) -> Tensor:  # type: ignore[no-untyped-def]
+        world_size = self.world_size
+        device = self.device
+        assert all(
+            seq_length % (2 * world_size) == 0 for seq_length in seq_length_per_doc
+        )
+        chunk_length_per_doc = [
+            seq_length // (2 * world_size) for seq_length in seq_length_per_doc
+        ]
+
+        indices = []
+        document_start_idx = 0
+        for seq_length, chunk_length in zip(seq_length_per_doc, chunk_length_per_doc):
+            # Generate the indices for the current document
+            for rank in range(world_size):
+                head_chunk_start_idx = document_start_idx + chunk_length * rank
+                tail_chunk_end_idx = document_start_idx + chunk_length * (
+                    2 * world_size - rank
+                )
+                indices.append(
+                    torch.arange(
+                        head_chunk_start_idx,
+                        head_chunk_start_idx + chunk_length,
+                        device=device,
+                    )
+                )
+                indices.append(
+                    torch.arange(
+                        tail_chunk_end_idx - chunk_length,
+                        tail_chunk_end_idx,
+                        device=device,
+                    )
+                )
+
+            document_start_idx += seq_length
+
+        indices_tensor = torch.cat(indices)
+        if restore:
+            indices_tensor = torch.argsort(indices_tensor)
+
+        return indices_tensor
+
+
+class _PTRRLoadBalancer(_LoadBalancer):
+    """
+    Processing-Time based Round-Robin (PTRR) load balancer. This load balancer should
+    only be used for flex_attention() since it leverages `BlockMask`.
+    """
+
+    def __init__(
+        self,
+        block_mask: BlockMask,
+        world_size: int,
+    ):
+        """
+        `block_mask` must have shape (B, 1, seq_len, seq_len) or (1, 1, seq_len, seq_len).
+        """
+        self.block_mask = block_mask
+        self.world_size = world_size
+
+    @staticmethod
+    def ptrr_scheduling(process_time: Tensor, group_size: int) -> Tensor:
+        """
+        Separate the tasks into `group_size` groups using PTRR scheduling.
+        process_time:
+            1D tensor of size n, where n is the number of tasks. The value
+            is the process time of the task. Size `n` must be divisible by
+            `group_size`.
+        group_size:
+            the number of groups
+
+        Returns:
+        tasks_in_group (list[list[int]]):
+            A collection of list[int] and each list should have size `n // group_size`
+            (`group_size` lists in total). Each element is an index in the input
+            `process_time` (i.e. [0, len(process_time) - 1]).
+
+        Example:
+            process_time = [9, 14, 2, 20, 10, 15, 8, 14, 16, 19, 15, 3, 12, 1, 12, 10]
+            tasks_in_group = [
+                [3, 12, 13, 14],    # values = [1, 12, 12, 20], sum = 45
+                [2, 4, 7, 9],       # values = [2, 10, 14, 19], sum = 45
+                [1, 8, 11, 15],     # values = [14, 16, 3, 10], sum = 43
+                [0, 5, 6, 10]       # values = [9, 15, 8, 15], sum = 47
+            ]
+        """
+        assert process_time.ndim == 1
+
+        num_tasks = process_time.size(0)
+
+        if num_tasks % group_size != 0:
+            raise NotImplementedError(
+                f"num_tasks {num_tasks} must be divisible by group_size {group_size}"
+            )
+
+        device = process_time.device
+        _, sorted_indices_descending = torch.sort(
+            process_time, descending=True, stable=True
+        )  # if process time is tied, the order is preserved
+        sorted_indices_descending_reversed = torch.flip(
+            sorted_indices_descending.view(-1, group_size), dims=[1]
+        ).view(-1)
+        tasks_in_group = torch.where(
+            torch.arange(num_tasks, device=device) // group_size % 2 == 0,
+            sorted_indices_descending,
+            sorted_indices_descending_reversed,
+        )
+        tasks_in_group = tasks_in_group.view(-1, group_size).transpose(
+            0, 1
+        )  # (group_size, n // group_size)
+
+        # sort each group. This step should not have impact on correctness
+        # nor execution run time, but it helps users visualize the mask
+        tasks_in_group, _ = torch.sort(tasks_in_group, dim=1)
+        return tasks_in_group
+
+    def _generate_indices(self, restore: bool = False) -> Tensor:
+        """
+        Generate the PTRR reorder indices of shape `(1, seq_len)` or `(batch_size, seq_len)`.
+
+        Args:
+            restore:
+                If True, generate restore indices that map Processing-Time based Round-Robin
+                (PTRR) rearranged positions back to original positions. If False, generate
+                load balance indices that rearrange original positions to PTRR pattern.
+
+            Returns:
+                The generated indices of shape `(1, seq_len)` if the load-balancing is
+                identical within the batch (i.e. `BlockMask.shape[0] == 1`), or
+                `(batch_size, seq_len)` if the load-balancing should vary within the batch.
+
+        Warning:
+            For Multi-Head Attention, we require the masks over the head dimension are identical
+            (i.e. `self.block_mask` must have shape (B, 1, seq_len, seq_len) or (1, 1, seq_len, seq_len)).
+
+        Example:
+            Here is the document causal mask for attention whereq_len == kv_len == 16 * BLOCK_SIZE
+            (each entry is a block):
+                                        KV_index
+                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 1
+                    [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 2
+                    [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 3
+                    [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 4
+                    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 1
+                    [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 2
+                    [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 3
+            Q_index [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 4
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]  -> row value = 5
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]  -> row value = 6
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]  -> row value = 7
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]  -> row value = 8
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]  -> row value = 1
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]  -> row value = 2
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]  -> row value = 3
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]  -> row value = 4
+
+            The reorder indices will be: [2, 3, 5, 6, 8, 11, 12, 13, 0, 1, 4, 7, 9, 10, 14, 15] and
+            the mask matrix will look like:
+                                        KV_index
+                    [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 3
+                    [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 4
+                    [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 2
+                    [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 3
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]  -> row value = 5  rank 0 (sum=28)
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]  -> row value = 8
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]  -> row value = 1
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]  -> row value = 2
+                    ------------------------------------------------
+                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 1
+                    [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 2
+                    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 1
+                    [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]  -> row value = 4
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]  -> row value = 6  rank 1 (sum=28)
+                    [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]  -> row value = 7
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]  -> row value = 3
+                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]  -> row value = 4
+        """
+        block_mask = self.block_mask
+        kv_num_blocks = block_mask.kv_num_blocks
+        full_kv_num_blocks = block_mask.full_kv_num_blocks
+        non_sparse_kv_num_blocks = (
+            kv_num_blocks + full_kv_num_blocks
+            if full_kv_num_blocks is not None
+            else kv_num_blocks
+        )
+        B, H, Q = non_sparse_kv_num_blocks.shape
+        # requirement: the masking is identical across heads (i.e. H == 1 in BlockMask)
+        non_sparse_kv_num_blocks = non_sparse_kv_num_blocks.view(-1, Q)  # (B, Q_BLK)
+
+        batch_ptrr = torch.vmap(
+            functools.partial(
+                _PTRRLoadBalancer.ptrr_scheduling,
+                group_size=self.world_size,
+            )
+        )
+        ptrr_indices = batch_ptrr(
+            non_sparse_kv_num_blocks
+        )  # (B, group_size, num_blks_in_group)
+        ptrr_indices = ptrr_indices.reshape(B, -1)  # (B, num_blocks)
+
+        # NOTE: only support the case where the qkv block size are equal
+        q_blk_size, kv_blk_size = block_mask.BLOCK_SIZE
+        assert q_blk_size == kv_blk_size, (
+            "for now only support q_blk_size == kv_blk_size"
+        )
+
+        indices = torch.arange(
+            q_blk_size * ptrr_indices.size(1), device=ptrr_indices.device
+        ).view(-1, q_blk_size)  # (NUM_BLOCKS, BLOCK_SIZE)
+        indices = indices[ptrr_indices].view(B, -1)  # (B, qkv_size)
+
+        if restore:
+            indices = torch.vmap(torch.argsort)(indices)
+
+        return indices
+
+
+def _create_default_load_balancer(
+    seq_length: int, world_size: int, device: str | torch.device
+) -> _LoadBalancer | None:
+    from ._attention import _cp_options
+
+    if _cp_options.enable_load_balance:
+        return _HeadTailLoadBalancer(seq_length, world_size, device)
+    else:
+        return None
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_sharding_rules.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_sharding_rules.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebb6eb0cface8449af3b8d7df72c2e41a51de309
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/experimental/_context_parallel/_sharding_rules.py
@@ -0,0 +1,406 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+"""
+Context Parallelism sharding rules for scaled_dot_product attention operators.
+
+The sharding rules for CP cannot be embedded by default because Shard(2) is not
+a valid sharding for SDPA without CP enabled. This module provides utilities to
+dynamically install Shard(2) sharding rules when CP is activated.
+"""
+
+from contextlib import contextmanager
+
+import torch
+from torch.distributed.tensor._op_schema import (
+    OpSchema,
+    OpStrategy,
+    PlacementList,
+    RuntimeSchemaInfo,
+)
+from torch.distributed.tensor._ops.registration import register_op_strategy
+from torch.distributed.tensor._ops.utils import expand_to_full_mesh_op_strategy
+from torch.distributed.tensor.debug import (
+    _clear_fast_path_sharding_prop_cache,
+    _clear_python_sharding_prop_cache,
+)
+from torch.distributed.tensor.placement_types import Replicate, Shard
+
+
+aten = torch.ops.aten
+
+SEQ_DIM = 2
+
+
+@contextmanager
+def _op_strategy_context(op_overload, strategy_func, schema_info=None):
+    """
+    Context manager for setting and clearing op strategies for Context Parallelism.
+
+    Args:
+        op_overload: The operator overload to set or clear the strategy for.
+        strategy_func: The strategy function to set for the operator overload.
+        schema_info: Optional schema information for the operator overload.
+
+    Yields:
+        None
+    """
+    from torch.distributed.tensor import DTensor
+
+    propagator = DTensor._op_dispatcher.sharding_propagator
+    _origin_op_strategy_funcs = None
+    _origin_op_strategy_schema = None
+    try:
+        # Save original strategy if exists
+        if op_overload in propagator.op_strategy_funcs:
+            _origin_op_strategy_funcs = propagator.op_strategy_funcs[op_overload]
+        if op_overload in propagator.op_to_schema_info:
+            _origin_op_strategy_schema = propagator.op_to_schema_info[op_overload]
+
+        # Register the new op strategy
+        register_op_strategy(op_overload, schema_info=schema_info)(strategy_func)
+        yield (_origin_op_strategy_funcs, _origin_op_strategy_schema)
+    finally:
+        # Restore original strategy
+        if _origin_op_strategy_funcs is None:
+            if op_overload in propagator.op_strategy_funcs:
+                del propagator.op_strategy_funcs[op_overload]
+        else:
+            propagator.op_strategy_funcs[op_overload] = _origin_op_strategy_funcs
+
+        if _origin_op_strategy_schema is None:
+            if op_overload in propagator.op_to_schema_info:
+                del propagator.op_to_schema_info[op_overload]
+        else:
+            propagator.op_to_schema_info[op_overload] = _origin_op_strategy_schema
+
+        # Ideally, we should clear the cache, but it is too expensive.
+        # _clear_python_sharding_prop_cache()
+        # _clear_fast_path_sharding_prop_cache()
+
+
+# ==================== Flash Attention Strategies ====================
+
+
+def _scaled_dot_product_flash_attention_cp_strategy(op_schema: OpSchema) -> OpStrategy:
+    """
+    Strategy for flash attention forward with Context Parallelism support.
+    This includes the base strategies plus CP-specific sequence dimension sharding.
+    """
+    # Import here to avoid circular dependency
+    from torch.distributed.tensor._ops._matrix_ops import (
+        _scaled_dot_product_flash_attention_base_strategies,
+    )
+
+    # Get the base strategies (without CP modifications)
+    mesh = op_schema.get_mesh_from_args()
+    single_mesh_dim_strategies = _scaled_dot_product_flash_attention_base_strategies(
+        op_schema
+    )
+
+    # Add Context Parallelism strategy: shards on the sequence dim
+    return_debug_mask = len(op_schema.args_schema) >= 6 and op_schema.args_schema[5]
+    debug_attn_mask_sharding = Shard(SEQ_DIM) if return_debug_mask else Replicate()
+
+    cp_strategy: PlacementList = [
+        Shard(SEQ_DIM),  # output
+        Shard(SEQ_DIM),  # logsumexp
+        None,  # cum_seq_q
+        None,  # cum_seq_k
+        None,  # max_q
+        None,  # max_k
+        Replicate(),  # rng_state
+        None,  # unused
+        debug_attn_mask_sharding,  # debugattn
+        Shard(SEQ_DIM),  # q
+        Shard(SEQ_DIM),  # k
+        Shard(SEQ_DIM),  # v
+    ]
+    single_mesh_dim_strategies.append(cp_strategy)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=9
+    )
+
+
+def _scaled_dot_product_flash_attention_backward_cp_strategy(
+    op_schema: OpSchema,
+) -> OpStrategy:
+    """
+    Strategy for flash attention backward with Context Parallelism support.
+    """
+    from torch.distributed.tensor._ops._matrix_ops import (
+        _scaled_dot_product_flash_attention_backward_base_strategies,
+    )
+
+    mesh = op_schema.get_mesh_from_args(validate=False)
+    single_mesh_dim_strategies = (
+        _scaled_dot_product_flash_attention_backward_base_strategies(op_schema)
+    )
+
+    tensor_input_indices = [
+        i
+        for i, arg_spec in enumerate(op_schema.args_schema)
+        if isinstance(arg_spec, OpStrategy)
+    ]
+    num_tensor_inputs = len(tensor_input_indices)
+
+    # Context Parallelism: shards on the sequence dim
+    cp_strategy: PlacementList = [
+        Shard(SEQ_DIM),  # grad_q
+        Shard(SEQ_DIM),  # grad_k
+        Shard(SEQ_DIM),  # grad_v
+        Shard(SEQ_DIM),  # grad_output
+        Shard(SEQ_DIM),  # q
+        Shard(SEQ_DIM),  # k
+        Shard(SEQ_DIM),  # v
+        Shard(SEQ_DIM),  # output
+        Shard(SEQ_DIM),  # logsumexp
+    ]
+    cp_strategy.extend([Replicate()] * (num_tensor_inputs - 6))
+    single_mesh_dim_strategies.append(cp_strategy)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=3
+    )
+
+
+# ==================== Efficient Attention Strategies ====================
+
+
+def _scaled_dot_product_efficient_attention_cp_strategy(
+    op_schema: OpSchema,
+) -> OpStrategy:
+    """
+    Strategy for efficient attention forward with Context Parallelism support.
+    """
+    from torch.distributed.tensor._ops._matrix_ops import (
+        _scaled_dot_product_efficient_attention_base_strategies,
+    )
+
+    mesh = op_schema.get_mesh_from_args()
+    single_mesh_dim_strategies = (
+        _scaled_dot_product_efficient_attention_base_strategies(op_schema)
+    )
+
+    # Add Context Parallelism strategy
+    has_attn_bias = op_schema.args_schema[3] is not None
+
+    cp_strategy: PlacementList = [
+        Shard(SEQ_DIM),  # output
+        Shard(SEQ_DIM),  # logsumexp
+        None,  # philox_seed
+        None,  # philox_offset
+        Shard(SEQ_DIM),  # q
+        Shard(SEQ_DIM),  # k
+        Shard(SEQ_DIM),  # v
+    ]
+    if has_attn_bias:
+        cp_strategy.append(Replicate())  # attn bias - not sharded for CP
+    single_mesh_dim_strategies.append(cp_strategy)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=4
+    )
+
+
+def _scaled_dot_product_efficient_attention_backward_cp_strategy(
+    op_schema: OpSchema,
+) -> OpStrategy:
+    """
+    Strategy for efficient attention backward with Context Parallelism support.
+    """
+    from torch.distributed.tensor._ops._matrix_ops import (
+        _scaled_dot_product_efficient_attention_backward_base_strategies,
+    )
+
+    mesh = op_schema.get_mesh_from_args(validate=False)
+    single_mesh_dim_strategies = (
+        _scaled_dot_product_efficient_attention_backward_base_strategies(op_schema)
+    )
+
+    has_attn_bias = op_schema.args_schema[4] is not None
+
+    # Context Parallelism: shards on the sequence dim
+    cp_strategy: PlacementList = [
+        Shard(SEQ_DIM),  # grad_q
+        Shard(SEQ_DIM),  # grad_k
+        Shard(SEQ_DIM),  # grad_v
+        Shard(1) if has_attn_bias else None,  # grad_bias
+        Shard(SEQ_DIM),  # grad_output
+        Shard(SEQ_DIM),  # q
+        Shard(SEQ_DIM),  # k
+        Shard(SEQ_DIM),  # v
+        Shard(SEQ_DIM),  # output
+        Shard(SEQ_DIM),  # logsumexp
+    ]
+    if has_attn_bias:
+        cp_strategy.insert(8, Shard(1))  # attn_bias input
+    cp_strategy.extend([Replicate(), Replicate()])
+    single_mesh_dim_strategies.append(cp_strategy)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=4
+    )
+
+
+# ==================== cuDNN Attention Strategies ====================
+
+
+def _scaled_dot_product_cudnn_attention_cp_strategy(op_schema: OpSchema) -> OpStrategy:
+    """
+    Strategy for cudnn attention forward with Context Parallelism support.
+    """
+    from torch.distributed.tensor._ops._matrix_ops import (
+        _scaled_dot_product_cudnn_attention_base_strategies,
+    )
+
+    mesh = op_schema.get_mesh_from_args()
+    single_mesh_dim_strategies = _scaled_dot_product_cudnn_attention_base_strategies(
+        op_schema
+    )
+
+    (
+        query_strategy,
+        _,
+        _,
+        attn_bias_strategy,
+        compute_log_sumexp,
+        *rest_args,
+    ) = op_schema.args_schema
+    return_debug_mask = len(op_schema.args_schema) >= 8 and rest_args[2]
+    has_attn_bias = attn_bias_strategy is not None
+
+    # Context Parallelism: shards on the sequence dim
+    logsumexp_sharding = Shard(SEQ_DIM) if compute_log_sumexp else Replicate()
+    debug_attn_mask_sharding = Shard(SEQ_DIM) if return_debug_mask else None
+
+    cp_strategy: PlacementList = [
+        Shard(SEQ_DIM),  # output
+        logsumexp_sharding,  # logsumexp
+        None,  # cum_seq_q
+        None,  # cum_seq_k
+        None,  # max_q
+        None,  # max_k
+        None,  # philox_seed
+        None,  # philox_offset
+        debug_attn_mask_sharding,  # debug_attn_mask
+        Shard(SEQ_DIM),  # q
+        Shard(SEQ_DIM),  # k
+        Shard(SEQ_DIM),  # v
+    ]
+    if has_attn_bias:
+        cp_strategy.append(Replicate())  # attn_bias - not sharded for CP
+    single_mesh_dim_strategies.append(cp_strategy)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=9
+    )
+
+
+def _scaled_dot_product_cudnn_attention_backward_cp_strategy(
+    op_schema: OpSchema,
+) -> OpStrategy:
+    """
+    Strategy for cudnn attention backward with Context Parallelism support.
+    """
+    from torch.distributed.tensor._ops._matrix_ops import (
+        _scaled_dot_product_cudnn_attention_backward_base_strategies,
+    )
+
+    mesh = op_schema.get_mesh_from_args(validate=False)
+    single_mesh_dim_strategies = (
+        _scaled_dot_product_cudnn_attention_backward_base_strategies(op_schema)
+    )
+
+    has_attn_bias = op_schema.args_schema[8] is not None
+    has_scale = len(op_schema.args_schema) >= 16 and False
+
+    # Context Parallelism: shards on the sequence dim
+    cp_sharding_gout: PlacementList = [Shard(SEQ_DIM)] * 3  # grad_q, grad_k, grad_v
+    cp_sharding_ginp: PlacementList = [
+        Shard(SEQ_DIM)
+    ] * 6  # grad_output, q, k, v, output, logsumexp
+    cp_sharding_ginp += [Replicate()] * 2  # philox_seed, philox_offset
+    cp_sharding_ginp += [Shard(SEQ_DIM) if has_attn_bias else None]  # attn_bias
+    cp_sharding_ginp += [
+        None
+    ] * 6  # cum_seq_q, cum_seq_k, max_q, max_k, dropout_p, is_causal
+    if has_scale:
+        cp_sharding_ginp.append(None)
+
+    cp_sharding = cp_sharding_gout + cp_sharding_ginp
+    single_mesh_dim_strategies.append(cp_sharding)
+
+    return expand_to_full_mesh_op_strategy(
+        mesh, op_schema, single_mesh_dim_strategies, input_index=3
+    )
+
+
+# Store context managers and original strategies
+_cp_strategy_contexts = {}
+_original_strategies = {}
+
+
+def register_cp_sharding_rules():
+    """Register Context Parallelism sharding rules for all scaled_dot_product ops."""
+    global _cp_strategy_contexts, _original_strategies
+
+    # If already registered, don't register again
+    if _cp_strategy_contexts:
+        return
+
+    # Define ops and their corresponding CP strategy functions
+    cp_strategies = [
+        (
+            aten._scaled_dot_product_flash_attention.default,
+            _scaled_dot_product_flash_attention_cp_strategy,
+            RuntimeSchemaInfo(5),
+        ),
+        (
+            aten._scaled_dot_product_flash_attention_backward.default,
+            _scaled_dot_product_flash_attention_backward_cp_strategy,
+            None,
+        ),
+        (
+            aten._scaled_dot_product_efficient_attention.default,
+            _scaled_dot_product_efficient_attention_cp_strategy,
+            RuntimeSchemaInfo(4),
+        ),
+        (
+            aten._scaled_dot_product_efficient_attention_backward.default,
+            _scaled_dot_product_efficient_attention_backward_cp_strategy,
+            None,
+        ),
+        (
+            aten._scaled_dot_product_cudnn_attention.default,
+            _scaled_dot_product_cudnn_attention_cp_strategy,
+            RuntimeSchemaInfo(4),
+        ),
+        (
+            aten._scaled_dot_product_cudnn_attention_backward.default,
+            _scaled_dot_product_cudnn_attention_backward_cp_strategy,
+            None,
+        ),
+    ]
+
+    # Register each strategy
+    for op_overload, strategy_func, schema_info in cp_strategies:
+        ctx = _op_strategy_context(op_overload, strategy_func, schema_info)
+        orig_funcs, orig_schema = ctx.__enter__()
+        _cp_strategy_contexts[op_overload] = ctx
+        _original_strategies[op_overload] = (orig_funcs, orig_schema)
+
+
+def unregister_cp_sharding_rules(clear_the_cache=False):
+    """Unregister Context Parallelism sharding rules and restore original strategies."""
+    global _cp_strategy_contexts, _original_strategies
+
+    # Exit all context managers
+    for ctx in _cp_strategy_contexts.values():
+        ctx.__exit__(None, None, None)
+
+    if clear_the_cache:
+        _clear_fast_path_sharding_prop_cache()
+        _clear_python_sharding_prop_cache()
+
+    _cp_strategy_contexts = {}
+    _original_strategies = {}
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e4881de43874ab238b1cfbe6003c9a8751f0c3b
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__init__.py
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from torch.distributed.tensor.parallel.api import parallelize_module
+from torch.distributed.tensor.parallel.loss import loss_parallel
+from torch.distributed.tensor.parallel.style import (
+    ColwiseParallel,
+    ParallelStyle,
+    PrepareModuleInput,
+    PrepareModuleInputOutput,
+    PrepareModuleOutput,
+    RowwiseParallel,
+    SequenceParallel,
+)
+
+
+__all__ = [
+    "ColwiseParallel",
+    "ParallelStyle",
+    "PrepareModuleInput",
+    "PrepareModuleInputOutput",
+    "PrepareModuleOutput",
+    "RowwiseParallel",
+    "SequenceParallel",
+    "parallelize_module",
+    "loss_parallel",
+]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c30bea5d036c6625795f44b736308a6fe3a94c81
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/_data_parallel_utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/_data_parallel_utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..81cd9b3a8d8869e8ebb580a643950c6be2027880
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/_data_parallel_utils.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/api.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/api.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..08f471e9bf2aadd49816e604474294e06d04ac93
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/api.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/ddp.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/ddp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8360efd1d40d67f4be0431d1c30175fd0b1cd2d9
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/ddp.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/fsdp.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/fsdp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7447214be8ad41a773264021b015393c5269daab
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/fsdp.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/input_reshard.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/input_reshard.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8c34f6f3185965d66117ac181ae3cecedae6519d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/input_reshard.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/loss.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/loss.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5647ccd121ea7755d3529c52dc2897ab234ec494
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/loss.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/style.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/style.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..176061b13aec318f715336964b6b26869608db73
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/__pycache__/style.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/_data_parallel_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/_data_parallel_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..735b74e099478ebc606d68b3099f721c59874297
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/_data_parallel_utils.py
@@ -0,0 +1,51 @@
+from functools import partial
+from typing import no_type_check
+
+import torch
+from torch.distributed._functional_collectives import AsyncCollectiveTensor
+from torch.distributed.tensor import DTensor
+from torch.distributed.tensor._dtensor_spec import DTensorSpec
+
+
+@no_type_check
+def sync_grad_hook(grad, *, device_handle=None, compute_stream=None):
+    if isinstance(grad, AsyncCollectiveTensor):
+        if compute_stream is not None:
+            with device_handle.stream(compute_stream):
+                grad = grad.wait()
+        else:
+            grad = grad.wait()
+
+    return grad
+
+
+def _flatten_tensor(
+    tensor: torch.Tensor,
+) -> tuple[torch.Tensor, DTensorSpec | None]:
+    if isinstance(tensor, DTensor):
+        tensor._local_tensor.requires_grad_()
+        return tensor._local_tensor, tensor._spec
+    return tensor, None
+
+
+@no_type_check
+def _unflatten_tensor(tensor, spec, *, device_handle=None, compute_stream=None):
+    # unflatten would mainly be called every time FSDP allgather parameters.
+    result = DTensor.from_local(
+        tensor,
+        spec.mesh,
+        spec.placements,
+        run_check=False,
+        shape=spec.shape,
+        stride=spec.stride,
+    )
+    if tensor.requires_grad:
+        # only register the hook if the tensor requires grad
+        tensor.register_hook(
+            partial(
+                sync_grad_hook,
+                device_handle=device_handle,
+                compute_stream=compute_stream,
+            )
+        )
+    return result
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/api.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/api.py
new file mode 100644
index 0000000000000000000000000000000000000000..954b62327808d13da7d56923efe35600085eee1e
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/api.py
@@ -0,0 +1,142 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+import warnings
+from fnmatch import fnmatch
+
+import torch
+import torch.nn as nn
+from torch.distributed.device_mesh import _mesh_resources, DeviceMesh
+from torch.distributed.tensor.parallel.style import ParallelStyle
+
+
+__all__ = ["parallelize_module"]
+
+
+def parallelize_module(  # type: ignore[return]
+    module: nn.Module,
+    device_mesh: DeviceMesh | None = None,
+    parallelize_plan: ParallelStyle | dict[str, ParallelStyle] | None = None,
+    *,
+    src_data_rank: int | None = 0,
+) -> nn.Module:
+    """
+    Apply Tensor Parallelism in PyTorch by parallelizing modules or sub-modules based on a user-specified plan.
+
+    We parallelize module or sub_modules based on a parallelize_plan. The parallelize_plan contains
+    :class:`ParallelStyle`, which indicates how user wants the module or sub_module
+    to be parallelized.
+
+    User can also specify different parallel style per module fully qualified name (FQN).
+
+    Note that ``parallelize_module`` only accepts a 1-D :class:`DeviceMesh`, if you have a 2-D or N-D :class:`DeviceMesh`,
+    slice the DeviceMesh to a 1-D sub DeviceMesh first then pass to this API(i.e. ``device_mesh[\"tp\"]``)
+
+    Args:
+        module (:class:`nn.Module`):
+            Module to be parallelized.
+        device_mesh (:class:`DeviceMesh`, optional):
+            Object which describes the mesh topology of devices for the DTensor.
+            If not specified, the call must be under a DeviceMesh context.
+        parallelize_plan (Union[:class:`ParallelStyle`, Dict[str, :class:`ParallelStyle`]], optional):
+            The plan used to parallelize the module. It can be either a
+            :class:`ParallelStyle` object which contains how we prepare
+            input/output for Tensor Parallelism or it can be a dict of module
+            FQN and its corresponding :class:`ParallelStyle` object. If not
+            specified, the call will do nothing at the moment.
+    Keyword args:
+        src_data_rank (int, optional): the rank of the source data for the logical/global tensor, it is used by
+            :meth:`distribute_tensor` to scatter/broadcast the shards/replicas to other ranks. By default,
+            we use ``group_rank=0`` on each DeviceMesh dimension as the source data to preserve the single-device
+            semantic. If passing ``None`` explicitly, :meth:`parallelize_module` simply uses its local data instead
+            of trying to preserve the single-device semantic via scatter/broadcast. Default: 0
+    Return:
+        A :class:`nn.Module` object parallelized.
+
+    Example::
+        >>> # xdoctest: +SKIP("distributed")
+        >>> from torch.distributed.tensor.parallel import parallelize_module, ColwiseParallel
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>>
+        >>> # Define the module.
+        >>> m = Model(...)
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>> m = parallelize_module(m, tp_mesh, {"w1": ColwiseParallel(), "w2": RowwiseParallel()})
+        >>>
+
+    .. note:: For complex module architecture like Attention, MLP layers, we recommend composing
+        different ParallelStyles together (i.e. ``ColwiseParallel`` and ``RowwiseParallel``) and pass
+        as a parallelize_plan, to achieves the desired sharding computation.
+    """
+    torch._C._log_api_usage_once("torch.distributed.tensor.parallel.parallelize_module")
+
+    device_mesh = device_mesh or _mesh_resources.get_current_mesh()
+
+    if parallelize_plan is None:
+        warnings.warn(
+            "No parallelize_plan is provided and auto-parallel is not supported "
+            "at the moment, so this parallelize_module call will do nothing.",
+            stacklevel=2,
+        )
+        return module
+
+    # note: The RNG tracker will be initialized in distribute_tensor() call if it hasn't
+    # been initialized.
+
+    if isinstance(parallelize_plan, ParallelStyle):
+        parallelize_plan.src_data_rank = src_data_rank
+        return parallelize_plan._apply(module, device_mesh)
+    elif isinstance(parallelize_plan, dict):
+        for module_path, parallelize_style in parallelize_plan.items():
+            if module_path == "":
+                # shortcut: empty string means to apply the plan to the current module
+                parallelize_module(module, device_mesh, parallelize_style)
+                continue
+
+            path_splits = module_path.split(".")
+            # Instead of blindly popping tokens, first check the match,
+            # we only consume/pop the token if we found a match.
+            token = path_splits[0]
+
+            matched_children = list(
+                filter(
+                    # `t[0]` is child name
+                    lambda t: fnmatch(t[0], token),
+                    module.named_children(),
+                )
+            )
+            if not matched_children:
+                # No match at this level. Log a warning and process next plan entry.
+                warnings.warn(
+                    f"Parallelize plan key '{module_path}' could not be resolved: "
+                    f"no submodule matching token '{token}' in module {module}, "
+                    f"skipping this plan entry.",
+                    stacklevel=2,
+                )
+                continue
+
+            # Now that we have a match, we can consume the token.
+            path_splits.pop(0)
+            # apply the plan to all matched submodules
+            for _, submodule in matched_children:
+                if path_splits:
+                    # we haven't reached the leaf, apply in dict style
+                    leaf_path = ".".join(path_splits)  # rest of the path after `token`
+                    parallelize_module(
+                        submodule,
+                        device_mesh,
+                        {leaf_path: parallelize_style},
+                        src_data_rank=src_data_rank,
+                    )
+                else:
+                    # otherwise, directly apply style to this submodule
+                    parallelize_module(
+                        submodule,
+                        device_mesh,
+                        parallelize_style,
+                        src_data_rank=src_data_rank,
+                    )
+        return module
+    else:
+        raise TypeError(  # pyre-ignore[7]
+            "Expect Union[ParallelStyle, Dict[str, ParallelStyle]] for"
+            f" parallelize_plan, {type(parallelize_plan)} found!"
+        )
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/ddp.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/ddp.py
new file mode 100644
index 0000000000000000000000000000000000000000..19c1d3ca5477ee79f418fd3d2de71eac4103c1e4
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/ddp.py
@@ -0,0 +1,104 @@
+# mypy: allow-untyped-defs
+from typing import Any
+
+import torch.nn as nn
+from torch.distributed.tensor.parallel._data_parallel_utils import (
+    _flatten_tensor,
+    _unflatten_tensor,
+)
+
+
+__all__ = []  # type: ignore[var-annotated]
+
+
+def _get_submodule_n_params(module: nn.Module, path: str):
+    """
+    Get submodule and the direct path of parameter from the module
+    """
+    if "." in path:
+        path_list = path.split(".")
+        parent_module_path = ".".join(path_list[:-1])
+        module = module.get_submodule(parent_module_path)
+        path = path_list[-1]
+    return module, path
+
+
+def _update_module_param(param_list: list[tuple[nn.Module, str, nn.Parameter]]):
+    """
+    Update parameters within the module
+    """
+    for item in param_list:
+        parent_module, module_path, t = item
+        assert hasattr(parent_module, module_path)
+        delattr(parent_module, module_path)
+        setattr(parent_module, module_path, t)
+
+
+def _reconstruct_dtensor(module: nn.Module, _input: Any):
+    """
+    Reconstruct DTensor parameters from local tensors
+    """
+    param_list = []
+    # TODO: To add perf optimizations to this iterations
+    for name, t in module.named_parameters():
+        if hasattr(t, "_st_info"):
+            dtensor = _unflatten_tensor(t, t._st_info)
+            param_list.append((*_get_submodule_n_params(module, name), dtensor))
+    _update_module_param(param_list)  # type: ignore[arg-type]
+
+
+def _localize_dtensor(
+    module: nn.Module, *_: Any, ignored_params: set[nn.Parameter] | None = None
+):
+    """
+    Convert DTensor parameters to local tensors
+    """
+    if ignored_params is None:
+        ignored_params = set()
+    param_list = []
+    for name, param in module.named_parameters():
+        if param in ignored_params:
+            continue
+        t, sharding_info = _flatten_tensor(param)
+        if sharding_info is not None:
+            t = nn.Parameter(t)
+            t._st_info = sharding_info  # type: ignore[attr-defined]
+            param_list.append((*_get_submodule_n_params(module, name), t))
+    _update_module_param(param_list)  # type: ignore[arg-type]
+
+
+def _pre_dp_module_transform(module: nn.Module):
+    """
+    Enable the composability between Tensor Parallelism (TP) and Data
+    Parallelism(DP) in PyTorch when using DDP. We need to convert Parameters which
+    are DTensors to local tensors before wrapping with data parallelism API.
+    We then register two hooks, one for converting local tensors back to DTensor
+    preforward and one to convert DTensors back to tensors after Forward. By
+    integrating this way, we avoid any special handling of DTensor parameters by DDP
+    and get DTensor's gradients propagated back to DP, e.g. gradient buckets of DDP.
+
+    For now, this API only works with ``DistributedDataParallel``. It will later support
+    other DP methods such as FSDP.
+
+    Args:
+        module (:class:`nn.Module`):
+            Module which has been applied TP on.
+
+    Example::
+        >>> # xdoctest: +SKIP("distributed")
+        >>> from torch.distributed.tensor.parallel import parallelize_module, PairwiseParallel
+        >>> from torch.nn.parallel import DistributedDataParallel as DDP
+        >>> from torch.distributed.tensor.parallel.ddp import pre_dp_module_transform
+        >>>
+        >>> # Define the module.
+        >>> m = module(...)
+        >>> parallelize_module(m, PairwiseParallel())
+        >>> m = pre_dp_module_transform(m)
+        >>> m = DDP(m)
+        >>>
+    """
+
+    _localize_dtensor(module, None, None)
+    # TODO: To add test cases and ensure that it works for nested modules
+    module.register_forward_pre_hook(_reconstruct_dtensor)
+    module.register_forward_hook(_localize_dtensor)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/fsdp.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/fsdp.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e68ed6b1dba50f35981f3b633f089e852e57f7c
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/fsdp.py
@@ -0,0 +1,391 @@
+# mypy: allow-untyped-defs
+import copy
+from typing import Any, cast
+
+import torch
+import torch.distributed as dist
+import torch.distributed._shard.sharding_spec as shard_spec
+import torch.distributed.distributed_c10d as c10d
+from torch.distributed._shard.sharded_tensor import (
+    Shard,
+    ShardedTensor,
+    ShardedTensorMetadata,
+    TensorProperties,
+)
+from torch.distributed._shard.sharding_spec import ShardMetadata
+from torch.distributed._shard.sharding_spec.chunk_sharding_spec import ChunkShardingSpec
+from torch.distributed.fsdp._common_utils import _set_fsdp_flattened
+from torch.distributed.fsdp._fsdp_extensions import FSDPExtensions
+from torch.distributed.fsdp._shard_utils import _create_chunk_sharded_tensor
+from torch.distributed.remote_device import _remote_device
+from torch.distributed.tensor import DeviceMesh, DTensor, Replicate, Shard as DShard
+from torch.distributed.tensor.parallel._data_parallel_utils import (
+    _flatten_tensor,
+    _unflatten_tensor,
+)
+
+
+__all__ = ["DTensorExtensions"]
+
+
+def _get_box(tensor: DTensor) -> tuple[torch.Size, torch.Size]:
+    device_mesh = tensor.device_mesh
+    assert device_mesh.ndim == 1, "Only 1D DeviceMeshes currently handled"
+
+    placement = tensor.placements[0]
+    offsets = [0] * len(tensor.size())
+    num_chunks = device_mesh.size(mesh_dim=0)
+
+    if tensor.placements[0].is_shard():
+        shard_dim = cast(DShard, placement).dim
+        chunk_size = tensor.size(shard_dim) // num_chunks
+        offsets[shard_dim] = chunk_size
+
+    return (torch.Size(offsets), tensor._local_tensor.size())
+
+
+def _get_box_for(tensor: DTensor, idx: int) -> tuple[torch.Size, torch.Size]:
+    offsets, size = _get_box(tensor)
+    return (torch.Size([val * idx for val in offsets]), size)
+
+
+def _get_local_box(tensor: DTensor) -> tuple[torch.Size, torch.Size]:
+    device_mesh = tensor.device_mesh
+    coord = device_mesh.get_coordinate()
+    assert coord is not None
+    return _get_box_for(tensor, coord[0])
+
+
+def _create_shard_md_from_dt(dt: DTensor, current_rank: int) -> ShardMetadata:
+    mesh = dt.device_mesh
+    assert mesh.ndim == 1, "Only 1D DeviceMeshes currently handled"
+
+    offsets, sizes = _get_local_box(dt)
+    return ShardMetadata(
+        shard_offsets=list(offsets),
+        shard_sizes=list(sizes),
+        placement=f"rank:{current_rank}/{dt._local_tensor.device}",
+    )
+
+
+def _create_sharded_tensor_md_from_dt(
+    dt: DTensor, dt_pg: c10d.ProcessGroup
+) -> ShardedTensorMetadata:
+    # This is where it gets tricky, we have to produce a ShardedTensor that has full coverage
+    # and yet has only one valid shard for the current rank.
+
+    shards_md = []
+    my_rank = dist.get_rank(dt_pg)
+    scapegoat_rank = 0 if my_rank > 0 else 1
+
+    if dt.placements[0].is_shard():
+        shard_count = dt_pg.size()
+    else:
+        shard_count = 1
+
+    for i in range(shard_count):
+        offsets, sizes = _get_box_for(dt, i)
+        shards_md.append(
+            ShardMetadata(
+                shard_offsets=list(offsets),
+                shard_sizes=list(sizes),
+                placement=(
+                    f"rank:{scapegoat_rank if i > 0 else my_rank}/{dt._local_tensor.device}"
+                ),
+            )
+        )
+
+    return ShardedTensorMetadata(
+        shards_metadata=shards_md,
+        size=dt.size(),
+        tensor_properties=TensorProperties(
+            dtype=dt.dtype,
+            layout=dt.layout,
+            requires_grad=dt.requires_grad,
+            # ignore memory_format and pin_memory as those are not supported by DT
+        ),
+    )
+
+
+def _get_dt_pg(dt: DTensor) -> c10d.ProcessGroup:
+    mesh = dt.device_mesh
+    assert mesh.ndim == 1, "Only 1D DeviceMeshes currently handled"
+    return mesh.get_group()
+
+
+def _rewrite_spec_if_needed(
+    spec: shard_spec.ShardingSpec, tensor: torch.Tensor, rank: int
+) -> shard_spec.ShardingSpec:
+    """
+    Rewrite ``spec`` to match the device of ``tensor``.
+
+    FSDP.sharded_optim_state_dict sneakly ships optimizer state to CPU so if the original ShardingSpec
+    produces CUDA metadata, ST construction bombs.
+    """
+    if not isinstance(spec, ChunkShardingSpec):
+        return spec
+
+    # let's see if we need
+    rewrite = False
+    for p in spec.placements:
+        p = cast(_remote_device, p)
+        if p.rank() == rank and p.device() != tensor.device:
+            rewrite = True
+            break
+    if rewrite:
+        spec = copy.deepcopy(spec)
+        # pyrefly: ignore [missing-attribute]
+        for i, placement in enumerate(spec.placements):
+            placement = cast(_remote_device, placement)
+            if placement.rank() == rank and placement.device() != tensor.device:
+                # pyrefly: ignore [missing-attribute]
+                spec.placements[i] = _remote_device(f"rank:{rank}/{tensor.device}")
+
+    return spec
+
+
+def _chunk_tensor(
+    tensor: torch.Tensor,
+    rank: int,
+    world_size: int,
+    num_devices_per_node: int,
+    pg: dist.ProcessGroup,
+) -> torch.Tensor:
+    if type(tensor) is ShardedTensor:
+        assert len(tensor.local_shards()) == 1
+
+        inner_param = tensor.local_tensor()
+        inner_st = _create_chunk_sharded_tensor(
+            inner_param,
+            rank,
+            world_size,
+            num_devices_per_node,
+            pg,
+        )
+
+        outer_local_shard = tensor.local_shards()[0]
+        shards: list[Shard] = [
+            Shard(inner_st, copy.deepcopy(outer_local_shard.metadata))
+        ]
+        st_meta = copy.deepcopy(tensor.metadata())
+        st_meta.tensor_properties.requires_grad = False
+
+        st_outer = ShardedTensor._init_from_local_shards_and_global_metadata(
+            shards,
+            sharded_tensor_metadata=st_meta,
+            process_group=tensor._process_group,
+            init_rrefs=False,
+        )
+        return st_outer
+    elif type(tensor) is DTensor:
+        device_mesh = tensor.device_mesh
+        assert device_mesh.ndim == 1, "Only 1D DeviceMeshes currently handled"
+
+        inner_param = tensor._local_tensor
+
+        inner_st = _create_chunk_sharded_tensor(
+            inner_param,
+            rank,
+            world_size,
+            torch.accelerator.device_count(),
+            pg,
+        )
+
+        dt_pg = _get_dt_pg(tensor)
+        # We do this differently here, we create a ST with no local shards then patch it
+        shards = [
+            Shard(inner_st, _create_shard_md_from_dt(tensor, dist.get_rank(dt_pg)))
+        ]
+
+        st_meta = _create_sharded_tensor_md_from_dt(tensor, dt_pg)
+        st_meta.tensor_properties.requires_grad = False
+
+        st_outer = ShardedTensor._init_from_local_shards_and_global_metadata(
+            shards,
+            sharded_tensor_metadata=st_meta,
+            process_group=dt_pg,
+            init_rrefs=False,
+        )
+
+        return st_outer
+    else:
+        return _create_chunk_sharded_tensor(
+            tensor,
+            rank,
+            world_size,
+            num_devices_per_node,
+            pg,
+        )
+
+
+def _chunk_dtensor(
+    tensor: torch.Tensor,
+    rank: int,
+    device_mesh: DeviceMesh,
+) -> DTensor:
+    """
+    Shard a tensor to chunks along the first dimension.
+
+    The local rank will gets its corresponding chunk as the local tensor to create a DTensor.
+    """
+    root_mesh = device_mesh._get_root_mesh() if device_mesh is not None else None
+    if root_mesh is None:
+        raise RuntimeError("No parent device_mesh is found for FSDP device_mesh.")
+    if root_mesh.ndim < 2:
+        raise RuntimeError(
+            f"Found parent device_mesh of ndim={root_mesh.ndim},",
+            "but meshes must be at least 2D.",
+        )
+
+    # We need to explicitly call .detach() to return a new tensor detached from the current graph.
+    tensor = tensor.detach().clone()
+
+    # When a layer is not involved in TP, then the tensor will not be a DTensor.
+    # e.g. When a layer is not sppecified in the parallelize_plan, TP will have no effect on the layer.
+    # e.g. When you do PairwiseParallel on a 3 layer model, TP will have no effect on the third layer.
+    if isinstance(tensor, torch.Tensor) and not isinstance(tensor, DTensor):
+        # For tensors, it is replicated across tp dimension and sharded across FSDP dimension.
+        # TP is the inner dimension and FSDP is the outer dimension.
+        # Therefore, shard placements for tensor is (Shard(0), Replicate()).
+        replicate_placements = [Replicate() for _ in range(root_mesh.ndim)]
+        shard_placements = [Replicate() for _ in range(root_mesh.ndim)]
+        shard_placements[0] = DShard(0)  # type: ignore[call-overload]
+
+        return DTensor.from_local(
+            tensor, root_mesh, replicate_placements, run_check=False
+        ).redistribute(
+            device_mesh=root_mesh,
+            placements=shard_placements,
+        )
+
+    else:
+        tp_placements = tensor.placements
+        tp_placement = tp_placements[0]
+
+        tensor = tensor.to_local()
+
+        # For DTensors, it is sharded across tp dimension first and then sharded across FSDP dimension.
+        # TP is the inner dimension and FSDP is the outer dimension.
+        # Therefore, shard placements for tensor is (Shard(0), tp_placement).
+        # For higher dimensional meshes, it is replicated across other dimensions. For example, with
+        # HSDP the shard placements for tensor is (Replicate, Shard(0), tp_placement).
+        replicate_placements = [Replicate() for _ in range(root_mesh.ndim)]
+        replicate_placements[-1] = tp_placement  # type: ignore[call-overload]
+        shard_placements = [Replicate() for i in range(root_mesh.ndim)]  # type: ignore[misc]
+        shard_placements[-2] = DShard(0)  # type: ignore[call-overload]
+        shard_placements[-1] = tp_placement  # type: ignore[call-overload]
+
+        return DTensor.from_local(
+            tensor, root_mesh, replicate_placements, run_check=False
+        ).redistribute(
+            device_mesh=root_mesh,
+            placements=shard_placements,
+        )
+
+
+def _pre_load_state_dict(
+    tensor: torch.Tensor,
+) -> tuple[torch.Tensor, list[Shard]]:
+    shards = cast(ShardedTensor, tensor).local_shards()
+    if len(shards) == 1 and type(shards[0].tensor) is ShardedTensor:
+        inner_tensor = shards[0].tensor
+        shards = inner_tensor.local_shards()  # pyre-ignore[16]
+        tensor = inner_tensor
+
+    return (tensor, shards if len(shards) > 0 else [])
+
+
+def _all_gather_dtensor(
+    tensor: DTensor,
+    parent_mesh: DeviceMesh | None,
+) -> torch.Tensor:
+    """All gather a DTensor in its FSDP dimension and return the local tensor."""
+    assert parent_mesh == tensor.device_mesh
+
+    placements = list(copy.deepcopy(tensor.placements))
+    # FSDP + TP: [Shard(0), tp_placement] -> [Replicate(), tp_placement]
+    # HSDP + TP: [Replicate(), Shard(0), tp_placement] -> [Replicate(), Replicate(), tp_placement]
+    for i in range(len(placements) - 1):
+        placements[i] = Replicate()
+    tensor = tensor.redistribute(
+        device_mesh=tensor.device_mesh,
+        placements=placements,
+    )
+
+    return tensor.to_local()
+
+
+class DTensorExtensions(FSDPExtensions):
+    """
+    DTensorExtension is the TensorFlattener extension needed for 2D FSDP + TP.
+
+    This is the implementation for FSDPExtensions defined in
+    https://github.com/pytorch/pytorch/blob/main/torch/distributed/fsdp/_fsdp_extensions.py
+    """
+
+    def __init__(self, device_handle) -> None:
+        super().__init__()
+        self.compute_stream = None
+        self.device_handle = device_handle
+        # we have to use the dynamo disable this way to disable dynamo as the decorator way would
+        # trigger build failure with torch deploy...
+        self.post_unflatten_transform = torch._dynamo.disable(  # type: ignore[method-assign]
+            self.post_unflatten_transform
+        )
+
+    def pre_flatten_transform(
+        self,
+        tensor: torch.Tensor,
+    ) -> tuple[torch.Tensor, Any | None]:
+        return _flatten_tensor(tensor)
+
+    def post_unflatten_transform(
+        self, tensor: torch.Tensor, param_extension: Any
+    ) -> torch.Tensor:
+        stream = self.compute_stream or self.device_handle.current_stream()
+        with self.device_handle.stream(stream):
+            # runtime we put the unflattened tensor call on the compute stream since
+            # the unflattened tensor might contain computations in fwd/bwd where we
+            # need to sync properly.
+            # TODO: this is a short term fix and we should make the get_unflat_views
+            # directly happen in the compute stream.
+            result = _unflatten_tensor(
+                tensor,
+                param_extension,
+                device_handle=self.device_handle,
+                compute_stream=self.compute_stream,
+            )
+            _set_fsdp_flattened(result)
+            return result
+
+    def chunk_tensor(
+        self,
+        tensor: torch.Tensor,
+        rank: int,
+        world_size: int,
+        num_devices_per_node: int,
+        pg: dist.ProcessGroup,
+        device: torch.device | None = None,
+    ) -> torch.Tensor:
+        return _chunk_tensor(tensor, rank, world_size, num_devices_per_node, pg)
+
+    def chunk_dtensor(
+        self,
+        tensor: torch.Tensor,
+        rank: int,
+        device_mesh: DeviceMesh,
+    ) -> torch.Tensor:
+        return _chunk_dtensor(tensor, rank, device_mesh)
+
+    def pre_load_state_dict_transform(
+        self,
+        tensor: torch.Tensor,
+    ) -> tuple[torch.Tensor, list[Shard]]:
+        return _pre_load_state_dict(tensor)
+
+    def all_gather_dtensor(
+        self,
+        tensor: DTensor,
+        parent_mesh: DeviceMesh | None,
+    ) -> torch.Tensor:
+        return _all_gather_dtensor(tensor, parent_mesh)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/input_reshard.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/input_reshard.py
new file mode 100644
index 0000000000000000000000000000000000000000..81e25621e040abd767e033ee2efec56e466262b6
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/input_reshard.py
@@ -0,0 +1,106 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from functools import partial
+from typing import Any
+
+import torch
+from torch.distributed.tensor import DeviceMesh, DTensor, Replicate, Shard
+
+
+__all__ = [
+    "input_reshard",
+]
+
+
+def input_reshard(
+    module: torch.nn.Module,
+    tp_device_mesh: DeviceMesh,
+    input_reshard_dim: int | None = None,
+) -> torch.nn.Module:
+    """
+    Register hooks to an nn.Module for input resharding, enabling sharding and restoration during backward computation.
+
+    Register hooks to an nn.Module with input resharding so that we can shard
+    per the given `tp_device_mesh` and `input_reshard_dim` and restore the
+    input back when recomputing the activations in the backward. The reason
+    why we can do this is that for Tensor Parallel(TP), the input are same
+    across all TP ranks.
+
+    Args:
+        module (:class:`nn.Module`):
+            Module to be registered with input resharding.
+        tp_device_mesh (:class:`DeviceMesh`):
+            Object which describes the mesh topology
+            of devices for Tensor Parallel.
+        input_reshard_dim (Optional[int]):
+            The dimension of where we perform the sharding
+            of input. If set None, there is no sharding of input.
+            Default: None
+
+    Return:
+        A :class:`nn.Module` object registered with TP input resharding.
+    """
+    if input_reshard_dim is None:
+        return module
+
+    cx: torch.autograd.graph.saved_tensors_hooks | None = None
+
+    def input_reshard_forward_pre_hook(_: torch.nn.Module, _i: tuple[Any, ...]) -> None:
+        saved_tensor_hooks = torch.autograd.graph.saved_tensors_hooks(
+            partial(_pack_hook_tp, tp_device_mesh, input_reshard_dim),
+            partial(_unpack_hook_tp, tp_device_mesh, input_reshard_dim),
+        )
+        saved_tensor_hooks.__enter__()
+        nonlocal cx
+        cx = saved_tensor_hooks  # type: ignore[name-defined]
+
+    def input_reshard_backward_hook(
+        _: torch.nn.Module, _i: tuple[Any, ...], _o: Any
+    ) -> Any:
+        nonlocal cx
+        cx.__exit__()  # type: ignore[name-defined, union-attr]
+
+    module.register_forward_pre_hook(input_reshard_forward_pre_hook)
+    module.register_forward_hook(input_reshard_backward_hook)
+    return module
+
+
+def _pack_hook_tp(mesh: DeviceMesh, input_reshard_dim: int, x: torch.Tensor) -> Any:  # noqa: D401
+    """Hook function called after FWD to shard input."""
+    if isinstance(x, DTensor) and all(p.is_replicate() for p in x._spec.placements):
+        return x.redistribute(device_mesh=mesh, placements=[Shard(input_reshard_dim)])
+    elif (
+        not isinstance(x, DTensor)
+        and isinstance(x, torch.Tensor)
+        and x.numel() >= mesh.size()
+    ):
+        return (
+            DTensor.from_local(x, device_mesh=mesh)
+            .redistribute(device_mesh=mesh, placements=[Shard(input_reshard_dim)])
+            .to_local()
+        )
+    else:
+        return x
+
+
+def _unpack_hook_tp(mesh: DeviceMesh, input_reshard_dim: int, x: Any) -> torch.Tensor:  # noqa: D401
+    """Hook function called before activation recomputing in BWD to restore input."""
+    if (
+        isinstance(x, DTensor)
+        and len(x._spec.placements) == 1
+        and x._spec.placements[0].is_shard()
+    ):
+        return x.redistribute(device_mesh=mesh, placements=[Replicate()])
+    elif (
+        not isinstance(x, DTensor)
+        and isinstance(x, torch.Tensor)
+        and x.numel() >= mesh.size()
+    ):
+        return (
+            DTensor.from_local(
+                x, device_mesh=mesh, placements=[Shard(input_reshard_dim)]
+            )
+            .redistribute(device_mesh=mesh, placements=[Replicate()])
+            .to_local()
+        )
+    else:
+        return x
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/loss.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c1adbf2a672a5bbe2004f17b1652c29803c9b33
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/loss.py
@@ -0,0 +1,505 @@
+# mypy: allow-untyped-defs
+# Copyright (c) Meta Platforms, Inc. and affiliates
+import contextlib
+from typing import cast
+
+import torch
+import torch._prims_common as utils
+import torch.distributed._functional_collectives as funcol
+import torch.distributed.distributed_c10d as c10d
+from torch import Tensor
+from torch.distributed.device_mesh import DeviceMesh
+from torch.distributed.tensor import DTensor, Replicate, Shard
+from torch.distributed.tensor._dtensor_spec import DTensorSpec, TensorMeta
+from torch.distributed.tensor._ops._embedding_ops import MaskPartial
+from torch.distributed.tensor._ops._math_ops import (
+    _skip_dim,
+    Reduction,
+    replicate_reduction_dims,
+)
+from torch.distributed.tensor._ops.utils import normalize_dim
+from torch.distributed.tensor.placement_types import Placement
+
+
+aten = torch.ops.aten
+
+
+__all__ = ["loss_parallel"]
+
+
+@contextlib.contextmanager
+def loss_parallel():
+    """
+    A context manager that enables loss parallelism, where efficient parallelized loss computation
+    can be performed when the input is sharded on the class dimension. Currently only the cross-entropy
+    loss is supported.
+
+    Within this context manager, one can use :func:`~torch.nn.functional.cross_entropy` or
+    :class:`~torch.nn.CrossEntropyLoss` as usual, with the following assumptions on the input parameters.
+    The corresponding ``backward()`` call, if any, also needs to happen under this context manager.
+
+    Args:
+        input (:class:`DTensor`):
+            Input logits. Assumed to be sharded on the class dimension.
+        target (Union[:class:`torch.Tensor`, :class:`DTensor`]):
+            Must be ground truth class indices (class probabilities currently not supported).
+            Assumed to be replicated across the ``DeviceMesh``.
+        weight (Union[:class:`torch.Tensor`, :class:`DTensor`], optional):
+            If given, assumed to be replicated across the ``DeviceMesh``.
+        label_smoothing:
+            Currently not supported.
+
+    Returns:
+        A replicated :class:`DTensor`.
+
+    Example:
+        A sharded DTensor is manually created here to showcase the usage.
+        In practice, it is usually the output of a TP module.
+
+        >>> # xdoctest: +SKIP("distributed")
+        >>> from torch.distributed.tensor.parallel import loss_parallel
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> device_mesh = init_device_mesh("cuda", (8,))
+        >>> input = torch.randn(4, 16, device="cuda", requires_grad=True)
+        >>> dist_input = distribute_tensor(input, device_mesh, placements=[Shard(1)])
+        >>> target = torch.randint(16, (4,), device="cuda")
+        >>> with loss_parallel():
+        >>>     loss = F.cross_entropy(dist_input, target, reduction="mean")
+        >>>     loss.backward()
+        >>> ...
+    """
+    _enable_custom_loss_ops()
+
+    yield
+
+    _disable_custom_loss_ops()
+
+
+# Currently only needs to support one dimensional DeviceMesh; in general return
+# the mesh_dim with placements[mesh_dim].is_shard(dim)
+def _find_all_reduce_mesh_dim(placements: tuple[Placement, ...], dim: int) -> int:
+    if not len(placements) == 1:
+        raise ValueError(
+            "Currently loss_parallel() only supports input on one-dimensional DeviceMesh."
+        )
+    if not placements[0].is_shard(dim):
+        raise ValueError(
+            f"loss_parallel() should be enabled only when the input tensor is sharded on dimension {dim}."
+        )
+    return 0
+
+
+def _cast_to_dtensor(
+    tensor, placements: tuple[Placement, ...], mesh: DeviceMesh
+) -> DTensor:
+    if isinstance(tensor, DTensor):
+        if tensor.placements == placements:
+            return tensor
+        else:
+            raise RuntimeError(f"Expected {placements} but got {tensor.placements}.")
+    elif isinstance(tensor, torch.Tensor):
+        return DTensor.from_local(
+            tensor, device_mesh=mesh, placements=placements, run_check=False
+        )
+    else:
+        raise TypeError(f"Unsupported type {type(tensor)}")
+
+
+def _propagate_tensor_meta(
+    op_call: torch._ops.OpOverload,
+    args: tuple[object, ...],
+    kwargs: dict[str, object],
+) -> TensorMeta:
+    op_info = DTensor._op_dispatcher.unwrap_to_op_info(op_call, args, kwargs)
+    tensor_meta = DTensor._op_dispatcher.sharding_propagator.propagate_tensor_meta(
+        op_info.schema
+    )
+    if isinstance(tensor_meta, TensorMeta):
+        return tensor_meta
+    elif isinstance(tensor_meta, tuple):
+        return tensor_meta[0]
+    else:
+        raise RuntimeError(f"Unexpected tensor meta type: {type(tensor_meta)}.")
+
+
+# NOTE: The implementation follows torch._decomp.decomposition._log_softmax,
+# with all_reduce manually inserted to perform distributed computation.
+def _log_softmax(x, dim, half_to_float, mesh, mesh_dim):
+    if half_to_float:
+        assert x.dtype == torch.half
+    computation_dtype, result_dtype = utils.elementwise_dtypes(
+        x, type_promotion_kind=utils.ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT
+    )
+    x = x.to(dtype=computation_dtype, memory_format=torch.contiguous_format)
+    if x.numel() == 0:
+        shifted = x
+    else:
+        x_max = torch.amax(x, dim, keepdim=True)
+        x_max = funcol.all_reduce(
+            x_max, reduceOp=c10d.ReduceOp.MAX.name, group=(mesh, mesh_dim)
+        )
+        shifted = x - x_max
+    shifted_sumexp = torch.sum(torch.exp(shifted), dim, keepdim=True)
+    shifted_sumexp = funcol.all_reduce(
+        shifted_sumexp, reduceOp=c10d.ReduceOp.SUM.name, group=(mesh, mesh_dim)
+    )
+    shifted_logsumexp = torch.log(shifted_sumexp)
+    result = shifted - shifted_logsumexp
+    if not half_to_float:
+        result = result.to(result_dtype)
+    return result
+
+
+def _log_softmax_handler(
+    op_call: torch._ops.OpOverload,
+    args: tuple[object, ...],
+    kwargs: dict[str, object],
+) -> object:
+    x = cast(DTensor, args[0])
+    dim = cast(int, args[1])
+    half_to_float = cast(bool, args[2])
+
+    spec = x._spec
+    dim = normalize_dim(dim, x.dim())
+    mesh_dim = _find_all_reduce_mesh_dim(spec.placements, dim)
+
+    output_tensor_meta = _propagate_tensor_meta(op_call, args, kwargs)
+
+    res = _log_softmax(x._local_tensor, dim, half_to_float, spec.mesh, mesh_dim)
+
+    res_spec = DTensorSpec(
+        spec.mesh,
+        spec.placements,
+        tensor_meta=output_tensor_meta,
+    )
+
+    # pyrefly: ignore [bad-argument-type]
+    return DTensor(
+        # pyrefly: ignore [bad-argument-count]
+        res,
+        res_spec,
+        # pyrefly: ignore [unexpected-keyword]
+        requires_grad=res.requires_grad,
+    )
+
+
+# NOTE: As explained below at _nll_loss_and_log_softmax_backward, the
+# _log_softmax_backward_handler does not actually do any computation.
+def _log_softmax_backward_handler(
+    op_call: torch._ops.OpOverload,
+    args: tuple[object, ...],
+    kwargs: dict[str, object],
+) -> object:
+    grad_output = cast(DTensor, args[0])
+    input_dtype = cast(torch.dtype, args[3])
+    return grad_output.to(input_dtype)
+
+
+# NOTE: The implementation follows torch._decomp.decomposition._nll_loss_forward,
+# with customized communication inserted to perform distributed computation.
+def _nll_loss_forward(
+    x: Tensor,
+    target: Tensor,
+    weight: Tensor | None,
+    local_weight: Tensor | None,
+    reduction: int,
+    ignore_index: int,
+    input_shape: torch.Size,
+    channel_dim: int,
+    mesh: DeviceMesh,
+    mesh_dim: int,
+) -> tuple[Tensor, Tensor]:
+    n_dims = x.dim()
+    channel_dim = 1
+    if n_dims < 2:
+        channel_dim = 0
+
+    def _weight_view(weight: Tensor) -> Tensor:
+        if n_dims > 1:
+            shape = [
+                1,
+            ] * n_dims
+            shape[channel_dim] = weight.shape[0]
+            w = weight.view(shape)
+        else:
+            w = weight
+        return w
+
+    if weight is not None:
+        w = _weight_view(weight)
+        assert local_weight is not None
+        local_w = _weight_view(local_weight)
+        x = x * local_w
+    safe_target = torch.where(target != ignore_index, target, 0)
+    safe_target_ = safe_target.unsqueeze(channel_dim)
+
+    # The following code block is a distributed version of
+    # result = -torch.gather(self, channel_dim, safe_target_).squeeze(channel_dim)
+    partial_placement = MaskPartial(offset_shape=input_shape, offset_dim=channel_dim)
+    safe_target_partial_ = partial_placement._partition_value(
+        safe_target_, mesh, mesh_dim
+    )
+    result_partial = torch.gather(x, channel_dim, safe_target_partial_)
+    # an all_reduce happens here
+    result_reduced = partial_placement._reduce_value(result_partial, mesh, mesh_dim)
+    result = -result_reduced.squeeze(channel_dim)
+
+    result = torch.where(target != ignore_index, result, 0)
+
+    if reduction == Reduction.NONE.value and n_dims > 1:
+        total_weight = x.new_full((), 0.0)
+        return result, total_weight
+
+    if weight is not None:
+        new_shape = list(x.shape)
+        new_shape[channel_dim] = -1
+        # pyrefly: ignore [unbound-name]
+        w = w.expand(new_shape)
+        wsum = torch.gather(w, channel_dim, safe_target_).squeeze(channel_dim)
+        wsum = torch.where(target != ignore_index, wsum, 0)
+        total_weight = wsum.sum()
+    else:
+        total_weight = (target != ignore_index).sum().to(x)
+
+    # NOTE: this is correct only on 1D DeviceMesh; o/w additional
+    #       all-reduce on result and total_weight is needed
+    if reduction == Reduction.SUM.value:
+        result = result.sum()
+    elif reduction == Reduction.MEAN.value:
+        result = result.sum() / total_weight
+
+    return result, total_weight
+
+
+def _nll_loss_forward_handler(
+    op_call: torch._ops.OpOverload,
+    args: tuple[object, ...],
+    kwargs: dict[str, object],
+) -> object:
+    x = cast(DTensor, args[0])
+    target = args[1]
+    weight = args[2]
+    reduction = cast(int, args[3])
+    ignore_index = cast(int, args[4])
+
+    channel_dim = 1 if x.dim() >= 2 else 0
+    spec = x._spec
+    mesh_dim = _find_all_reduce_mesh_dim(spec.placements, channel_dim)
+
+    # Check user input: if target and weight are not DTensors, convert them to DTensors;
+    # if they are DTensors, check that they have the desired placements.
+    target_placements = _skip_dim(
+        replicate_reduction_dims(spec.placements, [channel_dim]), channel_dim
+    )
+    all_replicate_placements = (Replicate(),) * spec.mesh.ndim
+    target = _cast_to_dtensor(target, target_placements, spec.mesh)
+    local_weight = None
+    if weight is not None:
+        weight = _cast_to_dtensor(weight, all_replicate_placements, spec.mesh)
+        # For local computation, both (replicated) weight and (sharded) local_weight
+        # are needed in _nll_loss_forward(). local_weight is generated here using
+        # DTensor API, without incurring any communication.
+        sharded_placements = [
+            Shard(0) if i == mesh_dim else Replicate() for i in range(spec.mesh.ndim)
+        ]
+        local_weight = weight.redistribute(spec.mesh, sharded_placements)._local_tensor
+        assert local_weight.shape[0] == x._local_tensor.shape[channel_dim]
+
+    if reduction == Reduction.NONE.value:
+        output_placements = target_placements
+    else:
+        output_placements = all_replicate_placements
+
+    # tensor inputs to _propagate_tensor_meta need to be DTensors
+    # pyrefly: ignore [bad-assignment]
+    args = list(args)
+    # pyrefly: ignore [unsupported-operation]
+    args[1], args[2] = target, weight
+    output_tensor_meta = _propagate_tensor_meta(op_call, tuple(args), kwargs)
+
+    result, total_weight = _nll_loss_forward(
+        x._local_tensor,
+        target._local_tensor,
+        weight._local_tensor if weight is not None else None,
+        local_weight,
+        reduction,
+        ignore_index,
+        x.shape,
+        channel_dim,
+        spec.mesh,
+        mesh_dim,
+    )
+    out_spec = DTensorSpec(spec.mesh, output_placements, tensor_meta=output_tensor_meta)
+
+    return (
+        # pyrefly: ignore [bad-argument-type]
+        DTensor(
+            # pyrefly: ignore [bad-argument-count]
+            result,
+            out_spec,
+            # pyrefly: ignore [unexpected-keyword]
+            requires_grad=result.requires_grad,
+        ),
+        total_weight,
+    )
+
+
+# NOTE: The backward computation of cross_entropy goes through two steps:
+# backward for nll_loss and then backward for log_softmax. In loss parallel,
+# the two steps are fused into the following function (called by _nll_loss_backward_handler)
+# to avoid communication when target contains class indices not class probabilities.
+# Also note that the _log_softmax_backward_handler does not perform computation.
+# The implementation resembles _nll_loss_backward and _log_softmax_backward_data
+# from torch._decomp.decomposition.
+def _nll_loss_and_log_softmax_backward(
+    grad_output: Tensor,
+    x: Tensor,
+    target: Tensor,
+    weight: Tensor | None,
+    reduction: int,
+    ignore_index: int,
+    total_weight: Tensor,
+    input_shape: torch.Size,
+    channel_dim: int,
+    mesh: DeviceMesh,
+    mesh_dim: int,
+) -> Tensor:
+    channel_dim = 0 if x.dim() < 2 else 1
+    if reduction == Reduction.MEAN.value:
+        grad_output = grad_output / total_weight
+
+    target = target.unsqueeze(channel_dim)
+    safe_target = torch.where(target != ignore_index, target, 0)
+    grad_input = torch.zeros_like(x)
+
+    # The following code block is a distributed version of
+    # grad_input = torch.scatter(grad_input, channel_dim, safe_target, -1.0)
+    partial_placement = MaskPartial(offset_shape=input_shape, offset_dim=channel_dim)
+    safe_target = safe_target.squeeze(channel_dim).flatten()
+    masked_safe_target = partial_placement._partition_value(safe_target, mesh, mesh_dim)
+    # only update grad_input to -1 if not masked
+    assert partial_placement.mask_buffer.data is not None
+    grad_update = partial_placement.mask_buffer.data.to(grad_input.dtype) - 1.0
+    arange_1d = torch.arange(
+        masked_safe_target.shape[0], device=masked_safe_target.device
+    )
+    # The first two cases with x.dim() <= 2 are for aten.nll_loss_backward.default;
+    # the last case is for aten.nll_loss2d_backward.default.
+    if x.dim() == 1:
+        grad_input[masked_safe_target] = grad_update
+    elif x.dim() == 2:
+        grad_input[arange_1d, masked_safe_target] = grad_update
+    else:
+        grad_input_t = grad_input.transpose(channel_dim, -1)
+        intermidate_shape = grad_input_t.shape
+        grad_input_2d = grad_input_t.reshape(-1, x.shape[channel_dim])
+        grad_input_2d[arange_1d, masked_safe_target] = grad_update
+        grad_input = grad_input_2d.view(intermidate_shape).transpose(channel_dim, -1)
+
+    if grad_input.dim() > grad_output.dim() > 0:
+        grad_output = grad_output.unsqueeze(channel_dim)
+
+    if weight is not None:
+        new_shape = [1 for _ in range(x.dim())]
+        new_shape[channel_dim] = weight.shape[0]
+        weight = weight.reshape(new_shape)
+        # In order for fused computation to work, the following line is rewritten.
+        # grad_output = grad_output * weight
+        new_shape = list(x.shape)
+        new_shape[channel_dim] = -1
+        w = weight.expand(new_shape)
+        w_target = torch.gather(w, channel_dim, target)
+        grad_output = grad_output * w_target
+
+    grad_output = torch.where(target != ignore_index, grad_output, 0)
+
+    # NOTE: Instead of directly returning the grad_input as grad_output for log_softmax,
+    # here we perform backward computation for log_softmax altogether to avoid the
+    # otherwise extra all_gather communication.
+    # return grad_input * grad_output
+    return (grad_input + torch.exp(x)) * grad_output
+
+
+def _nll_loss_backward_handler(
+    op_call: torch._ops.OpOverload,
+    args: tuple[object, ...],
+    kwargs: dict[str, object],
+) -> object:
+    grad_output = cast(DTensor, args[0])
+    x = cast(DTensor, args[1])
+    target = args[2]
+    weight = args[3]
+    reduction = cast(int, args[4])
+    ignore_index = cast(int, args[5])
+    total_weight = cast(Tensor, args[6])
+
+    channel_dim = 1 if x.dim() >= 2 else 0
+    spec = x._spec
+    mesh_dim = _find_all_reduce_mesh_dim(spec.placements, channel_dim)
+
+    # if target and weight are not DTensors, convert them to DTensors
+    target_placements = _skip_dim(
+        replicate_reduction_dims(spec.placements, [channel_dim]), channel_dim
+    )
+    all_replicate_placements = (Replicate(),) * spec.mesh.ndim
+    target = _cast_to_dtensor(target, target_placements, spec.mesh)
+    if weight is not None:
+        weight = _cast_to_dtensor(weight, all_replicate_placements, spec.mesh)
+
+    # tensor inputs to _propagate_tensor_meta need to be DTensors
+    # pyrefly: ignore [bad-assignment]
+    args = list(args)
+    # pyrefly: ignore [unsupported-operation]
+    args[2], args[3] = target, weight
+    # pyrefly: ignore [unsupported-operation]
+    args[6] = _cast_to_dtensor(total_weight, all_replicate_placements, spec.mesh)
+    output_tensor_meta = _propagate_tensor_meta(op_call, tuple(args), kwargs)
+
+    result = _nll_loss_and_log_softmax_backward(
+        grad_output._local_tensor,
+        x._local_tensor,
+        target._local_tensor,
+        weight._local_tensor if weight is not None else None,
+        reduction,
+        ignore_index,
+        total_weight,
+        x.shape,
+        channel_dim,
+        spec.mesh,
+        mesh_dim,
+    )
+    # the output sharding is the same as input sharding: Shard(channel_dim) on mesh_dim
+    out_spec = DTensorSpec(
+        spec.mesh,
+        spec.placements,
+        tensor_meta=output_tensor_meta,
+    )
+
+    # pyrefly: ignore [bad-argument-type]
+    return DTensor(
+        # pyrefly: ignore [bad-argument-count]
+        result,
+        out_spec,
+        # pyrefly: ignore [unexpected-keyword]
+        requires_grad=result.requires_grad,
+    )
+
+
+customized_loss_ops = {
+    aten._log_softmax.default: _log_softmax_handler,
+    aten._log_softmax_backward_data.default: _log_softmax_backward_handler,
+    aten.nll_loss_forward.default: _nll_loss_forward_handler,
+    aten.nll_loss2d_forward.default: _nll_loss_forward_handler,
+    aten.nll_loss_backward.default: _nll_loss_backward_handler,
+    aten.nll_loss2d_backward.default: _nll_loss_backward_handler,
+}
+
+
+def _enable_custom_loss_ops():
+    DTensor._op_dispatcher._custom_op_handlers.update(customized_loss_ops)
+
+
+def _disable_custom_loss_ops():
+    for custom_op in customized_loss_ops:
+        DTensor._op_dispatcher._custom_op_handlers.pop(custom_op)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/style.py b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/style.py
new file mode 100644
index 0000000000000000000000000000000000000000..9eed832eabe8653c9e02ee0bb72b2e1256f76275
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torch/distributed/tensor/parallel/style.py
@@ -0,0 +1,810 @@
+# mypy: allow-untyped-defs
+# Copyright (c) Meta Platforms, Inc. and affiliates
+from abc import ABC, abstractmethod
+from functools import partial
+from typing import Any
+
+import torch
+import torch.nn as nn
+from torch.distributed.tensor import (
+    DeviceMesh,
+    distribute_module,
+    distribute_tensor,
+    DTensor,
+    Replicate,
+    Shard,
+)
+from torch.distributed.tensor.placement_types import Placement
+
+
+__all__ = [
+    "ParallelStyle",
+    "RowwiseParallel",
+    "SequenceParallel",
+    "ColwiseParallel",
+    "PrepareModuleInput",
+    "PrepareModuleInputOutput",
+    "PrepareModuleOutput",
+]
+
+
+class ParallelStyle(ABC):
+    """
+    The parallel style contract defines how the module or submodule should be parallelized.
+
+    It only defines the ``apply`` method for ``parallelize_module`` to use, this allows maximum
+    flexibility for different kind of style implementations.
+    """
+
+    src_data_rank: int | None = 0
+
+    @abstractmethod
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module: ...
+
+
+class ColwiseParallel(ParallelStyle):
+    """
+    Partition a compatible nn.Module in a column-wise fashion. Currently supports nn.Linear and nn.Embedding.
+    Users can compose it together with RowwiseParallel to achieve the sharding of more complicated modules.
+    (i.e. MLP, Attention)
+
+    Keyword Args:
+        input_layouts (Placement, optional):
+            The DTensor layout of input tensor for the nn.Module, this is used to annotate the input tensor to
+            become a DTensor. If not specified, we assume the input tensor to be replicated.
+        output_layouts (Placement, optional):
+            The DTensor layout of the output for the nn.Module, this is used to ensure the output of the nn.Module
+            with the user desired layout. If not specified, the output tensor is sharded on the last dimension.
+        use_local_output (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module output, default: True.
+    Returns:
+        A :class:`ParallelStyle` object that represents Colwise sharding of the nn.Module.
+
+    Example::
+        >>> # xdoctest: +SKIP(failing)
+        >>> from torch.distributed.tensor.parallel import parallelize_module, ColwiseParallel
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> m = Model(...)  # m is a nn.Module that contains a "w1" nn.Linear submodule
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>>
+        >>> # By default, the input of the "w1" Linear will be converted to Replicated DTensor
+        >>> # and the output of "w1" will return :class:`torch.Tensor` that shards on the last dim.
+        >>>
+        >>> sharded_mod = parallelize_module(m, tp_mesh, {"w1": ColwiseParallel()})
+        >>> ...
+
+    .. note:: By default ``ColwiseParallel`` output is sharded on the last dimension if the ``output_layouts`` not
+        specified, if there're operators that require specific tensor shape (i.e. before the paired ``RowwiseParallel``),
+        keep in mind that if the output is sharded the operator might need to be adjusted to the sharded size.
+    """
+
+    def __init__(
+        self,
+        *,
+        input_layouts: Placement | None = None,
+        output_layouts: Placement | None = None,
+        use_local_output: bool = True,
+    ):
+        super().__init__()
+        self.input_layouts = (input_layouts or Replicate(),)
+        self.output_layouts = (output_layouts or Shard(-1),)
+        # colwise linear runtime sharding (desired sharding):
+        # 1. requires replicate input
+        # 2. shard output on last dim
+        self.desired_input_layouts = (Replicate(),)
+        self.use_local_output = use_local_output
+
+    @staticmethod
+    def _prepare_input_fn(
+        input_layouts, desired_input_layouts, mod, inputs, device_mesh
+    ):
+        # TODO: figure out dynamo support for instance method and switch this to instance method
+
+        # annotate module input placements/sharding with input_layouts
+        input_tensor = inputs[0]
+        if not isinstance(input_tensor, DTensor):
+            input_tensor = DTensor.from_local(
+                input_tensor, device_mesh, input_layouts, run_check=False
+            )
+
+        # transform the input layouts to the desired layouts of ColwiseParallel
+        if input_layouts != desired_input_layouts:
+            input_tensor = input_tensor.redistribute(
+                placements=desired_input_layouts, async_op=True
+            )
+        return input_tensor
+
+    def _partition_linear_fn(self, name, module, device_mesh):
+        # colwise shard weight/bias to Shard(0), weight be Shard(0)
+        # means Colwise as Linear is input * weight^T + bias, where
+        # weight would become Shard(1)
+        for name, param in module.named_parameters():
+            dist_param = nn.Parameter(
+                distribute_tensor(
+                    param, device_mesh, [Shard(0)], src_data_rank=self.src_data_rank
+                )
+            )
+            module.register_parameter(name, dist_param)
+
+    def _partition_embedding_fn(self, name, module, device_mesh):
+        # colwise shard embedding.weight is straight forward as Shard(1)
+        for name, param in module.named_parameters():
+            dist_param = nn.Parameter(
+                distribute_tensor(
+                    param, device_mesh, [Shard(1)], src_data_rank=self.src_data_rank
+                )
+            )
+            module.register_parameter(name, dist_param)
+
+    @staticmethod
+    def _prepare_output_fn(output_layouts, use_local_output, mod, outputs, device_mesh):
+        # outputs is a shard on last dimension DTensor, i.e. Shard(-1)
+        if outputs.placements != output_layouts:
+            outputs = outputs.redistribute(placements=output_layouts, async_op=True)
+        # back to local tensor
+        return outputs.to_local() if use_local_output else outputs
+
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
+        if isinstance(module, nn.Linear):
+            partition_fn = self._partition_linear_fn
+        elif isinstance(module, nn.Embedding):
+            partition_fn = self._partition_embedding_fn
+        else:
+            raise NotImplementedError(
+                "ColwiseParallel currently only support nn.Linear and nn.Embedding!"
+            )
+
+        return distribute_module(
+            module,
+            device_mesh,
+            partition_fn,
+            partial(
+                self._prepare_input_fn, self.input_layouts, self.desired_input_layouts
+            ),
+            partial(
+                self._prepare_output_fn, self.output_layouts, self.use_local_output
+            ),
+        )
+
+    def __repr__(self) -> str:
+        tmpstr = self.__class__.__name__ + "("
+        tmpstr += f"input_layouts={self.input_layouts}, "
+        tmpstr += f"output_layouts={self.output_layouts}, "
+        tmpstr += f"use_local_output={self.use_local_output}"
+        tmpstr += ")"
+        return tmpstr
+
+
+class RowwiseParallel(ParallelStyle):
+    """
+    Partition a compatible nn.Module in a row-wise fashion. Currently supports nn.Linear and nn.Embedding.
+    Users can compose it with ColwiseParallel to achieve the sharding of more complicated modules.
+    (i.e. MLP, Attention)
+
+    Keyword Args:
+        input_layouts (Placement, optional):
+            The DTensor layout of input tensor for the nn.Module, this is used to annotate the input tensor to
+            become a DTensor. If not specified, we assume the input tensor to be sharded on the last dimension.
+        output_layouts (Placement, optional):
+            The DTensor layout of the output for the nn.Module, this is used to ensure the output of the nn.Module
+            with the user desired layout. If not specified, the output tensor is replicated.
+        use_local_output (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module output, default: True.
+    Returns:
+        A :class:`ParallelStyle` object that represents Rowwise sharding of the nn.Module.
+
+    Example::
+        >>> # xdoctest: +SKIP(failing)
+        >>> from torch.distributed.tensor.parallel import parallelize_module, RowwiseParallel
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> m = Model(...)  # m is a nn.Module that contains a "w2" nn.Linear submodule
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>>
+        >>> # By default, the input of the "w2" Linear will be converted to DTensor that shards on the last dim
+        >>> # and the output of "w2" will return a replicated :class:`torch.Tensor`.
+        >>>
+        >>> sharded_mod = parallelize_module(m, tp_mesh, {"w2": RowwiseParallel()}),
+        >>> ...
+    """
+
+    def __init__(
+        self,
+        *,
+        input_layouts: Placement | None = None,
+        output_layouts: Placement | None = None,
+        use_local_output: bool = True,
+    ):
+        super().__init__()
+        self.input_layouts = (input_layouts or Shard(-1),)
+        self.output_layouts = (output_layouts or Replicate(),)
+        self.use_local_output = use_local_output
+
+    @staticmethod
+    def _prepare_input_fn(
+        input_layouts, desired_input_layouts, mod, inputs, device_mesh
+    ):
+        input_tensor = inputs[0]
+        if not isinstance(input_tensor, DTensor):
+            input_tensor = DTensor.from_local(
+                input_tensor, device_mesh, input_layouts, run_check=False
+            )
+
+        if input_layouts != desired_input_layouts:
+            input_tensor = input_tensor.redistribute(
+                placements=desired_input_layouts, async_op=True
+            )
+        return input_tensor
+
+    def _partition_linear_fn(self, name, module, device_mesh):
+        # Rowwise shard weight to Shard(1), bias to Replicate(), weight be Shard(1)
+        # means Rowwise as nn.Linear is input * weight^T + bias, where
+        # weight would become Shard(0)
+        module.register_parameter(
+            "weight",
+            nn.Parameter(
+                distribute_tensor(
+                    module.weight,
+                    device_mesh,
+                    [Shard(1)],
+                    src_data_rank=self.src_data_rank,
+                )
+            ),
+        )
+        if getattr(module, "bias", None) is not None:
+            # The Linear module has bias
+            module.register_parameter(
+                "bias",
+                nn.Parameter(
+                    distribute_tensor(
+                        module.bias,
+                        device_mesh,
+                        [Replicate()],
+                        src_data_rank=self.src_data_rank,
+                    )
+                ),
+            )
+
+    def _partition_embedding_fn(self, name, module, device_mesh):
+        # rowwise shard embedding.weight is Shard(0)
+        for name, param in module.named_parameters():
+            dist_param = nn.Parameter(
+                distribute_tensor(
+                    param, device_mesh, [Shard(0)], src_data_rank=self.src_data_rank
+                )
+            )
+            module.register_parameter(name, dist_param)
+
+    @staticmethod
+    def _prepare_output_fn(output_layouts, use_local_output, mod, outputs, device_mesh):
+        # Rowwise sharding produces partial output, depending on output layouts:
+        # 1. to replicate -> allreduce
+        # 2. to shard -> reduce_scatter
+        if outputs.placements != output_layouts:
+            outputs = outputs.redistribute(placements=output_layouts, async_op=True)
+        # back to local tensor if use_local_output is True
+        return outputs.to_local() if use_local_output else outputs
+
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
+        if isinstance(module, nn.Linear):
+            partition_fn = self._partition_linear_fn
+            # rowwise linear runtime sharding requires input tensor shard on last dim
+            self.desired_input_layouts: tuple[Placement, ...] = (Shard(-1),)
+        elif isinstance(module, nn.Embedding):
+            partition_fn = self._partition_embedding_fn
+            # rowwise embedding runtime sharding requires input tensor replicated
+            self.desired_input_layouts = (Replicate(),)
+        else:
+            raise NotImplementedError(
+                "RowwiseParallel currently only support nn.Linear and nn.Embedding!"
+            )
+
+        return distribute_module(
+            module,
+            device_mesh,
+            partition_fn,
+            partial(
+                self._prepare_input_fn, self.input_layouts, self.desired_input_layouts
+            ),
+            partial(
+                self._prepare_output_fn, self.output_layouts, self.use_local_output
+            ),
+        )
+
+    def __repr__(self) -> str:
+        tmpstr = self.__class__.__name__ + "("
+        tmpstr += f"input_layouts={self.input_layouts}, "
+        tmpstr += f"output_layouts={self.output_layouts}, "
+        tmpstr += f"use_local_output={self.use_local_output}"
+        tmpstr += ")"
+        return tmpstr
+
+
+class SequenceParallel(ParallelStyle):
+    """
+    SequenceParallel replicates a compatible ``nn.Module`` parameters and runs the sharded computation with
+    input sharded on the sequence dimension. This currently supports ``nn.LayerNorm``, ``nn.Dropout``, and the
+    `RMSNorm python implementation `__
+
+    This style implements the operation that is described in the paper
+    `Reducing Activation Recomputation in Large Transformer Models `__
+
+    If the input passed in to this ``nn.Module`` is a :class:`torch.Tensor`, it assumes that the input is already sharded
+    on the sequence dimension and converts the input to a :class:`DTensor` sharded on the sequence dimension. If the input
+    passed in to this ``nn.Module`` is already a :class:`DTensor` but is not sharded on the sequence dimension, it would
+    redistribute the input to be sharded on the sequence dimension.
+
+    The output of the ``nn.Module`` will be sharded on the sequence dimension.
+
+    Keyword Args:
+        sequence_dim (int, optional):
+            The sequence dimension of the input tensor for the ``nn.Module``, this is used to annotate the input tensor to
+            become a DTensor that is sharded on the sequence dimension, default: 1.
+        use_local_output (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module output, default: False.
+    Returns:
+        A :class:`ParallelStyle` object that represents Sequence Parallel of the ``nn.Module``.
+
+    Example::
+        >>> # xdoctest: +SKIP(failing)
+        >>> from torch.distributed.tensor.parallel import parallelize_module, SequenceParallel
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> m = Model(...)  # m is a nn.Module that contains a "norm" nn.LayerNorm submodule
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>>
+        >>> # By default, the input of the "norm" will be converted to DTensor that shards on the sequence dim
+        >>> # and the output of "norm" will return a sharded on sequence dimension :class:`DTensor`.
+        >>>
+        >>> sharded_mod = parallelize_module(m, tp_mesh, {"norm": SequenceParallel()}),
+        >>> ...
+
+    .. note:: SequenceParallel style assumes ones initialization if there are weights in the nn.Module (i.e.
+        ``nn.LayerNorm`` or ``RMSNorm``, and they by default have ones initialization). If you have custom
+        inits for the weights on those modules, you need to broadcast the weights before/after parallelizing
+        to ensure that they are replicated.
+    """
+
+    def __init__(self, *, sequence_dim: int = 1, use_local_output: bool = False):
+        super().__init__()
+        self.sequence_sharding = (Shard(sequence_dim),)
+        self.use_local_output = use_local_output
+
+    def _replicate_module_fn(
+        self, name: str, module: nn.Module, device_mesh: DeviceMesh
+    ):
+        for p_name, param in module.named_parameters():
+            # simple replication with fixed ones_ init from LayerNorm/RMSNorm, which allow
+            # us to simply just use from_local
+            replicated_param = torch.nn.Parameter(
+                DTensor.from_local(param, device_mesh, [Replicate()], run_check=False)
+            )
+            module.register_parameter(p_name, replicated_param)
+
+    @staticmethod
+    def _prepare_input_fn(sequence_sharding, mod, inputs, device_mesh):
+        input_tensor = inputs[0]
+        if isinstance(input_tensor, DTensor):
+            # if the passed in input DTensor is not sharded on the sequence dim, we need to redistribute it
+            if input_tensor.placements != sequence_sharding:
+                input_tensor = input_tensor.redistribute(
+                    placements=sequence_sharding, async_op=True
+                )
+            return input_tensor
+        elif isinstance(input_tensor, torch.Tensor):
+            # assume the input passed in already sharded on the sequence dim and create the DTensor
+            return DTensor.from_local(
+                input_tensor, device_mesh, sequence_sharding, run_check=False
+            )
+        else:
+            raise ValueError(
+                f"expecting input of {mod} to be a torch.Tensor or DTensor, but got {input_tensor}"
+            )
+
+    @staticmethod
+    def _prepare_output_fn(use_local_output, mod, outputs, device_mesh):
+        return outputs.to_local() if use_local_output else outputs
+
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
+        return distribute_module(
+            module,
+            device_mesh,
+            self._replicate_module_fn,
+            partial(self._prepare_input_fn, self.sequence_sharding),
+            partial(self._prepare_output_fn, self.use_local_output),
+        )
+
+    def __repr__(self) -> str:
+        tmpstr = self.__class__.__name__ + "("
+        if len(self.sequence_sharding) == 1:
+            tmpstr += f"sequence_dim={self.sequence_sharding[0].dim}, "
+        tmpstr += f"use_local_output={self.use_local_output}"
+        tmpstr += ")"
+        return tmpstr
+
+
+class PrepareModuleInput(ParallelStyle):
+    """
+    Configure the nn.Module's inputs to convert the input tensors of the nn.Module to DTensors at runtime according to
+    ``input_layouts``, and perform layout redistribution according to the ``desired_input_layouts``.
+
+    Keyword Args:
+        input_layouts (Union[Placement, Tuple[Optional[Placement]]]):
+            The DTensor layouts of input tensors for the nn.Module, this is used to convert the input tensors to
+            DTensors. If some inputs are not torch.Tensor or no need to convert to DTensors, ``None`` need to be specified
+            as a placeholder. default: None.
+        desired_input_layouts (Union[Placement, Tuple[Optional[Placement]]]):
+            The desired DTensor layout of input tensors for the nn.Module, this is used to ensure the inputs of the nn.Module
+            have the desired DTensor layouts. This argument needs to have the same length with ``input_layouts``. default: None.
+        input_kwarg_layouts (Dict[str, Placement]):
+            The DTensor layouts of input kwargs for the nn.Module, this is used to convert the input kwarg tensors to DTensors.
+            default: None
+        desired_input_kwarg_layouts: (Dict[str, Placement]):
+            The desired DTensor layout of input kwargs for the nn.Module, this is used to ensure the inputs of the nn.Module
+            have the desired DTensor layouts. default: None.
+        use_local_output (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module inputs, default: False.
+    Returns:
+        A :class:`ParallelStyle` object that prepares the sharding layouts of the nn.Module's inputs.
+
+    Example::
+        >>> # xdoctest: +SKIP(failing)
+        >>> from torch.distributed.tensor.parallel import parallelize_module, PrepareModuleInput
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> block = TransformerBlock(...)  # block is a nn.Module that contains an "attn" Attention submodule
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>>
+        >>> # According to the style specified below, the first input of attn will be annotated to Sharded DTensor
+        >>> # and then redistributed to Replicated DTensor.
+        >>> parallelize_module(
+        >>>     block, # this can be a submodule or module
+        >>>     tp_mesh,
+        >>>     parallelize_plan={
+        >>>         "attn": PrepareModuleInput(
+        >>>             input_layouts=(Shard(0), None, None, ...),
+        >>>             desired_input_layouts=(Replicate(), None, None, ...)
+        >>>         ),
+        >>>     }
+        >>> )
+    """
+
+    def __init__(
+        self,
+        *,
+        input_layouts: Placement | tuple[Placement | None, ...] | None = None,
+        desired_input_layouts: Placement | tuple[Placement | None, ...] | None = None,
+        input_kwarg_layouts: dict[str, Placement] | None = None,
+        desired_input_kwarg_layouts: dict[str, Placement] | None = None,
+        use_local_output: bool = False,
+    ):
+        self.input_layouts = (
+            (input_layouts,) if isinstance(input_layouts, Placement) else input_layouts
+        )
+        self.desired_input_layouts = (
+            (desired_input_layouts,)
+            if isinstance(desired_input_layouts, Placement)
+            else desired_input_layouts
+        )
+        self.use_local_output = use_local_output
+        if self.input_layouts is not None:
+            assert self.desired_input_layouts is not None, (
+                "desired module inputs should not be None!"
+            )
+            assert len(self.input_layouts) == len(self.desired_input_layouts), (
+                "input_layouts and desired_input_layouts should have same length!"
+            )
+        self.with_kwargs = input_kwarg_layouts is not None
+        self.input_kwarg_layouts = input_kwarg_layouts or {}
+        self.desired_input_kwarg_layouts = desired_input_kwarg_layouts or {}
+        if self.with_kwargs:
+            assert len(self.input_kwarg_layouts) == len(
+                self.desired_input_kwarg_layouts
+            ), (
+                "input_kwarg_layouts and desired_input_kwarg_layouts should have same length!"
+            )
+
+    def _prepare_input_arg(
+        self,
+        input: Any,
+        mesh: DeviceMesh,
+        input_layout: Placement | None,
+        desired_layout: Placement | None,
+    ):
+        if input_layout is not None:
+            if isinstance(input, DTensor):
+                # TODO: re-enable the check once we fix the compile path
+                # assert inp.placements[0] == input_layout
+                dt_inp = input
+            else:
+                assert isinstance(input, torch.Tensor), (
+                    "expecting input to be a torch.Tensor!"
+                )
+                dt_inp = DTensor.from_local(
+                    input, mesh, (input_layout,), run_check=False
+                )
+
+            if desired_layout is not None and input_layout != desired_layout:
+                dt_inp = dt_inp.redistribute(placements=(desired_layout,))
+
+            return dt_inp.to_local() if self.use_local_output else dt_inp
+        else:
+            return input
+
+    def _prepare_input_fn(self, inputs, device_mesh):
+        if self.input_layouts is None:
+            return inputs
+        prepared_inputs = []
+        if not isinstance(inputs, tuple):
+            inputs = (inputs,)
+        if len(inputs) != len(self.input_layouts):
+            raise ValueError("module inputs and input_layouts should have same length!")
+
+        assert self.desired_input_layouts is not None, (
+            "desired module inputs should not be None!"
+        )
+
+        for inp, input_layout, desired_layout in zip(
+            inputs, self.input_layouts, self.desired_input_layouts
+        ):
+            prepared_inputs.append(
+                self._prepare_input_arg(inp, device_mesh, input_layout, desired_layout)
+            )
+        return tuple(prepared_inputs)
+
+    def _prepare_input_kwarg_fn(self, inputs, kwarg_inputs, device_mesh):
+        prepared_arg_inputs = self._prepare_input_fn(inputs, device_mesh)
+        prepared_kwarg_inputs = {}
+        for kwarg_key in kwarg_inputs:
+            kwarg_val = kwarg_inputs[kwarg_key]
+            input_layout = self.input_kwarg_layouts.get(kwarg_key)
+            desired_input_layout = self.desired_input_kwarg_layouts.get(kwarg_key)
+
+            prepared_kwarg_inputs[kwarg_key] = self._prepare_input_arg(
+                kwarg_val, device_mesh, input_layout, desired_input_layout
+            )
+
+        return (prepared_arg_inputs, prepared_kwarg_inputs)
+
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
+        if self.with_kwargs:
+            module.register_forward_pre_hook(
+                lambda _, inputs, kwargs: self._prepare_input_kwarg_fn(
+                    inputs, kwargs, device_mesh
+                ),
+                with_kwargs=True,
+            )  # type: ignore[misc]
+        else:
+            module.register_forward_pre_hook(
+                lambda _, inputs: self._prepare_input_fn(inputs, device_mesh)
+            )  # type: ignore[misc, call-arg]
+        return module
+
+    def __repr__(self) -> str:
+        tmpstr = self.__class__.__name__ + "("
+        tmpstr += f"input_layouts={self.input_layouts}, "
+        tmpstr += f"desired_input_layouts={self.desired_input_layouts}, "
+        tmpstr += f"input_kwarg_layouts={self.input_kwarg_layouts}, "
+        tmpstr += f"desired_input_kwarg_layouts={self.desired_input_kwarg_layouts}, "
+        tmpstr += f"use_local_output={self.use_local_output}"
+        tmpstr += ")"
+        return tmpstr
+
+
+class PrepareModuleOutput(ParallelStyle):
+    """
+    Configure the nn.Module's outputs to convert the output tensors of the nn.Module to DTensors at runtime according to
+    ``output_layouts``, and perform layout redistribution according to the ``desired_output_layouts``.
+
+    Keyword Args:
+        output_layouts (Union[Placement, Tuple[Placement]]):
+            The DTensor layouts of output tensors for the nn.Module, this is used to convert the output tensors to
+            DTensors if they are :class:`torch.Tensor`. If some outputs are not torch.Tensor or no need to convert to DTensors,
+            ``None`` need to be specified as a placeholder.
+        desired_output_layouts (Union[Placement, Tuple[Placement]]):
+            The desired DTensor layouts of output tensors for the nn.Module, this is used to ensure the outputs of the nn.Module
+            have the desired DTensor layouts.
+        use_local_output (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module outputs, default: True.
+    Returns:
+        A ParallelStyle object that prepares the sharding layouts of the nn.Module's outputs.
+
+    Example::
+        >>> # xdoctest: +SKIP(failing)
+        >>> from torch.distributed.tensor.parallel import parallelize_module, PrepareModuleOutput
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> block = TransformerBlock(...)  # block is a nn.Module that contains an "attn" Attention submodule
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>>
+        >>> # According to the style specified below, the output of the TransformerBlock will be converted to Replicated DTensor
+        >>> # and then redistributed to Sharded DTensor.
+        >>> parallelize_module(
+        >>>     block, # this can be a submodule or module
+        >>>     tp_mesh,
+        >>>     parallelize_plan = PrepareModuleOutput(
+        >>>         output_layouts=Replicate(),
+        >>>         desired_output_layouts=Shard(0)
+        >>>     )
+        >>> )
+    """
+
+    def __init__(
+        self,
+        *,
+        output_layouts: Placement | tuple[Placement | None, ...],
+        desired_output_layouts: Placement | tuple[Placement, ...],
+        use_local_output: bool = True,
+    ):
+        self.output_layouts = (
+            (output_layouts,)
+            if isinstance(output_layouts, Placement)
+            else output_layouts
+        )
+        self.desired_output_layouts = (
+            (desired_output_layouts,)
+            if isinstance(desired_output_layouts, Placement)
+            else desired_output_layouts
+        )
+        self.use_local_output = use_local_output
+        assert len(self.output_layouts) == len(self.desired_output_layouts), (
+            "output_layouts and desired_output_layouts should have same length!"
+        )
+
+    def _prepare_out_fn(self, outputs, device_mesh):
+        prepared_outputs = []
+        if not isinstance(outputs, tuple):
+            outputs = (outputs,)
+        if len(outputs) != len(self.output_layouts):
+            raise ValueError(
+                "module outputs and output_layouts should have same length!"
+            )
+
+        for out, out_layout, desired_out_layout in zip(
+            outputs, self.output_layouts, self.desired_output_layouts
+        ):
+            if out_layout is not None:
+                if isinstance(out, DTensor):
+                    # TODO: re-enable the check once we fix the compile path
+                    # assert out.placements[0] == out_layout
+                    dt_out = out
+                else:
+                    dt_out = DTensor.from_local(
+                        out, device_mesh, (out_layout,), run_check=False
+                    )
+
+                if out_layout != desired_out_layout:
+                    dt_out = dt_out.redistribute(placements=(desired_out_layout,))
+                prepared_outputs.append(
+                    dt_out.to_local() if self.use_local_output else dt_out
+                )
+            else:
+                prepared_outputs.append(out)
+        if len(prepared_outputs) == 1:
+            return prepared_outputs[0]
+        else:
+            return tuple(prepared_outputs)
+
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
+        module.register_forward_hook(
+            lambda _, inputs, outputs: self._prepare_out_fn(outputs, device_mesh)
+        )  # type: ignore[misc, call-arg]
+        return module
+
+    def __repr__(self) -> str:
+        tmpstr = self.__class__.__name__ + "("
+        tmpstr += f"output_layouts={self.output_layouts}, "
+        tmpstr += f"desired_output_layouts={self.desired_output_layouts}, "
+        tmpstr += f"use_local_output={self.use_local_output}"
+        tmpstr += ")"
+        return tmpstr
+
+
+class PrepareModuleInputOutput(ParallelStyle):
+    """
+    Configure the nn.Module's inputs (and outputs) to convert the input tensors (and output tensors, respectively) of the nn.Module
+    to DTensors at runtime according to ``input_layouts`` (and output_layouts, respectively), and perform layout redistribution
+    according to the ``desired_input_layouts`` (and ``desired_output_layouts``, respectively). This is a combination of
+    :class:`PrepareModuleInput` and :class:`PrepareModuleOutput`.
+
+    Keyword Args:
+        input_layouts (Union[Placement, Tuple[Optional[Placement]]]):
+            The DTensor layouts of input tensors for the nn.Module, this is used to convert the input tensors to
+            DTensors. If some inputs are not torch.Tensor or no need to convert to DTensors, ``None`` need to be specified
+            as a placeholder. default: None.
+        desired_input_layouts (Union[Placement, Tuple[Optional[Placement]]]):
+            The desired DTensor layout of input tensors for the nn.Module, this is used to ensure the inputs of the nn.Module
+            have the desired DTensor layouts. This argument needs to have the same length with ``input_layouts``. default: None.
+        input_kwarg_layouts (Dict[str, Placement]):
+            The DTensor layouts of input kwargs for the nn.Module, this is used to convert the input kwarg tensors to DTensors.
+            default: None
+        desired_input_kwarg_layouts: (Dict[str, Placement]):
+            The desired DTensor layout of input kwargs for the nn.Module, this is used to ensure the inputs of the nn.Module
+            have the desired DTensor layouts. default: None.
+        use_local_input (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module inputs, default: False.
+        output_layouts (Union[Placement, Tuple[Placement]]):
+            The DTensor layouts of output tensors for the nn.Module, this is used to convert the output tensors to
+            DTensors if they are :class:`torch.Tensor`. If some outputs are not torch.Tensor or no need to convert to DTensors,
+            ``None`` need to be specified as a placeholder.
+        desired_output_layouts (Union[Placement, Tuple[Placement]]):
+            The desired DTensor layouts of output tensors for the nn.Module, this is used to ensure the outputs of the nn.Module
+            have the desired DTensor layouts.
+        use_local_output (bool, optional):
+            Whether to use local :class:`torch.Tensor` instead of :class:`DTensor` for the module outputs, default: True.
+    Returns:
+        A :class:`ParallelStyle` object that prepares the sharding layouts of the nn.Module's inputs and outputs.
+
+    Example::
+        >>> # xdoctest: +SKIP(failing)
+        >>> from torch.distributed.tensor.parallel import parallelize_module, PrepareModuleInputOutput
+        >>> from torch.distributed.device_mesh import init_device_mesh
+        >>> ...
+        >>> block = TransformerBlock(...)  # block is a nn.Module that contains an "attn" Attention submodule
+        >>> tp_mesh = init_device_mesh("cuda", (8,))
+        >>>
+        >>> # According to the style specified below, the first input of attn will be annotated as Sharded DTensor
+        >>> # and then redistributed to Replicated DTensor, and the output of the TransformerBlock will be annotated
+        >>> # as Replicated DTensor and then redistributed to Sharded DTensor.
+        >>> parallelize_module(
+        >>>     block, # this can be a submodule or module
+        >>>     tp_mesh,
+        >>>     parallelize_plan={
+        >>>         "attn": PrepareModuleInputOutput(
+        >>>             input_layouts=(Shard(0), None, None, ...),
+        >>>             desired_input_layouts=(Replicate(), None, None, ...),
+        >>>             output_layouts=Replicate(),
+        >>>             desired_output_layouts=Shard(0),
+        >>>         ),
+        >>>     }
+        >>> )
+    """
+
+    def __init__(
+        self,
+        *,
+        input_layouts: Placement | tuple[Placement | None, ...] | None = None,
+        desired_input_layouts: Placement | tuple[Placement | None, ...] | None = None,
+        input_kwarg_layouts: dict[str, Placement] | None = None,
+        desired_input_kwarg_layouts: dict[str, Placement] | None = None,
+        use_local_input: bool = False,
+        output_layouts: Placement | tuple[Placement | None, ...],
+        desired_output_layouts: Placement | tuple[Placement, ...],
+        use_local_output: bool = True,
+    ):
+        self.prepare_module_input = PrepareModuleInput(
+            input_layouts=input_layouts,
+            desired_input_layouts=desired_input_layouts,
+            input_kwarg_layouts=input_kwarg_layouts,
+            desired_input_kwarg_layouts=desired_input_kwarg_layouts,
+            use_local_output=use_local_input,
+        )
+        self.prepare_module_output = PrepareModuleOutput(
+            output_layouts=output_layouts,
+            desired_output_layouts=desired_output_layouts,
+            use_local_output=use_local_output,
+        )
+
+    def _apply(self, module: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
+        self.prepare_module_input._apply(module, device_mesh)
+        self.prepare_module_output._apply(module, device_mesh)
+
+        return module
+
+    def __repr__(self) -> str:
+        tmpstr = self.__class__.__name__ + "("
+        tmpstr += f"input_layouts={self.prepare_module_input.input_layouts}, "
+        tmpstr += (
+            f"desired_input_layouts={self.prepare_module_input.desired_input_layouts}, "
+        )
+        tmpstr += (
+            f"input_kwarg_layouts={self.prepare_module_input.input_kwarg_layouts}, "
+        )
+        tmpstr += f"desired_input_kwarg_layouts={self.prepare_module_input.desired_input_kwarg_layouts}, "
+        tmpstr += f"use_local_input={self.prepare_module_input.use_local_output}, "
+        tmpstr += f"output_layouts={self.prepare_module_output.output_layouts}, "
+        tmpstr += f"desired_output_layouts={self.prepare_module_output.desired_output_layouts}, "
+        tmpstr += f"use_local_output={self.prepare_module_output.use_local_output}"
+        tmpstr += ")"
+        return tmpstr
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/torchaudio-2.10.0+cu128.dist-info/licenses/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/torchaudio-2.10.0+cu128.dist-info/licenses/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..1bec23eaf1dd562ae3d3216420b1b1bbfbd39cbc
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/torchaudio-2.10.0+cu128.dist-info/licenses/LICENSE
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2017 Facebook Inc. (Soumith Chintala), 
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.